forked from M-Labs/artiq
1
0
Fork 0

drtio: refactor/simplify GTH, use migen

This commit is contained in:
Sebastien Bourdeauducq 2017-12-23 01:19:44 +08:00
parent 77897228ca
commit c57b66497c
3 changed files with 149 additions and 348 deletions

View File

@ -1,113 +0,0 @@
from math import ceil
from functools import reduce
from operator import add
from litex.gen import *
from litex.gen.genlib.cdc import MultiReg, PulseSynchronizer
# Changes the phase of the transceiver RX clock to align the comma to
# the LSBs of RXDATA, fixing the latency.
#
# This is implemented by repeatedly resetting the transceiver until it
# gives out the correct phase. Each reset gives a random phase.
#
# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA
# would achieve this faster and in a cleaner way. But:
# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead
# of 1 UI at every pulse. It is unclear what the latency becomes.
# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed.
# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless
# transceiver "feature".
#
# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped
# compared to the usual 8b10b binary representation.
class BruteforceClockAligner(Module):
def __init__(self, comma, tx_clk_freq, check_period=6e-3):
self.rxdata = Signal(20)
self.restart = Signal()
self.ready = Signal()
check_max_val = ceil(check_period*tx_clk_freq)
check_counter = Signal(max=check_max_val+1)
check = Signal()
reset_check_counter = Signal()
self.sync.rtio_tx += [
check.eq(0),
If(reset_check_counter,
check_counter.eq(check_max_val)
).Else(
If(check_counter == 0,
check.eq(1),
check_counter.eq(check_max_val)
).Else(
check_counter.eq(check_counter-1)
)
)
]
checks_reset = PulseSynchronizer("rtio_tx", "rtio_rx")
self.submodules += checks_reset
comma_n = ~comma & 0b1111111111
comma_seen_rxclk = Signal()
comma_seen = Signal()
comma_seen_rxclk.attr.add("no_retiming")
self.specials += MultiReg(comma_seen_rxclk, comma_seen)
self.sync.rtio_rx += \
If(checks_reset.o,
comma_seen_rxclk.eq(0)
).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n),
comma_seen_rxclk.eq(1)
)
error_seen_rxclk = Signal()
error_seen = Signal()
error_seen_rxclk.attr.add("no_retiming")
self.specials += MultiReg(error_seen_rxclk, error_seen)
rx1cnt = Signal(max=11)
self.sync.rtio_rx += [
rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])),
If(checks_reset.o,
error_seen_rxclk.eq(0)
).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6),
error_seen_rxclk.eq(1)
)
]
fsm = ClockDomainsRenamer("rtio_tx")(FSM(reset_state="WAIT_COMMA"))
self.submodules += fsm
fsm.act("WAIT_COMMA",
If(check,
# Errors are still OK at this stage, as the transceiver
# has just been reset and may output garbage data.
If(comma_seen,
NextState("WAIT_NOERROR")
).Else(
self.restart.eq(1)
),
checks_reset.i.eq(1)
)
)
fsm.act("WAIT_NOERROR",
If(check,
If(comma_seen & ~error_seen,
NextState("READY")
).Else(
self.restart.eq(1),
NextState("WAIT_COMMA")
),
checks_reset.i.eq(1)
)
)
fsm.act("READY",
reset_check_counter.eq(1),
self.ready.eq(1),
If(error_seen,
checks_reset.i.eq(1),
self.restart.eq(1),
NextState("WAIT_COMMA")
)
)

View File

@ -1,224 +1,26 @@
from litex.gen import *
from litex.gen.genlib.resetsync import AsyncResetSynchronizer
from litex.gen.genlib.cdc import MultiReg
from migen import *
from migen.genlib.resetsync import AsyncResetSynchronizer
from migen.genlib.cdc import MultiReg
from litex.soc.interconnect.csr import *
from litex.soc.cores.code_8b10b import Encoder, Decoder
from misoc.interconnect.csr import *
from misoc.cores.code_8b10b import Encoder, Decoder
from drtio.common import TransceiverInterface, ChannelInterface
from drtio.gth_ultrascale_init import GTHInit
from drtio.clock_aligner import BruteforceClockAligner
class GTHChannelPLL(Module):
def __init__(self, refclk, refclk_freq, linerate):
self.refclk = refclk
self.reset = Signal()
self.lock = Signal()
self.config = self.compute_config(refclk_freq, linerate)
@staticmethod
def compute_config(refclk_freq, linerate):
for n1 in 4, 5:
for n2 in 1, 2, 3, 4, 5:
for m in 1, 2:
vco_freq = refclk_freq*(n1*n2)/m
if 2.0e9 <= vco_freq <= 6.25e9:
for d in 1, 2, 4, 8, 16:
current_linerate = vco_freq*2/d
if current_linerate == linerate:
return {"n1": n1, "n2": n2, "m": m, "d": d,
"vco_freq": vco_freq,
"clkin": refclk_freq,
"linerate": linerate}
msg = "No config found for {:3.2f} MHz refclk / {:3.2f} Gbps linerate."
raise ValueError(msg.format(refclk_freq/1e6, linerate/1e9))
def __repr__(self):
r = """
GTHChannelPLL
==============
overview:
---------
+--------------------------------------------------+
| |
| +-----+ +---------------------------+ +-----+ |
| | | | Phase Frequency Detector | | | |
CLKIN +----> /M +--> Charge Pump +-> VCO +---> CLKOUT
| | | | Loop Filter | | | |
| +-----+ +---------------------------+ +--+--+ |
| ^ | |
| | +-------+ +-------+ | |
| +----+ /N2 <----+ /N1 <----+ |
| +-------+ +-------+ |
+--------------------------------------------------+
+-------+
CLKOUT +-> 2/D +-> LINERATE
+-------+
config:
-------
CLKIN = {clkin}MHz
CLKOUT = CLKIN x (N1 x N2) / M = {clkin}MHz x ({n1} x {n2}) / {m}
= {vco_freq}GHz
LINERATE = CLKOUT x 2 / D = {vco_freq}GHz x 2 / {d}
= {linerate}GHz
""".format(clkin=self.config["clkin"]/1e6,
n1=self.config["n1"],
n2=self.config["n2"],
m=self.config["m"],
vco_freq=self.config["vco_freq"]/1e9,
d=self.config["d"],
linerate=self.config["linerate"]/1e9)
return r
class GTHQuadPLL(Module):
def __init__(self, refclk, refclk_freq, linerate):
self.clk = Signal()
self.refclk = Signal()
self.reset = Signal()
self.lock = Signal()
self.config = self.compute_config(refclk_freq, linerate)
# # #
self.specials += \
Instance("GTHE3_COMMON",
# common
i_GTREFCLK00=refclk,
i_GTREFCLK01=refclk,
i_QPLLRSVD1=0,
i_QPLLRSVD2=0,
i_QPLLRSVD3=0,
i_QPLLRSVD4=0,
i_BGBYPASSB=1,
i_BGMONITORENB=1,
i_BGPDB=1,
i_BGRCALOVRD=0b11111,
i_BGRCALOVRDENB=0b1,
i_RCALENB=1,
# qpll0
p_QPLL0_FBDIV=self.config["n"],
p_QPLL0_REFCLK_DIV=self.config["m"],
i_QPLL0CLKRSVD0=0,
i_QPLL0CLKRSVD1=0,
i_QPLL0LOCKDETCLK=ClockSignal(),
i_QPLL0LOCKEN=1,
o_QPLL0LOCK=self.lock if self.config["qpll"] == "qpll0" else
Signal(),
o_QPLL0OUTCLK=self.clk if self.config["qpll"] == "qpll0" else
Signal(),
o_QPLL0OUTREFCLK=self.refclk if self.config["qpll"] == "qpll0" else
Signal(),
i_QPLL0PD=0 if self.config["qpll"] == "qpll0" else 1,
i_QPLL0REFCLKSEL=0b001,
i_QPLL0RESET=self.reset,
# qpll1
p_QPLL1_FBDIV=self.config["n"],
p_QPLL1_REFCLK_DIV=self.config["m"],
i_QPLL1CLKRSVD0=0,
i_QPLL1CLKRSVD1=0,
i_QPLL1LOCKDETCLK=ClockSignal(),
i_QPLL1LOCKEN=1,
o_QPLL1LOCK=self.lock if self.config["qpll"] == "qpll1" else
Signal(),
o_QPLL1OUTCLK=self.clk if self.config["qpll"] == "qpll1" else
Signal(),
o_QPLL1OUTREFCLK=self.refclk if self.config["qpll"] == "qpll1" else
Signal(),
i_QPLL1PD=0 if self.config["qpll"] == "qpll1" else 1,
i_QPLL1REFCLKSEL=0b001,
i_QPLL1RESET=self.reset,
)
@staticmethod
def compute_config(refclk_freq, linerate):
for n in [16, 20, 32, 40, 60, 64, 66, 75, 80, 84,
90, 96, 100, 112, 120, 125, 150, 160]:
for m in 1, 2, 3, 4:
vco_freq = refclk_freq*n/m
if 8e9 <= vco_freq <= 13e9:
qpll = "qpll1"
elif 9.8e9 <= vco_freq <= 16.375e9:
qpll = "qpll0"
else:
qpll = None
if qpll is not None:
for d in 1, 2, 4, 8, 16:
current_linerate = (vco_freq/2)*2/d
if current_linerate == linerate:
return {"n": n, "m": m, "d": d,
"vco_freq": vco_freq,
"qpll": qpll,
"clkin": refclk_freq,
"clkout": vco_freq/2,
"linerate": linerate}
msg = "No config found for {:3.2f} MHz refclk / {:3.2f} Gbps linerate."
raise ValueError(msg.format(refclk_freq/1e6, linerate/1e9))
def __repr__(self):
r = """
GTXQuadPLL
===========
overview:
---------
+-------------------------------------------------------------++
| +------------+ |
| +-----+ +---------------------------+ | QPLL0 | +--+ |
| | | | Phase Frequency Detector +-> VCO | | | |
CLKIN +----> /M +--> Charge Pump | +------------+->/2+--> CLKOUT
| | | | Loop Filter +-> QPLL1 | | | |
| +-----+ +---------------------------+ | VCO | +--+ |
| ^ +-----+------+ |
| | +-------+ | |
| +--------+ /N <----------------+ |
| +-------+ |
+--------------------------------------------------------------+
+-------+
CLKOUT +-> 2/D +-> LINERATE
+-------+
config:
-------
CLKIN = {clkin}MHz
CLKOUT = CLKIN x N / (2 x M) = {clkin}MHz x {n} / (2 x {m})
= {clkout}GHz
VCO = {vco_freq}GHz ({qpll})
LINERATE = CLKOUT x 2 / D = {clkout}GHz x 2 / {d}
= {linerate}GHz
""".format(clkin=self.config["clkin"]/1e6,
n=self.config["n"],
m=self.config["m"],
clkout=self.config["clkout"]/1e9,
vco_freq=self.config["vco_freq"]/1e9,
qpll=self.config["qpll"].upper(),
d=self.config["d"],
linerate=self.config["linerate"]/1e9)
return r
from artiq.gateware.drtio.core import TransceiverInterface, ChannelInterface
from artiq.gateware.drtio.transceiver.gth_ultrascale_init import *
class GTHSingle(Module):
def __init__(self, pll, tx_pads, rx_pads, sys_clk_freq, dw=20, mode="master"):
def __init__(self, refclk, tx_pads, rx_pads, sys_clk_freq, rtio_clk_freq, dw, mode):
assert (dw == 20) or (dw == 40)
assert mode in ["master", "slave"]
# # #
nwords = dw//10
use_cpll = isinstance(pll, GTHChannelPLL)
use_qpll0 = isinstance(pll, GTHQuadPLL) and pll.config["qpll"] == "qpll0"
use_qpll1 = isinstance(pll, GTHQuadPLL) and pll.config["qpll"] == "qpll1"
self.submodules.encoder = encoder = ClockDomainsRenamer("rtio_tx")(
Encoder(nwords, True))
self.submodules.decoders = decoders = [ClockDomainsRenamer("rtio_rx")(
(Decoder(True))) for _ in range(nwords)]
self.rx_ready = Signal()
self.rtio_clk_freq = pll.config["linerate"]/dw
# transceiver direct clock outputs
# useful to specify clock constraints in a way palatable to Vivado
self.txoutclk = Signal()
@ -230,11 +32,13 @@ class GTHSingle(Module):
tx_init = GTHInit(sys_clk_freq, False)
# RX receives restart commands from RTIO domain
rx_init = ClockDomainsRenamer("rtio_tx")(
GTHInit(self.rtio_clk_freq, True))
GTHInit(rtio_clk_freq, True))
self.submodules += tx_init, rx_init
pll_lock = Signal()
self.comb += [
tx_init.plllock.eq(pll.lock),
rx_init.plllock.eq(pll.lock)
tx_init.plllock.eq(pll_lock),
rx_init.plllock.eq(pll_lock)
]
txdata = Signal(dw)
@ -263,31 +67,25 @@ class GTHSingle(Module):
p_CPLL_CFG1=0xa4ac,
p_CPLL_CFG2=0xf007,
p_CPLL_CFG3=0x0000,
p_CPLL_FBDIV=1 if use_qpll0 or use_qpll1 else pll.config["n2"],
p_CPLL_FBDIV_45=4 if use_qpll0 or use_qpll1 else pll.config["n1"],
p_CPLL_REFCLK_DIV=1 if use_qpll0 or use_qpll1 else pll.config["m"],
p_RXOUT_DIV=pll.config["d"],
p_TXOUT_DIV=pll.config["d"],
p_CPLL_FBDIV=5,
p_CPLL_FBDIV_45=4,
p_CPLL_REFCLK_DIV=1,
p_RXOUT_DIV=2,
p_TXOUT_DIV=2,
i_CPLLRESET=0,
i_CPLLPD=0 if use_qpll0 or use_qpll1 else pll.reset,
o_CPLLLOCK=Signal() if use_qpll0 or use_qpll1 else pll.lock,
i_CPLLPD=0,
o_CPLLLOCK=pll_lock,
i_CPLLLOCKEN=1,
i_CPLLREFCLKSEL=0b001,
i_TSTIN=2**20-1,
i_GTREFCLK0=0 if use_qpll0 or use_qpll1 else pll.refclk,
# QPLL
i_QPLL0CLK=0 if use_cpll or use_qpll1 else pll.clk,
i_QPLL0REFCLK=0 if use_cpll or use_qpll1 else pll.refclk,
i_QPLL1CLK=0 if use_cpll or use_qpll0 else pll.clk,
i_QPLL1REFCLK=0 if use_cpll or use_qpll0 else pll.refclk,
i_GTREFCLK0=refclk,
# TX clock
p_TXBUF_EN="FALSE",
p_TX_XCLK_SEL="TXUSR",
o_TXOUTCLK=self.txoutclk,
i_TXSYSCLKSEL=0b00 if use_cpll else 0b10 if use_qpll0 else 0b11,
i_TXPLLCLKSEL=0b00 if use_cpll else 0b11 if use_qpll0 else 0b10,
i_TXSYSCLKSEL=0b00,
i_TXPLLCLKSEL=0b00,
i_TXOUTCLKSEL=0b11,
# TX Startup/Reset
@ -376,11 +174,9 @@ class GTHSingle(Module):
self.sync += tx_reset_deglitched.eq(~tx_init.done)
self.clock_domains.cd_rtio_tx = ClockDomain()
if mode == "master":
tx_bufg_div = pll.config["clkin"]/self.rtio_clk_freq
assert tx_bufg_div == int(tx_bufg_div)
self.specials += \
Instance("BUFG_GT", i_I=self.txoutclk, o_O=self.cd_rtio_tx.clk,
i_DIV=int(tx_bufg_div)-1)
i_DIV=0)
self.specials += AsyncResetSynchronizer(self.cd_rtio_tx, tx_reset_deglitched)
# rx clocking
@ -401,7 +197,7 @@ class GTHSingle(Module):
self.comb += decoders[i].input.eq(rxdata[10*i:10*(i+1)])
# clock alignment
clock_aligner = BruteforceClockAligner(0b0101111100, self.rtio_clk_freq)
clock_aligner = BruteforceClockAligner(0b0101111100, rtio_clk_freq)
self.submodules += clock_aligner
self.comb += [
clock_aligner.rxdata.eq(rxdata),
@ -411,19 +207,24 @@ class GTHSingle(Module):
class GTH(Module, TransceiverInterface):
def __init__(self, plls, tx_pads, rx_pads, sys_clk_freq, dw, master=0):
def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq, rtio_clk_freq, dw=20, master=0):
self.nchannels = nchannels = len(tx_pads)
self.gths = []
# # #
nwords = dw//10
refclk = Signal()
self.specials += Instance("IBUFDS_GTE3",
i_CEB=0,
i_I=clock_pads.p,
i_IB=clock_pads.n,
o_O=refclk)
rtio_tx_clk = Signal()
channel_interfaces = []
for i in range(nchannels):
mode = "master" if i == master else "slave"
gth = GTHSingle(plls[i], tx_pads[i], rx_pads[i], sys_clk_freq, dw, mode)
gth = GTHSingle(refclk, tx_pads[i], rx_pads[i], sys_clk_freq, rtio_clk_freq, dw, mode)
if mode == "master":
self.comb += rtio_tx_clk.eq(gth.cd_rtio_tx.clk)
else:

View File

@ -1,8 +1,13 @@
from math import ceil
from functools import reduce
from operator import add
from litex.gen import *
from litex.gen.genlib.cdc import MultiReg
from litex.gen.genlib.misc import WaitTimer
from migen import *
from migen.genlib.cdc import MultiReg, PulseSynchronizer
from migen.genlib.misc import WaitTimer
__all__ = ["BruteforceClockAligner", "GTHInit"]
class GTHInit(Module):
@ -135,3 +140,111 @@ class GTHInit(Module):
self.done.eq(1),
If(self.restart, NextState("RESET_ALL"))
)
# Changes the phase of the transceiver RX clock to align the comma to
# the LSBs of RXDATA, fixing the latency.
#
# This is implemented by repeatedly resetting the transceiver until it
# gives out the correct phase. Each reset gives a random phase.
#
# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA
# would achieve this faster and in a cleaner way. But:
# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead
# of 1 UI at every pulse. It is unclear what the latency becomes.
# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed.
# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless
# transceiver "feature".
#
# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped
# compared to the usual 8b10b binary representation.
class BruteforceClockAligner(Module):
def __init__(self, comma, tx_clk_freq, check_period=6e-3):
self.rxdata = Signal(20)
self.restart = Signal()
self.ready = Signal()
check_max_val = ceil(check_period*tx_clk_freq)
check_counter = Signal(max=check_max_val+1)
check = Signal()
reset_check_counter = Signal()
self.sync.rtio_tx += [
check.eq(0),
If(reset_check_counter,
check_counter.eq(check_max_val)
).Else(
If(check_counter == 0,
check.eq(1),
check_counter.eq(check_max_val)
).Else(
check_counter.eq(check_counter-1)
)
)
]
checks_reset = PulseSynchronizer("rtio_tx", "rtio_rx")
self.submodules += checks_reset
comma_n = ~comma & 0b1111111111
comma_seen_rxclk = Signal()
comma_seen = Signal()
comma_seen_rxclk.attr.add("no_retiming")
self.specials += MultiReg(comma_seen_rxclk, comma_seen)
self.sync.rtio_rx += \
If(checks_reset.o,
comma_seen_rxclk.eq(0)
).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n),
comma_seen_rxclk.eq(1)
)
error_seen_rxclk = Signal()
error_seen = Signal()
error_seen_rxclk.attr.add("no_retiming")
self.specials += MultiReg(error_seen_rxclk, error_seen)
rx1cnt = Signal(max=11)
self.sync.rtio_rx += [
rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])),
If(checks_reset.o,
error_seen_rxclk.eq(0)
).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6),
error_seen_rxclk.eq(1)
)
]
fsm = ClockDomainsRenamer("rtio_tx")(FSM(reset_state="WAIT_COMMA"))
self.submodules += fsm
fsm.act("WAIT_COMMA",
If(check,
# Errors are still OK at this stage, as the transceiver
# has just been reset and may output garbage data.
If(comma_seen,
NextState("WAIT_NOERROR")
).Else(
self.restart.eq(1)
),
checks_reset.i.eq(1)
)
)
fsm.act("WAIT_NOERROR",
If(check,
If(comma_seen & ~error_seen,
NextState("READY")
).Else(
self.restart.eq(1),
NextState("WAIT_COMMA")
),
checks_reset.i.eq(1)
)
)
fsm.act("READY",
reset_check_counter.eq(1),
self.ready.eq(1),
If(error_seen,
checks_reset.i.eq(1),
self.restart.eq(1),
NextState("WAIT_COMMA")
)
)