From dc7addf3949b4cef1594ae3d1a2f7863ca877a38 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Thu, 31 Dec 2020 13:29:50 +0800 Subject: [PATCH 1/8] Revert "drtio: remove KC705/GTX support" This reverts commit ebdbaaad3250778dd6570fc17db22e2851b3ce2b. --- .../gateware/drtio/transceiver/gtx_7series.py | 265 ++++++++++++++++++ .../drtio/transceiver/gtx_7series_init.py | 226 +++++++++++++++ artiq/gateware/targets/kc705_drtio_master.py | 116 ++++++++ .../gateware/targets/kc705_drtio_satellite.py | 114 ++++++++ 4 files changed, 721 insertions(+) create mode 100644 artiq/gateware/drtio/transceiver/gtx_7series.py create mode 100644 artiq/gateware/drtio/transceiver/gtx_7series_init.py create mode 100755 artiq/gateware/targets/kc705_drtio_master.py create mode 100755 artiq/gateware/targets/kc705_drtio_satellite.py diff --git a/artiq/gateware/drtio/transceiver/gtx_7series.py b/artiq/gateware/drtio/transceiver/gtx_7series.py new file mode 100644 index 000000000..7b6cfaecf --- /dev/null +++ b/artiq/gateware/drtio/transceiver/gtx_7series.py @@ -0,0 +1,265 @@ +from migen import * +from migen.genlib.resetsync import AsyncResetSynchronizer + +from misoc.cores.code_8b10b import Encoder, Decoder +from misoc.interconnect.csr import * + +from artiq.gateware.drtio.core import TransceiverInterface, ChannelInterface +from artiq.gateware.drtio.transceiver.gtx_7series_init import * + + +class GTX_20X(Module, TransceiverInterface): + # Only one channel is supported. + # + # The transceiver clock on clock_pads must be at the RTIO clock + # frequency when clock_div2=False, and 2x that frequency when + # clock_div2=True. 
+ def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq, + clock_div2=False): + encoder = ClockDomainsRenamer("rtio")( + Encoder(2, True)) + self.submodules += encoder + decoders = [ClockDomainsRenamer("rtio_rx0")( + (Decoder(True))) for _ in range(2)] + self.submodules += decoders + + TransceiverInterface.__init__(self, [ChannelInterface(encoder, decoders)]) + + # transceiver direct clock outputs + # useful to specify clock constraints in a way palatable to Vivado + self.txoutclk = Signal() + self.rxoutclk = Signal() + + # # # + + refclk = Signal() + if clock_div2: + self.specials += Instance("IBUFDS_GTE2", + i_CEB=0, + i_I=clock_pads.p, + i_IB=clock_pads.n, + o_ODIV2=refclk + ) + else: + self.specials += Instance("IBUFDS_GTE2", + i_CEB=0, + i_I=clock_pads.p, + i_IB=clock_pads.n, + o_O=refclk + ) + + cplllock = Signal() + # TX generates RTIO clock, init must be in system domain + tx_init = GTXInit(sys_clk_freq, False) + # RX receives restart commands from RTIO domain + rx_init = ClockDomainsRenamer("rtio")( + GTXInit(self.rtio_clk_freq, True)) + self.submodules += tx_init, rx_init + self.comb += tx_init.cplllock.eq(cplllock), \ + rx_init.cplllock.eq(cplllock) + + txdata = Signal(20) + rxdata = Signal(20) + self.specials += \ + Instance("GTXE2_CHANNEL", + # PMA Attributes + p_PMA_RSV=0x00018480, + p_PMA_RSV2=0x2050, + p_PMA_RSV3=0, + p_PMA_RSV4=0, + p_RX_BIAS_CFG=0b100, + p_RX_CM_TRIM=0b010, + p_RX_OS_CFG=0b10000000, + p_RX_CLK25_DIV=5, + p_TX_CLK25_DIV=5, + + # Power-Down Attributes + p_PD_TRANS_TIME_FROM_P2=0x3c, + p_PD_TRANS_TIME_NONE_P2=0x3c, + p_PD_TRANS_TIME_TO_P2=0x64, + + # CPLL + p_CPLL_CFG=0xBC07DC, + p_CPLL_FBDIV=4, + p_CPLL_FBDIV_45=5, + p_CPLL_REFCLK_DIV=1, + p_RXOUT_DIV=2, + p_TXOUT_DIV=2, + o_CPLLLOCK=cplllock, + i_CPLLLOCKEN=1, + i_CPLLREFCLKSEL=0b001, + i_TSTIN=2**20-1, + i_GTREFCLK0=refclk, + + # TX clock + p_TXBUF_EN="FALSE", + p_TX_XCLK_SEL="TXUSR", + o_TXOUTCLK=self.txoutclk, + i_TXSYSCLKSEL=0b00, + i_TXOUTCLKSEL=0b11, + + # TX 
Startup/Reset + i_GTTXRESET=tx_init.gtXxreset, + o_TXRESETDONE=tx_init.Xxresetdone, + i_TXDLYSRESET=tx_init.Xxdlysreset, + o_TXDLYSRESETDONE=tx_init.Xxdlysresetdone, + o_TXPHALIGNDONE=tx_init.Xxphaligndone, + i_TXUSERRDY=tx_init.Xxuserrdy, + + # TX data + p_TX_DATA_WIDTH=20, + p_TX_INT_DATAWIDTH=0, + i_TXCHARDISPMODE=Cat(txdata[9], txdata[19]), + i_TXCHARDISPVAL=Cat(txdata[8], txdata[18]), + i_TXDATA=Cat(txdata[:8], txdata[10:18]), + i_TXUSRCLK=ClockSignal("rtio"), + i_TXUSRCLK2=ClockSignal("rtio"), + + # TX electrical + i_TXBUFDIFFCTRL=0b100, + i_TXDIFFCTRL=0b1000, + + # RX Startup/Reset + i_GTRXRESET=rx_init.gtXxreset, + o_RXRESETDONE=rx_init.Xxresetdone, + i_RXDLYSRESET=rx_init.Xxdlysreset, + o_RXDLYSRESETDONE=rx_init.Xxdlysresetdone, + o_RXPHALIGNDONE=rx_init.Xxphaligndone, + i_RXUSERRDY=rx_init.Xxuserrdy, + + # RX AFE + p_RX_DFE_XYD_CFG=0, + i_RXDFEXYDEN=1, + i_RXDFEXYDHOLD=0, + i_RXDFEXYDOVRDEN=0, + i_RXLPMEN=0, + + # RX clock + p_RXBUF_EN="FALSE", + p_RX_XCLK_SEL="RXUSR", + i_RXDDIEN=1, + i_RXSYSCLKSEL=0b00, + i_RXOUTCLKSEL=0b010, + o_RXOUTCLK=self.rxoutclk, + i_RXUSRCLK=ClockSignal("rtio_rx0"), + i_RXUSRCLK2=ClockSignal("rtio_rx0"), + p_RXCDR_CFG=0x03000023FF10100020, + + # RX Clock Correction Attributes + p_CLK_CORRECT_USE="FALSE", + p_CLK_COR_SEQ_1_1=0b0100000000, + p_CLK_COR_SEQ_2_1=0b0100000000, + p_CLK_COR_SEQ_1_ENABLE=0b1111, + p_CLK_COR_SEQ_2_ENABLE=0b1111, + + # RX data + p_RX_DATA_WIDTH=20, + p_RX_INT_DATAWIDTH=0, + o_RXDISPERR=Cat(rxdata[9], rxdata[19]), + o_RXCHARISK=Cat(rxdata[8], rxdata[18]), + o_RXDATA=Cat(rxdata[:8], rxdata[10:18]), + + # Pads + i_GTXRXP=rx_pads.p, + i_GTXRXN=rx_pads.n, + o_GTXTXP=tx_pads.p, + o_GTXTXN=tx_pads.n, + ) + + tx_reset_deglitched = Signal() + tx_reset_deglitched.attr.add("no_retiming") + self.sync += tx_reset_deglitched.eq(~tx_init.done) + self.specials += [ + Instance("BUFG", i_I=self.txoutclk, o_O=self.cd_rtio.clk), + AsyncResetSynchronizer(self.cd_rtio, tx_reset_deglitched) + ] + rx_reset_deglitched = Signal() + 
rx_reset_deglitched.attr.add("no_retiming") + self.sync.rtio += rx_reset_deglitched.eq(~rx_init.done) + self.specials += [ + Instance("BUFG", i_I=self.rxoutclk, o_O=self.cd_rtio_rx0.clk), + AsyncResetSynchronizer(self.cd_rtio_rx0, rx_reset_deglitched) + ] + + chan = self.channels[0] + self.comb += [ + txdata.eq(Cat(chan.encoder.output[0], chan.encoder.output[1])), + chan.decoders[0].input.eq(rxdata[:10]), + chan.decoders[1].input.eq(rxdata[10:]) + ] + + clock_aligner = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"})( + BruteforceClockAligner(0b0101111100, self.rtio_clk_freq)) + self.submodules += clock_aligner + self.comb += [ + clock_aligner.rxdata.eq(rxdata), + rx_init.restart.eq(clock_aligner.restart), + chan.rx_ready.eq(clock_aligner.ready) + ] + + +class GTX_1000BASE_BX10(GTX_20X): + rtio_clk_freq = 62.5e6 + + +class RXSynchronizer(Module, AutoCSR): + """Delays the data received in the rtio_rx domain by a configurable amount + so that it meets s/h in the rtio domain, and recapture it in the rtio + domain. This has fixed latency. + + Since Xilinx doesn't provide decent on-chip delay lines, we implement the + delay with MMCM that provides a clock and a finely configurable phase, used + to resample the data. + + The phase has to be determined either empirically or by making sense of the + Xilinx scriptures (when existent) and should be constant for a given design + placement. 
+ """ + def __init__(self, rtio_clk_freq, initial_phase=0.0): + self.phase_shift = CSR() + self.phase_shift_done = CSRStatus() + + self.clock_domains.cd_rtio_delayed = ClockDomain() + + mmcm_output = Signal() + mmcm_fb = Signal() + mmcm_locked = Signal() + # maximize VCO frequency to maximize phase shift resolution + mmcm_mult = 1200e6//rtio_clk_freq + self.specials += [ + Instance("MMCME2_ADV", + p_CLKIN1_PERIOD=1e9/rtio_clk_freq, + i_CLKIN1=ClockSignal("rtio_rx"), + i_RST=ResetSignal("rtio_rx"), + i_CLKINSEL=1, # yes, 1=CLKIN1 0=CLKIN2 + + p_CLKFBOUT_MULT_F=mmcm_mult, + p_CLKOUT0_DIVIDE_F=mmcm_mult, + p_CLKOUT0_PHASE=initial_phase, + p_DIVCLK_DIVIDE=1, + + # According to Xilinx, there is no guarantee of input/output + # phase relationship when using internal feedback. We assume + # here that the input/output skew is constant to save BUFGs. + o_CLKFBOUT=mmcm_fb, + i_CLKFBIN=mmcm_fb, + + p_CLKOUT0_USE_FINE_PS="TRUE", + o_CLKOUT0=mmcm_output, + o_LOCKED=mmcm_locked, + + i_PSCLK=ClockSignal(), + i_PSEN=self.phase_shift.re, + i_PSINCDEC=self.phase_shift.r, + o_PSDONE=self.phase_shift_done.status, + ), + Instance("BUFR", i_I=mmcm_output, o_O=self.cd_rtio_delayed.clk), + AsyncResetSynchronizer(self.cd_rtio_delayed, ~mmcm_locked) + ] + + def resync(self, signal): + delayed = Signal.like(signal, related=signal) + synchronized = Signal.like(signal, related=signal) + self.sync.rtio_delayed += delayed.eq(signal) + self.sync.rtio += synchronized.eq(delayed) + return synchronized diff --git a/artiq/gateware/drtio/transceiver/gtx_7series_init.py b/artiq/gateware/drtio/transceiver/gtx_7series_init.py new file mode 100644 index 000000000..2b7ae39bc --- /dev/null +++ b/artiq/gateware/drtio/transceiver/gtx_7series_init.py @@ -0,0 +1,226 @@ +from math import ceil +from functools import reduce +from operator import add + +from migen import * +from migen.genlib.cdc import MultiReg, PulseSynchronizer +from migen.genlib.misc import WaitTimer +from migen.genlib.fsm import FSM + + +class 
GTXInit(Module): + # Based on LiteSATA by Enjoy-Digital + def __init__(self, sys_clk_freq, rx): + self.done = Signal() + self.restart = Signal() + + # GTX signals + self.cplllock = Signal() + self.gtXxreset = Signal() + self.Xxresetdone = Signal() + self.Xxdlysreset = Signal() + self.Xxdlysresetdone = Signal() + self.Xxphaligndone = Signal() + self.Xxuserrdy = Signal() + + # # # + + # Double-latch transceiver asynch outputs + cplllock = Signal() + Xxresetdone = Signal() + Xxdlysresetdone = Signal() + Xxphaligndone = Signal() + self.specials += [ + MultiReg(self.cplllock, cplllock), + MultiReg(self.Xxresetdone, Xxresetdone), + MultiReg(self.Xxdlysresetdone, Xxdlysresetdone), + MultiReg(self.Xxphaligndone, Xxphaligndone), + ] + + # Deglitch FSM outputs driving transceiver asynch inputs + gtXxreset = Signal() + Xxdlysreset = Signal() + Xxuserrdy = Signal() + self.sync += [ + self.gtXxreset.eq(gtXxreset), + self.Xxdlysreset.eq(Xxdlysreset), + self.Xxuserrdy.eq(Xxuserrdy) + ] + + # After configuration, transceiver resets have to stay low for + # at least 500ns (see AR43482) + startup_cycles = ceil(500*sys_clk_freq/1000000000) + startup_timer = WaitTimer(startup_cycles) + self.submodules += startup_timer + + startup_fsm = FSM(reset_state="INITIAL") + self.submodules += startup_fsm + + if rx: + cdr_stable_timer = WaitTimer(1024) + self.submodules += cdr_stable_timer + + Xxphaligndone_r = Signal(reset=1) + Xxphaligndone_rising = Signal() + self.sync += Xxphaligndone_r.eq(Xxphaligndone) + self.comb += Xxphaligndone_rising.eq(Xxphaligndone & ~Xxphaligndone_r) + + startup_fsm.act("INITIAL", + startup_timer.wait.eq(1), + If(startup_timer.done, NextState("RESET_GTX")) + ) + startup_fsm.act("RESET_GTX", + gtXxreset.eq(1), + NextState("WAIT_CPLL") + ) + startup_fsm.act("WAIT_CPLL", + gtXxreset.eq(1), + If(cplllock, NextState("RELEASE_RESET")) + ) + # Release GTX reset and wait for GTX resetdone + # (from UG476, GTX is reset on falling edge + # of gttxreset) + if rx: + 
startup_fsm.act("RELEASE_RESET", + Xxuserrdy.eq(1), + cdr_stable_timer.wait.eq(1), + If(Xxresetdone & cdr_stable_timer.done, NextState("ALIGN")) + ) + else: + startup_fsm.act("RELEASE_RESET", + Xxuserrdy.eq(1), + If(Xxresetdone, NextState("ALIGN")) + ) + # Start delay alignment (pulse) + startup_fsm.act("ALIGN", + Xxuserrdy.eq(1), + Xxdlysreset.eq(1), + NextState("WAIT_ALIGN") + ) + # Wait for delay alignment + startup_fsm.act("WAIT_ALIGN", + Xxuserrdy.eq(1), + If(Xxdlysresetdone, NextState("WAIT_FIRST_ALIGN_DONE")) + ) + # Wait 2 rising edges of rxphaligndone + # (from UG476 in buffer bypass config) + startup_fsm.act("WAIT_FIRST_ALIGN_DONE", + Xxuserrdy.eq(1), + If(Xxphaligndone_rising, NextState("WAIT_SECOND_ALIGN_DONE")) + ) + startup_fsm.act("WAIT_SECOND_ALIGN_DONE", + Xxuserrdy.eq(1), + If(Xxphaligndone_rising, NextState("READY")) + ) + startup_fsm.act("READY", + Xxuserrdy.eq(1), + self.done.eq(1), + If(self.restart, NextState("RESET_GTX")) + ) + + +# Changes the phase of the transceiver RX clock to align the comma to +# the LSBs of RXDATA, fixing the latency. +# +# This is implemented by repeatedly resetting the transceiver until it +# gives out the correct phase. Each reset gives a random phase. +# +# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA +# would achieve this faster and in a cleaner way. But: +# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead +# of 1 UI at every pulse. It is unclear what the latency becomes. +# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed. +# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless +# transceiver "feature". +# +# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped +# compared to the usual 8b10b binary representation. 
+class BruteforceClockAligner(Module): + def __init__(self, comma, rtio_clk_freq, check_period=6e-3): + self.rxdata = Signal(20) + self.restart = Signal() + + self.ready = Signal() + + check_max_val = ceil(check_period*rtio_clk_freq) + check_counter = Signal(max=check_max_val+1) + check = Signal() + reset_check_counter = Signal() + self.sync.rtio += [ + check.eq(0), + If(reset_check_counter, + check_counter.eq(check_max_val) + ).Else( + If(check_counter == 0, + check.eq(1), + check_counter.eq(check_max_val) + ).Else( + check_counter.eq(check_counter-1) + ) + ) + ] + + checks_reset = PulseSynchronizer("rtio", "rtio_rx") + self.submodules += checks_reset + + comma_n = ~comma & 0b1111111111 + comma_seen_rxclk = Signal() + comma_seen = Signal() + comma_seen_rxclk.attr.add("no_retiming") + self.specials += MultiReg(comma_seen_rxclk, comma_seen) + self.sync.rtio_rx += \ + If(checks_reset.o, + comma_seen_rxclk.eq(0) + ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n), + comma_seen_rxclk.eq(1) + ) + + error_seen_rxclk = Signal() + error_seen = Signal() + error_seen_rxclk.attr.add("no_retiming") + self.specials += MultiReg(error_seen_rxclk, error_seen) + rx1cnt = Signal(max=11) + self.sync.rtio_rx += [ + rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])), + If(checks_reset.o, + error_seen_rxclk.eq(0) + ).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6), + error_seen_rxclk.eq(1) + ) + ] + + fsm = ClockDomainsRenamer("rtio")(FSM(reset_state="WAIT_COMMA")) + self.submodules += fsm + + fsm.act("WAIT_COMMA", + If(check, + # Errors are still OK at this stage, as the transceiver + # has just been reset and may output garbage data. 
+ If(comma_seen, + NextState("WAIT_NOERROR") + ).Else( + self.restart.eq(1) + ), + checks_reset.i.eq(1) + ) + ) + fsm.act("WAIT_NOERROR", + If(check, + If(comma_seen & ~error_seen, + NextState("READY") + ).Else( + self.restart.eq(1), + NextState("WAIT_COMMA") + ), + checks_reset.i.eq(1) + ) + ) + fsm.act("READY", + reset_check_counter.eq(1), + self.ready.eq(1), + If(error_seen, + checks_reset.i.eq(1), + self.restart.eq(1), + NextState("WAIT_COMMA") + ) + ) diff --git a/artiq/gateware/targets/kc705_drtio_master.py b/artiq/gateware/targets/kc705_drtio_master.py new file mode 100755 index 000000000..c1bf28fb4 --- /dev/null +++ b/artiq/gateware/targets/kc705_drtio_master.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +import argparse + +from migen import * +from migen.build.generic_platform import * + +from misoc.cores import spi as spi_csr +from misoc.targets.kc705 import MiniSoC, soc_kc705_args, soc_kc705_argdict +from misoc.integration.builder import builder_args, builder_argdict + +from artiq.gateware.amp import AMPSoC, build_artiq_soc +from artiq.gateware import rtio +from artiq.gateware.rtio.phy import ttl_simple +from artiq.gateware.drtio.transceiver import gtx_7series +from artiq.gateware.drtio import DRTIOMaster +from artiq import __version__ as artiq_version + + +class Master(MiniSoC, AMPSoC): + mem_map = { + "cri_con": 0x10000000, + "rtio": 0x20000000, + "rtio_dma": 0x30000000, + "drtio_aux": 0x50000000, + "mailbox": 0x70000000 + } + mem_map.update(MiniSoC.mem_map) + + def __init__(self, **kwargs): + MiniSoC.__init__(self, + cpu_type="or1k", + sdram_controller_type="minicon", + l2_size=128*1024, + ident=artiq_version, + ethmac_nrxslots=4, + ethmac_ntxslots=4, + **kwargs) + AMPSoC.__init__(self) + + platform = self.platform + + self.comb += platform.request("sfp_tx_disable_n").eq(1) + tx_pads = platform.request("sfp_tx") + rx_pads = platform.request("sfp_rx") + + # 1000BASE_BX10 Ethernet compatible, 62.5MHz RTIO clock + self.submodules.transceiver = 
gtx_7series.GTX_1000BASE_BX10( + clock_pads=platform.request("sgmii_clock"), + tx_pads=tx_pads, + rx_pads=rx_pads, + sys_clk_freq=self.clk_freq, + clock_div2=True) + + self.submodules.drtio0 = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"})( + DRTIOMaster(self.transceiver.channels[0])) + self.csr_devices.append("drtio0") + self.add_wb_slave(self.mem_map["drtio_aux"], 0x800, + self.drtio0.aux_controller.bus) + self.add_memory_region("drtio0_aux", self.mem_map["drtio_aux"] | self.shadow_base, 0x800) + self.config["HAS_DRTIO"] = None + self.add_csr_group("drtio", ["drtio0"]) + self.add_memory_group("drtio_aux", ["drtio0_aux"]) + + self.comb += [ + platform.request("user_sma_clock_p").eq(ClockSignal("rtio_rx0")), + platform.request("user_sma_clock_n").eq(ClockSignal("rtio")) + ] + + rtio_clk_period = 1e9/self.transceiver.rtio_clk_freq + platform.add_period_constraint(self.transceiver.txoutclk, rtio_clk_period) + platform.add_period_constraint(self.transceiver.rxoutclk, rtio_clk_period) + platform.add_false_path_constraints( + self.crg.cd_sys.clk, + self.transceiver.txoutclk, self.transceiver.rxoutclk) + + rtio_channels = [] + for i in range(8): + phy = ttl_simple.Output(platform.request("user_led", i)) + self.submodules += phy + rtio_channels.append(rtio.Channel.from_phy(phy)) + for sma in "user_sma_gpio_p", "user_sma_gpio_n": + phy = ttl_simple.InOut(platform.request(sma)) + self.submodules += phy + rtio_channels.append(rtio.Channel.from_phy(phy)) + + self.submodules.rtio_moninj = rtio.MonInj(rtio_channels) + self.csr_devices.append("rtio_moninj") + + self.submodules.rtio_core = rtio.Core(rtio_channels, 3) + self.csr_devices.append("rtio_core") + + self.submodules.rtio = rtio.KernelInitiator() + self.submodules.rtio_dma = ClockDomainsRenamer("sys_kernel")( + rtio.DMA(self.get_native_sdram_if())) + self.register_kernel_cpu_csrdevice("rtio") + self.register_kernel_cpu_csrdevice("rtio_dma") + self.submodules.cri_con = rtio.CRIInterconnectShared( + [self.rtio.cri, 
self.rtio_dma.cri], + [self.rtio_core.cri, self.drtio0.cri]) + self.register_kernel_cpu_csrdevice("cri_con") + + +def main(): + parser = argparse.ArgumentParser( + description="ARTIQ device binary builder / KC705 DRTIO master") + builder_args(parser) + soc_kc705_args(parser) + args = parser.parse_args() + + soc = Master(**soc_kc705_argdict(args)) + build_artiq_soc(soc, builder_argdict(args)) + + +if __name__ == "__main__": + main() diff --git a/artiq/gateware/targets/kc705_drtio_satellite.py b/artiq/gateware/targets/kc705_drtio_satellite.py new file mode 100755 index 000000000..0f318a0d3 --- /dev/null +++ b/artiq/gateware/targets/kc705_drtio_satellite.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +import argparse +import os + +from migen import * +from migen.build.generic_platform import * +from misoc.cores import spi as spi_csr +from misoc.cores import gpio +from misoc.integration.builder import * +from misoc.targets.kc705 import BaseSoC, soc_kc705_args, soc_kc705_argdict + +from artiq.gateware import rtio +from artiq.gateware.rtio.phy import ttl_simple +from artiq.gateware.drtio.transceiver import gtx_7series +from artiq.gateware.drtio import DRTIOSatellite +from artiq import __version__ as artiq_version +from artiq import __artiq_dir__ as artiq_dir + + +class Satellite(BaseSoC): + mem_map = { + "drtio_aux": 0x50000000, + } + mem_map.update(BaseSoC.mem_map) + + def __init__(self, **kwargs): + BaseSoC.__init__(self, + cpu_type="or1k", + sdram_controller_type="minicon", + l2_size=128*1024, + ident=artiq_version, + **kwargs) + + platform = self.platform + + rtio_channels = [] + for i in range(8): + phy = ttl_simple.Output(platform.request("user_led", i)) + self.submodules += phy + rtio_channels.append(rtio.Channel.from_phy(phy)) + for sma in "user_sma_gpio_p", "user_sma_gpio_n": + phy = ttl_simple.InOut(platform.request(sma)) + self.submodules += phy + rtio_channels.append(rtio.Channel.from_phy(phy)) + + self.submodules.rtio_moninj = rtio.MonInj(rtio_channels) + 
self.csr_devices.append("rtio_moninj") + + self.comb += platform.request("sfp_tx_disable_n").eq(1) + + # 1000BASE_BX10 Ethernet compatible, 62.5MHz RTIO clock + self.submodules.transceiver = gtx_7series.GTX_1000BASE_BX10( + clock_pads=platform.request("si5324_clkout"), + tx_pads=platform.request("sfp_tx"), + rx_pads=platform.request("sfp_rx"), + sys_clk_freq=self.clk_freq) + rx0 = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"}) + self.submodules.rx_synchronizer0 = rx0(gtx_7series.RXSynchronizer( + self.transceiver.rtio_clk_freq, initial_phase=180.0)) + self.submodules.drtio0 = rx0(DRTIOSatellite( + self.transceiver.channels[0], rtio_channels, self.rx_synchronizer0)) + self.csr_devices.append("rx_synchronizer0") + self.csr_devices.append("drtio0") + self.add_wb_slave(self.mem_map["drtio_aux"], 0x800, + self.drtio0.aux_controller.bus) + self.add_memory_region("drtio0_aux", self.mem_map["drtio_aux"] | self.shadow_base, 0x800) + self.config["HAS_DRTIO"] = None + self.add_csr_group("drtio", ["drtio0"]) + self.add_memory_group("drtio_aux", ["drtio0_aux"]) + + self.config["RTIO_FREQUENCY"] = str(self.transceiver.rtio_clk_freq/1e6) + si5324_clkin = platform.request("si5324_clkin") + self.specials += \ + Instance("OBUFDS", + i_I=ClockSignal("rtio_rx0"), + o_O=si5324_clkin.p, o_OB=si5324_clkin.n + ) + self.submodules.si5324_rst_n = gpio.GPIOOut(platform.request("si5324").rst_n) + self.csr_devices.append("si5324_rst_n") + i2c = self.platform.request("i2c") + self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) + self.csr_devices.append("i2c") + self.config["I2C_BUS_COUNT"] = 1 + self.config["HAS_SI5324"] = None + + self.comb += [ + platform.request("user_sma_clock_p").eq(ClockSignal("rtio_rx0")), + platform.request("user_sma_clock_n").eq(ClockSignal("rtio")) + ] + + rtio_clk_period = 1e9/self.transceiver.rtio_clk_freq + platform.add_period_constraint(self.transceiver.txoutclk, rtio_clk_period) + platform.add_period_constraint(self.transceiver.rxoutclk, rtio_clk_period) + 
platform.add_false_path_constraints( + platform.lookup_request("clk200"), + self.transceiver.txoutclk, self.transceiver.rxoutclk) + + +def main(): + parser = argparse.ArgumentParser( + description="ARTIQ device binary builder / KC705 DRTIO satellite") + builder_args(parser) + soc_kc705_args(parser) + args = parser.parse_args() + + soc = Satellite(**soc_kc705_argdict(args)) + builder = Builder(soc, **builder_argdict(args)) + builder.add_software_package("satman", os.path.join(artiq_dir, "firmware", "satman")) + builder.build() + + +if __name__ == "__main__": + main() From f25e86e93442e02fa4e50af386d42408581b8517 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Wed, 20 Jan 2021 11:25:38 +0800 Subject: [PATCH 2/8] kc705: revive DRTIO satellite with updated syntax, update GTX * Multi-channel has not been implemented yet. --- .../gateware/drtio/transceiver/gtx_7series.py | 148 ++++++++++++++---- .../drtio/transceiver/gtx_7series_init.py | 25 ++- .../gateware/targets/kc705_drtio_satellite.py | 145 ++++++++++------- 3 files changed, 220 insertions(+), 98 deletions(-) diff --git a/artiq/gateware/drtio/transceiver/gtx_7series.py b/artiq/gateware/drtio/transceiver/gtx_7series.py index 7b6cfaecf..e2b1378a9 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series.py @@ -5,17 +5,18 @@ from misoc.cores.code_8b10b import Encoder, Decoder from misoc.interconnect.csr import * from artiq.gateware.drtio.core import TransceiverInterface, ChannelInterface +from artiq.gateware.drtio.transceiver.clock_aligner import BruteforceClockAligner from artiq.gateware.drtio.transceiver.gtx_7series_init import * class GTX_20X(Module, TransceiverInterface): - # Only one channel is supported. - # - # The transceiver clock on clock_pads must be at the RTIO clock - # frequency when clock_div2=False, and 2x that frequency when - # clock_div2=True. 
- def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq, - clock_div2=False): + # Settings: + # * GTX reference clock (at clock_pads) @ 125MHz == coarse RTIO frequency + # * GTX data width = 20 + # * GTX PLL frequency @ 2.5GHz + # * GTX line rate (TX & RX) @ 2.5Gb/s + # * GTX TX/RX USRCLK @ 125MHz == coarse RTIO frequency + def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq): encoder = ClockDomainsRenamer("rtio")( Encoder(2, True)) self.submodules += encoder @@ -33,21 +34,17 @@ class GTX_20X(Module, TransceiverInterface): # # # refclk = Signal() - if clock_div2: - self.specials += Instance("IBUFDS_GTE2", - i_CEB=0, - i_I=clock_pads.p, - i_IB=clock_pads.n, - o_ODIV2=refclk - ) - else: - self.specials += Instance("IBUFDS_GTE2", - i_CEB=0, - i_I=clock_pads.p, - i_IB=clock_pads.n, - o_O=refclk - ) + stable_clkin_n = Signal() + self.stable_clkin.storage.attr.add("no_retiming") + self.comb += stable_clkin_n.eq(~self.stable_clkin.storage) + self.specials += Instance("IBUFDS_GTE2", + i_CEB=stable_clkin_n, + i_I=clock_pads.p, + i_IB=clock_pads.n, + o_O=refclk + ) + cpllreset = Signal() cplllock = Signal() # TX generates RTIO clock, init must be in system domain tx_init = GTXInit(sys_clk_freq, False) @@ -55,8 +52,11 @@ class GTX_20X(Module, TransceiverInterface): rx_init = ClockDomainsRenamer("rtio")( GTXInit(self.rtio_clk_freq, True)) self.submodules += tx_init, rx_init - self.comb += tx_init.cplllock.eq(cplllock), \ - rx_init.cplllock.eq(cplllock) + self.comb += [ + cpllreset.eq(tx_init.cpllreset), + tx_init.cplllock.eq(cplllock), + rx_init.cplllock.eq(cplllock) + ] txdata = Signal(20) rxdata = Signal(20) @@ -64,12 +64,11 @@ class GTX_20X(Module, TransceiverInterface): Instance("GTXE2_CHANNEL", # PMA Attributes p_PMA_RSV=0x00018480, - p_PMA_RSV2=0x2050, + p_PMA_RSV2=0x2050, # PMA_RSV2[5] = 0: Eye scan feature disabled p_PMA_RSV3=0, - p_PMA_RSV4=0, - p_RX_BIAS_CFG=0b100, - p_RX_CM_TRIM=0b010, - p_RX_OS_CFG=0b10000000, + p_PMA_RSV4=1, # 
PMA_RSV[4],RX_CM_TRIM[2:0] = 0b1010: Common mode 800mV + p_RX_BIAS_CFG=0b000000000100, + p_RX_OS_CFG=0b0000010000000, p_RX_CLK25_DIV=5, p_TX_CLK25_DIV=5, @@ -85,6 +84,8 @@ class GTX_20X(Module, TransceiverInterface): p_CPLL_REFCLK_DIV=1, p_RXOUT_DIV=2, p_TXOUT_DIV=2, + i_CPLLRESET=cpllreset, + i_CPLLPD=cpllreset, o_CPLLLOCK=cplllock, i_CPLLLOCKEN=1, i_CPLLREFCLKSEL=0b001, @@ -105,6 +106,9 @@ class GTX_20X(Module, TransceiverInterface): o_TXDLYSRESETDONE=tx_init.Xxdlysresetdone, o_TXPHALIGNDONE=tx_init.Xxphaligndone, i_TXUSERRDY=tx_init.Xxuserrdy, + p_TXPMARESET_TIME=1, + p_TXPCSRESET_TIME=1, + i_TXINHIBIT=~self.txenable.storage, # TX data p_TX_DATA_WIDTH=20, @@ -126,24 +130,36 @@ class GTX_20X(Module, TransceiverInterface): o_RXDLYSRESETDONE=rx_init.Xxdlysresetdone, o_RXPHALIGNDONE=rx_init.Xxphaligndone, i_RXUSERRDY=rx_init.Xxuserrdy, + p_RXPMARESET_TIME=1, + p_RXPCSRESET_TIME=1, # RX AFE p_RX_DFE_XYD_CFG=0, + p_RX_CM_SEL=0b11, # RX_CM_SEL = 0b11: Common mode is programmable + p_RX_CM_TRIM=0b010, # PMA_RSV[4],RX_CM_TRIM[2:0] = 0b1010: Common mode 800mV i_RXDFEXYDEN=1, i_RXDFEXYDHOLD=0, i_RXDFEXYDOVRDEN=0, - i_RXLPMEN=0, + i_RXLPMEN=0, # RXLPMEN = 0: DFE mode is enabled + p_RX_DFE_GAIN_CFG=0x0207EA, + p_RX_DFE_VP_CFG=0b00011111100000011, + p_RX_DFE_UT_CFG=0b10001000000000000, + p_RX_DFE_KL_CFG=0b0000011111110, + p_RX_DFE_KL_CFG2=0x3788140A, + p_RX_DFE_H2_CFG=0b000110000000, + p_RX_DFE_H3_CFG=0b000110000000, + p_RX_DFE_H4_CFG=0b00011100000, + p_RX_DFE_H5_CFG=0b00011100000, + p_RX_DFE_LPM_CFG=0x0904, # RX_DFE_LPM_CFG = 0x0904: linerate <= 6.6Gb/s + # = 0x0104: linerate > 6.6Gb/s # RX clock - p_RXBUF_EN="FALSE", - p_RX_XCLK_SEL="RXUSR", i_RXDDIEN=1, i_RXSYSCLKSEL=0b00, i_RXOUTCLKSEL=0b010, o_RXOUTCLK=self.rxoutclk, i_RXUSRCLK=ClockSignal("rtio_rx0"), i_RXUSRCLK2=ClockSignal("rtio_rx0"), - p_RXCDR_CFG=0x03000023FF10100020, # RX Clock Correction Attributes p_CLK_CORRECT_USE="FALSE", @@ -159,11 +175,77 @@ class GTX_20X(Module, TransceiverInterface): 
o_RXCHARISK=Cat(rxdata[8], rxdata[18]), o_RXDATA=Cat(rxdata[:8], rxdata[10:18]), + # RX Byte and Word Alignment Attributes + p_ALIGN_COMMA_DOUBLE="FALSE", + p_ALIGN_COMMA_ENABLE=0b1111111111, + p_ALIGN_COMMA_WORD=1, + p_ALIGN_MCOMMA_DET="TRUE", + p_ALIGN_MCOMMA_VALUE=0b1010000011, + p_ALIGN_PCOMMA_DET="TRUE", + p_ALIGN_PCOMMA_VALUE=0b0101111100, + p_SHOW_REALIGN_COMMA="FALSE", + p_RXSLIDE_AUTO_WAIT=7, + p_RXSLIDE_MODE="PCS", + p_RX_SIG_VALID_DLY=10, + + # RX 8B/10B Decoder Attributes + p_RX_DISPERR_SEQ_MATCH="FALSE", + p_DEC_MCOMMA_DETECT="TRUE", + p_DEC_PCOMMA_DETECT="TRUE", + p_DEC_VALID_COMMA_ONLY="FALSE", + + # RX Buffer Attributes + p_RXBUF_ADDR_MODE="FAST", + p_RXBUF_EIDLE_HI_CNT=0b1000, + p_RXBUF_EIDLE_LO_CNT=0b0000, + p_RXBUF_EN="FALSE", + p_RX_BUFFER_CFG=0b000000, + p_RXBUF_RESET_ON_CB_CHANGE="TRUE", + p_RXBUF_RESET_ON_COMMAALIGN="FALSE", + p_RXBUF_RESET_ON_EIDLE="FALSE", # RXBUF_RESET_ON_EIDLE = FALSE: OOB is disabled + p_RXBUF_RESET_ON_RATE_CHANGE="TRUE", + p_RXBUFRESET_TIME=0b00001, + p_RXBUF_THRESH_OVFLW=61, + p_RXBUF_THRESH_OVRD="FALSE", + p_RXBUF_THRESH_UNDFLW=4, + p_RXDLY_CFG=0x001F, + p_RXDLY_LCFG=0x030, + p_RXDLY_TAP_CFG=0x0000, + p_RXPH_CFG=0xC00002, + p_RXPHDLY_CFG=0x084020, + p_RXPH_MONITOR_SEL=0b00000, + p_RX_XCLK_SEL="RXUSR", + p_RX_DDI_SEL=0b000000, + p_RX_DEFER_RESET_BUF_EN="TRUE", + + # CDR Attributes + p_RXCDR_CFG=0x03000023FF20400020, # DFE @ <= 6.6Gb/s, scrambled, CDR setting < +/- 200ppm + # (See UG476 (v1.12.1), p.206) + p_RXCDR_FR_RESET_ON_EIDLE=0b0, + p_RXCDR_HOLD_DURING_EIDLE=0b0, + p_RXCDR_PH_RESET_ON_EIDLE=0b0, + p_RXCDR_LOCK_CFG=0b010101, + + # # RX Initialization and Reset Attributes + # p_RXCDRFREQRESET_TIME=0b00001, + # p_RXCDRPHRESET_TIME=0b00001, + # p_RXISCANRESET_TIME=0b00001, + # p_RXPCSRESET_TIME=0b00001, + # p_RXPMARESET_TIME=0b00011, + # Pads i_GTXRXP=rx_pads.p, i_GTXRXN=rx_pads.n, o_GTXTXP=tx_pads.p, o_GTXTXN=tx_pads.n, + + # Other parameters + p_PCS_RSVD_ATTR=0x000, # PCS_RSVD_ATTR[1] = 0: TX Single Lane Auto Mode + 
# [2] = 0: RX Single Lane Auto Mode + # [8] = 0: OOB is disabled + i_RXELECIDLEMODE=0b11, # RXELECIDLEMODE = 0b11: OOB is disabled + p_RX_DFE_LPM_HOLD_DURING_EIDLE=0b0, + p_ES_EYE_SCAN_EN="TRUE", # Must be TRUE for GTX ) tx_reset_deglitched = Signal() @@ -199,7 +281,7 @@ class GTX_20X(Module, TransceiverInterface): class GTX_1000BASE_BX10(GTX_20X): - rtio_clk_freq = 62.5e6 + rtio_clk_freq = 125e6 class RXSynchronizer(Module, AutoCSR): diff --git a/artiq/gateware/drtio/transceiver/gtx_7series_init.py b/artiq/gateware/drtio/transceiver/gtx_7series_init.py index 2b7ae39bc..6598f2892 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series_init.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series_init.py @@ -16,6 +16,7 @@ class GTXInit(Module): # GTX signals self.cplllock = Signal() + self.cpllreset = Signal() self.gtXxreset = Signal() self.Xxresetdone = Signal() self.Xxdlysreset = Signal() @@ -53,6 +54,12 @@ class GTXInit(Module): startup_timer = WaitTimer(startup_cycles) self.submodules += startup_timer + # PLL reset should be 1 period of refclk + # (i.e. 
1/(125MHz) for the case of RTIO @ 125MHz) + pll_reset_cycles = ceil(sys_clk_freq/125e6) + pll_reset_timer = WaitTimer(pll_reset_cycles) + self.submodules += pll_reset_timer + startup_fsm = FSM(reset_state="INITIAL") self.submodules += startup_fsm @@ -67,27 +74,29 @@ class GTXInit(Module): startup_fsm.act("INITIAL", startup_timer.wait.eq(1), - If(startup_timer.done, NextState("RESET_GTX")) + If(startup_timer.done, NextState("RESET_ALL")) ) - startup_fsm.act("RESET_GTX", + startup_fsm.act("RESET_ALL", gtXxreset.eq(1), - NextState("WAIT_CPLL") + self.cpllreset.eq(1), + pll_reset_timer.wait.eq(1), + If(pll_reset_timer.done, NextState("RELEASE_PLL_RESET")) ) - startup_fsm.act("WAIT_CPLL", + startup_fsm.act("RELEASE_PLL_RESET", gtXxreset.eq(1), - If(cplllock, NextState("RELEASE_RESET")) + If(cplllock, NextState("RELEASE_GTH_RESET")) ) # Release GTX reset and wait for GTX resetdone # (from UG476, GTX is reset on falling edge # of gttxreset) if rx: - startup_fsm.act("RELEASE_RESET", + startup_fsm.act("RELEASE_GTH_RESET", Xxuserrdy.eq(1), cdr_stable_timer.wait.eq(1), If(Xxresetdone & cdr_stable_timer.done, NextState("ALIGN")) ) else: - startup_fsm.act("RELEASE_RESET", + startup_fsm.act("RELEASE_GTH_RESET", Xxuserrdy.eq(1), If(Xxresetdone, NextState("ALIGN")) ) @@ -115,7 +124,7 @@ class GTXInit(Module): startup_fsm.act("READY", Xxuserrdy.eq(1), self.done.eq(1), - If(self.restart, NextState("RESET_GTX")) + If(self.restart, NextState("RESET_ALL")) ) diff --git a/artiq/gateware/targets/kc705_drtio_satellite.py b/artiq/gateware/targets/kc705_drtio_satellite.py index 0f318a0d3..048b1153e 100755 --- a/artiq/gateware/targets/kc705_drtio_satellite.py +++ b/artiq/gateware/targets/kc705_drtio_satellite.py @@ -5,6 +5,9 @@ import os from migen import * from migen.build.generic_platform import * +from migen.build.xilinx.vivado import XilinxVivadoToolchain +from migen.build.xilinx.ise import XilinxISEToolchain + from misoc.cores import spi as spi_csr from misoc.cores import gpio from 
misoc.integration.builder import * @@ -13,27 +16,101 @@ from misoc.targets.kc705 import BaseSoC, soc_kc705_args, soc_kc705_argdict from artiq.gateware import rtio from artiq.gateware.rtio.phy import ttl_simple from artiq.gateware.drtio.transceiver import gtx_7series -from artiq.gateware.drtio import DRTIOSatellite -from artiq import __version__ as artiq_version -from artiq import __artiq_dir__ as artiq_dir +from artiq.gateware.drtio.siphaser import SiPhaser7Series +from artiq.gateware.drtio.rx_synchronizer import XilinxRXSynchronizer +from artiq.gateware.drtio import * +from artiq.build_soc import * + +# DEBUG +from microscope import * class Satellite(BaseSoC): mem_map = { - "drtio_aux": 0x50000000, + "drtioaux": 0x50000000, } mem_map.update(BaseSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, - ident=artiq_version, + integrated_sram_size=8192, **kwargs) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) + + if isinstance(self.platform.toolchain, XilinxVivadoToolchain): + self.platform.toolchain.bitstream_commands.extend([ + "set_property BITSTREAM.GENERAL.COMPRESS True [current_design]", + ]) + if isinstance(self.platform.toolchain, XilinxISEToolchain): + self.platform.toolchain.bitgen_opt += " -g compress" platform = self.platform + self.comb += platform.request("sfp_tx_disable_n").eq(1) + tx_pads = platform.request("sfp_tx") + rx_pads = platform.request("sfp_rx") + + # 1000BASE_BX10 Ethernet compatible, 125MHz RTIO clock + self.submodules.drtio_transceiver = gtx_7series.GTX_1000BASE_BX10( + clock_pads=platform.request("si5324_clkout"), + tx_pads=tx_pads, + rx_pads=rx_pads, + sys_clk_freq=self.clk_freq) + self.csr_devices.append("drtio_transceiver") + + self.submodules.rtio_tsc = rtio.TSC("sync", glbl_fine_ts_width=3) + cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"}) + + 
self.submodules.rx_synchronizer = cdr(XilinxRXSynchronizer()) + self.submodules.drtiosat = cdr(DRTIOSatellite( + self.rtio_tsc, self.drtio_transceiver.channels[0], self.rx_synchronizer)) + self.csr_devices.append("drtiosat") + + self.submodules.drtioaux0 = cdr(DRTIOAuxController( + self.drtiosat.link_layer)) + self.csr_devices.append("drtioaux0") + self.add_wb_slave(self.mem_map["drtioaux"], 0x800, + self.drtioaux0.bus) + self.add_memory_region("drtioaux0_mem", self.mem_map["drtioaux"] | self.shadow_base, 0x800) + + self.config["HAS_DRTIO"] = None + self.add_csr_group("drtio", ["drtiosat"]) + self.add_csr_group("drtioaux", ["drtioaux0"]) + self.add_memory_group("drtioaux_mem", ["drtioaux0_mem"]) + + self.config["RTIO_FREQUENCY"] = str(self.drtio_transceiver.rtio_clk_freq/1e6) + # Si5324 Phaser + self.submodules.siphaser = SiPhaser7Series( + si5324_clkin=platform.request("si5324_clkin"), + rx_synchronizer=self.rx_synchronizer, + ultrascale=False, + rtio_clk_freq=self.drtio_transceiver.rtio_clk_freq) + platform.add_false_path_constraints( + self.crg.cd_sys.clk, self.siphaser.mmcm_freerun_output) + self.csr_devices.append("siphaser") + self.submodules.si5324_rst_n = gpio.GPIOOut(platform.request("si5324").rst_n) + self.csr_devices.append("si5324_rst_n") + i2c = self.platform.request("i2c") + self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) + self.csr_devices.append("i2c") + self.config["I2C_BUS_COUNT"] = 1 + self.config["HAS_SI5324"] = None + + self.comb += [ + platform.request("user_sma_clock_p").eq(ClockSignal("rtio_rx0")), + platform.request("user_sma_clock_n").eq(ClockSignal("rtio")) + ] + + rtio_clk_period = 1e9/self.drtio_transceiver.rtio_clk_freq + platform.add_period_constraint(self.drtio_transceiver.txoutclk, rtio_clk_period) + platform.add_period_constraint(self.drtio_transceiver.rxoutclk, rtio_clk_period) + platform.add_false_path_constraints( + self.crg.cd_sys.clk, + self.drtio_transceiver.txoutclk, self.drtio_transceiver.rxoutclk) + 
rtio_channels = [] for i in range(8): phy = ttl_simple.Output(platform.request("user_led", i)) @@ -47,54 +124,9 @@ class Satellite(BaseSoC): self.submodules.rtio_moninj = rtio.MonInj(rtio_channels) self.csr_devices.append("rtio_moninj") - self.comb += platform.request("sfp_tx_disable_n").eq(1) - - # 1000BASE_BX10 Ethernet compatible, 62.5MHz RTIO clock - self.submodules.transceiver = gtx_7series.GTX_1000BASE_BX10( - clock_pads=platform.request("si5324_clkout"), - tx_pads=platform.request("sfp_tx"), - rx_pads=platform.request("sfp_rx"), - sys_clk_freq=self.clk_freq) - rx0 = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"}) - self.submodules.rx_synchronizer0 = rx0(gtx_7series.RXSynchronizer( - self.transceiver.rtio_clk_freq, initial_phase=180.0)) - self.submodules.drtio0 = rx0(DRTIOSatellite( - self.transceiver.channels[0], rtio_channels, self.rx_synchronizer0)) - self.csr_devices.append("rx_synchronizer0") - self.csr_devices.append("drtio0") - self.add_wb_slave(self.mem_map["drtio_aux"], 0x800, - self.drtio0.aux_controller.bus) - self.add_memory_region("drtio0_aux", self.mem_map["drtio_aux"] | self.shadow_base, 0x800) - self.config["HAS_DRTIO"] = None - self.add_csr_group("drtio", ["drtio0"]) - self.add_memory_group("drtio_aux", ["drtio0_aux"]) - - self.config["RTIO_FREQUENCY"] = str(self.transceiver.rtio_clk_freq/1e6) - si5324_clkin = platform.request("si5324_clkin") - self.specials += \ - Instance("OBUFDS", - i_I=ClockSignal("rtio_rx0"), - o_O=si5324_clkin.p, o_OB=si5324_clkin.n - ) - self.submodules.si5324_rst_n = gpio.GPIOOut(platform.request("si5324").rst_n) - self.csr_devices.append("si5324_rst_n") - i2c = self.platform.request("i2c") - self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) - self.csr_devices.append("i2c") - self.config["I2C_BUS_COUNT"] = 1 - self.config["HAS_SI5324"] = None - - self.comb += [ - platform.request("user_sma_clock_p").eq(ClockSignal("rtio_rx0")), - platform.request("user_sma_clock_n").eq(ClockSignal("rtio")) - ] - - 
rtio_clk_period = 1e9/self.transceiver.rtio_clk_freq - platform.add_period_constraint(self.transceiver.txoutclk, rtio_clk_period) - platform.add_period_constraint(self.transceiver.rxoutclk, rtio_clk_period) - platform.add_false_path_constraints( - platform.lookup_request("clk200"), - self.transceiver.txoutclk, self.transceiver.rxoutclk) + self.submodules.local_io = SyncRTIO(self.rtio_tsc, rtio_channels) + self.comb += self.drtiosat.async_errors.eq(self.local_io.async_errors) + self.comb += self.drtiosat.cri.connect(self.local_io.cri) def main(): @@ -102,12 +134,11 @@ def main(): description="ARTIQ device binary builder / KC705 DRTIO satellite") builder_args(parser) soc_kc705_args(parser) + parser.set_defaults(output_dir="artiq_kc705/satellite") args = parser.parse_args() soc = Satellite(**soc_kc705_argdict(args)) - builder = Builder(soc, **builder_argdict(args)) - builder.add_software_package("satman", os.path.join(artiq_dir, "firmware", "satman")) - builder.build() + build_artiq_soc(soc, builder_argdict(args)) if __name__ == "__main__": From f6d39fd6baed3789bdf54c1ccd1a6fe4435cab4c Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Wed, 20 Jan 2021 15:05:31 +0800 Subject: [PATCH 3/8] kc705: revive DRTIO master with updated syntax * KC705 master variant now uses Si5324 as synthesiser. * Multi-channel has not been implemented yet. 
--- artiq/firmware/runtime/rtio_clocking.rs | 4 + artiq/gateware/targets/kc705_drtio_master.py | 91 ++++++++++++++------ 2 files changed, 67 insertions(+), 28 deletions(-) diff --git a/artiq/firmware/runtime/rtio_clocking.rs b/artiq/firmware/runtime/rtio_clocking.rs index 3b230bacc..b6cf5f395 100644 --- a/artiq/firmware/runtime/rtio_clocking.rs +++ b/artiq/firmware/runtime/rtio_clocking.rs @@ -153,6 +153,8 @@ fn setup_si5324_as_synthesizer() { let si5324_ref_input = si5324::Input::Ckin2; #[cfg(soc_platform = "metlino")] let si5324_ref_input = si5324::Input::Ckin2; + #[cfg(soc_platform = "kc705")] + let si5324_ref_input = si5324::Input::Ckin2; si5324::setup(&SI5324_SETTINGS, si5324_ref_input).expect("cannot initialize Si5324"); } @@ -165,6 +167,8 @@ pub fn init() { let si5324_ext_input = si5324::Input::Ckin2; #[cfg(soc_platform = "metlino")] let si5324_ext_input = si5324::Input::Ckin2; + #[cfg(soc_platform = "kc705")] + let si5324_ext_input = si5324::Input::Ckin2; match get_rtio_clock_cfg() { RtioClock::Internal => setup_si5324_as_synthesizer(), RtioClock::External => si5324::bypass(si5324_ext_input).expect("cannot bypass Si5324") diff --git a/artiq/gateware/targets/kc705_drtio_master.py b/artiq/gateware/targets/kc705_drtio_master.py index c1bf28fb4..436d0ada2 100755 --- a/artiq/gateware/targets/kc705_drtio_master.py +++ b/artiq/gateware/targets/kc705_drtio_master.py @@ -4,39 +4,50 @@ import argparse from migen import * from migen.build.generic_platform import * +from migen.build.xilinx.vivado import XilinxVivadoToolchain +from migen.build.xilinx.ise import XilinxISEToolchain from misoc.cores import spi as spi_csr +from misoc.cores import gpio from misoc.targets.kc705 import MiniSoC, soc_kc705_args, soc_kc705_argdict from misoc.integration.builder import builder_args, builder_argdict -from artiq.gateware.amp import AMPSoC, build_artiq_soc +from artiq.gateware.amp import AMPSoC from artiq.gateware import rtio from artiq.gateware.rtio.phy import ttl_simple from 
artiq.gateware.drtio.transceiver import gtx_7series -from artiq.gateware.drtio import DRTIOMaster -from artiq import __version__ as artiq_version +from artiq.gateware.drtio import * +from artiq.build_soc import * class Master(MiniSoC, AMPSoC): mem_map = { - "cri_con": 0x10000000, - "rtio": 0x20000000, - "rtio_dma": 0x30000000, - "drtio_aux": 0x50000000, - "mailbox": 0x70000000 + "cri_con": 0x10000000, + "rtio": 0x20000000, + "rtio_dma": 0x30000000, + "drtioaux": 0x50000000, + "mailbox": 0x70000000 } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, - ident=artiq_version, + integrated_sram_size=8192, ethmac_nrxslots=4, ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) + + if isinstance(self.platform.toolchain, XilinxVivadoToolchain): + self.platform.toolchain.bitstream_commands.extend([ + "set_property BITSTREAM.GENERAL.COMPRESS True [current_design]", + ]) + if isinstance(self.platform.toolchain, XilinxISEToolchain): + self.platform.toolchain.bitgen_opt += " -g compress" platform = self.platform @@ -44,35 +55,55 @@ class Master(MiniSoC, AMPSoC): tx_pads = platform.request("sfp_tx") rx_pads = platform.request("sfp_rx") - # 1000BASE_BX10 Ethernet compatible, 62.5MHz RTIO clock - self.submodules.transceiver = gtx_7series.GTX_1000BASE_BX10( - clock_pads=platform.request("sgmii_clock"), + # 1000BASE_BX10 Ethernet compatible, 125MHz RTIO clock + self.submodules.drtio_transceiver = gtx_7series.GTX_1000BASE_BX10( + clock_pads=platform.request("si5324_clkout"), tx_pads=tx_pads, rx_pads=rx_pads, - sys_clk_freq=self.clk_freq, - clock_div2=True) + sys_clk_freq=self.clk_freq) + self.csr_devices.append("drtio_transceiver") - self.submodules.drtio0 = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"})( - DRTIOMaster(self.transceiver.channels[0])) + 
self.submodules.rtio_tsc = rtio.TSC("async", glbl_fine_ts_width=3) + cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"}) + + self.submodules.drtio0 = cdr(DRTIOMaster( + self.rtio_tsc, self.drtio_transceiver.channels[0])) self.csr_devices.append("drtio0") - self.add_wb_slave(self.mem_map["drtio_aux"], 0x800, - self.drtio0.aux_controller.bus) - self.add_memory_region("drtio0_aux", self.mem_map["drtio_aux"] | self.shadow_base, 0x800) + + self.submodules.drtioaux0 = cdr(DRTIOAuxController( + self.drtio0.link_layer)) + self.csr_devices.append("drtioaux0") + self.add_wb_slave(self.mem_map["drtioaux"], 0x800, + self.drtioaux0.bus) + self.add_memory_region("drtioaux0_mem", self.mem_map["drtioaux"] | self.shadow_base, 0x800) + self.config["HAS_DRTIO"] = None + self.config["HAS_DRTIO_ROUTING"] = None self.add_csr_group("drtio", ["drtio0"]) - self.add_memory_group("drtio_aux", ["drtio0_aux"]) + self.add_csr_group("drtioaux", ["drtioaux0"]) + self.add_memory_group("drtioaux_mem", ["drtioaux0_mem"]) + + self.config["RTIO_FREQUENCY"] = str(self.drtio_transceiver.rtio_clk_freq/1e6) + self.submodules.si5324_rst_n = gpio.GPIOOut(platform.request("si5324").rst_n) + self.csr_devices.append("si5324_rst_n") + i2c = self.platform.request("i2c") + self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) + self.csr_devices.append("i2c") + self.config["I2C_BUS_COUNT"] = 1 + self.config["HAS_SI5324"] = None + self.config["SI5324_AS_SYNTHESIZER"] = None self.comb += [ platform.request("user_sma_clock_p").eq(ClockSignal("rtio_rx0")), platform.request("user_sma_clock_n").eq(ClockSignal("rtio")) ] - rtio_clk_period = 1e9/self.transceiver.rtio_clk_freq - platform.add_period_constraint(self.transceiver.txoutclk, rtio_clk_period) - platform.add_period_constraint(self.transceiver.rxoutclk, rtio_clk_period) + rtio_clk_period = 1e9/self.drtio_transceiver.rtio_clk_freq + platform.add_period_constraint(self.drtio_transceiver.txoutclk, rtio_clk_period) + 
platform.add_period_constraint(self.drtio_transceiver.rxoutclk, rtio_clk_period) platform.add_false_path_constraints( self.crg.cd_sys.clk, - self.transceiver.txoutclk, self.transceiver.rxoutclk) + self.drtio_transceiver.txoutclk, self.drtio_transceiver.rxoutclk) rtio_channels = [] for i in range(8): @@ -87,18 +118,21 @@ class Master(MiniSoC, AMPSoC): self.submodules.rtio_moninj = rtio.MonInj(rtio_channels) self.csr_devices.append("rtio_moninj") - self.submodules.rtio_core = rtio.Core(rtio_channels, 3) + self.submodules.rtio_core = rtio.Core(self.rtio_tsc, rtio_channels) self.csr_devices.append("rtio_core") - self.submodules.rtio = rtio.KernelInitiator() + self.submodules.rtio = rtio.KernelInitiator(self.rtio_tsc) self.submodules.rtio_dma = ClockDomainsRenamer("sys_kernel")( rtio.DMA(self.get_native_sdram_if())) self.register_kernel_cpu_csrdevice("rtio") self.register_kernel_cpu_csrdevice("rtio_dma") self.submodules.cri_con = rtio.CRIInterconnectShared( [self.rtio.cri, self.rtio_dma.cri], - [self.rtio_core.cri, self.drtio0.cri]) + [self.rtio_core.cri, self.drtio0.cri], + enable_routing=True) self.register_kernel_cpu_csrdevice("cri_con") + self.submodules.routing_table = rtio.RoutingTableAccess(self.cri_con) + self.csr_devices.append("routing_table") def main(): @@ -106,6 +140,7 @@ def main(): description="ARTIQ device binary builder / KC705 DRTIO master") builder_args(parser) soc_kc705_args(parser) + parser.set_defaults(output_dir="artiq_kc705/master") args = parser.parse_args() soc = Master(**soc_kc705_argdict(args)) From 52afd4ef6b7f5154e4caa0ca0439aa92b8cf0f53 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Wed, 20 Jan 2021 11:24:20 +0800 Subject: [PATCH 4/8] kc705: add GTX multilane support, add multichannel support on master * One DRTIO master channel is enabled by default. * User can set the SMA as the 2nd master channel (by passing --drtio-sma to the argparser). * Multi-channel (i.e. 
with repeaters) on KC705 satellite is supported but has not been implemented yet. --- .../gateware/drtio/transceiver/gtx_7series.py | 164 ++++++++++++----- .../drtio/transceiver/gtx_7series_init.py | 172 +++++++++++++++--- artiq/gateware/targets/kc705_drtio_master.py | 77 +++++--- .../gateware/targets/kc705_drtio_satellite.py | 21 ++- 4 files changed, 332 insertions(+), 102 deletions(-) diff --git a/artiq/gateware/drtio/transceiver/gtx_7series.py b/artiq/gateware/drtio/transceiver/gtx_7series.py index e2b1378a9..5b8828f41 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series.py @@ -9,22 +9,23 @@ from artiq.gateware.drtio.transceiver.clock_aligner import BruteforceClockAligne from artiq.gateware.drtio.transceiver.gtx_7series_init import * -class GTX_20X(Module, TransceiverInterface): +class GTX_20X(Module): # Settings: - # * GTX reference clock (at clock_pads) @ 125MHz == coarse RTIO frequency + # * GTX reference clock @ 125MHz == coarse RTIO frequency # * GTX data width = 20 # * GTX PLL frequency @ 2.5GHz # * GTX line rate (TX & RX) @ 2.5Gb/s # * GTX TX/RX USRCLK @ 125MHz == coarse RTIO frequency - def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq): - encoder = ClockDomainsRenamer("rtio")( - Encoder(2, True)) - self.submodules += encoder - decoders = [ClockDomainsRenamer("rtio_rx0")( - (Decoder(True))) for _ in range(2)] - self.submodules += decoders + def __init__(self, refclk, tx_pads, rx_pads, sys_clk_freq, rtio_clk_freq=125e6, tx_mode="single", rx_mode="single"): + assert tx_mode in ["single", "master", "slave"] + assert rx_mode in ["single", "master", "slave"] - TransceiverInterface.__init__(self, [ChannelInterface(encoder, decoders)]) + self.txenable = Signal() + self.submodules.encoder = ClockDomainsRenamer("rtio_tx")( + Encoder(2, True)) + self.submodules.decoders = [ClockDomainsRenamer("rtio_rx")( + (Decoder(True))) for _ in range(2)] + self.rx_ready = Signal() # transceiver direct clock 
outputs # useful to specify clock constraints in a way palatable to Vivado @@ -33,25 +34,13 @@ class GTX_20X(Module, TransceiverInterface): # # # - refclk = Signal() - stable_clkin_n = Signal() - self.stable_clkin.storage.attr.add("no_retiming") - self.comb += stable_clkin_n.eq(~self.stable_clkin.storage) - self.specials += Instance("IBUFDS_GTE2", - i_CEB=stable_clkin_n, - i_I=clock_pads.p, - i_IB=clock_pads.n, - o_O=refclk - ) - cpllreset = Signal() cplllock = Signal() # TX generates RTIO clock, init must be in system domain - tx_init = GTXInit(sys_clk_freq, False) + self.submodules.tx_init = tx_init = GTXInit(sys_clk_freq, False, mode=tx_mode) # RX receives restart commands from RTIO domain - rx_init = ClockDomainsRenamer("rtio")( - GTXInit(self.rtio_clk_freq, True)) - self.submodules += tx_init, rx_init + self.submodules.rx_init = rx_init = ClockDomainsRenamer("rtio_tx")( + GTXInit(rtio_clk_freq, True, mode=rx_mode)) self.comb += [ cpllreset.eq(tx_init.cpllreset), tx_init.cplllock.eq(cplllock), @@ -60,6 +49,7 @@ class GTX_20X(Module, TransceiverInterface): txdata = Signal(20) rxdata = Signal(20) + # Note: the following parameters were set after consulting AR45360 self.specials += \ Instance("GTXE2_CHANNEL", # PMA Attributes @@ -100,15 +90,22 @@ class GTX_20X(Module, TransceiverInterface): i_TXOUTCLKSEL=0b11, # TX Startup/Reset + i_TXPHDLYRESET=0, + i_TXDLYBYPASS=0, + i_TXPHALIGNEN=1 if tx_mode != "single" else 0, i_GTTXRESET=tx_init.gtXxreset, o_TXRESETDONE=tx_init.Xxresetdone, i_TXDLYSRESET=tx_init.Xxdlysreset, o_TXDLYSRESETDONE=tx_init.Xxdlysresetdone, + i_TXPHINIT=tx_init.txphinit if tx_mode != "single" else 0, + o_TXPHINITDONE=tx_init.txphinitdone if tx_mode != "single" else Signal(), + i_TXPHALIGN=tx_init.Xxphalign if tx_mode != "single" else 0, + i_TXDLYEN=tx_init.Xxdlyen if tx_mode != "single" else 0, o_TXPHALIGNDONE=tx_init.Xxphaligndone, i_TXUSERRDY=tx_init.Xxuserrdy, p_TXPMARESET_TIME=1, p_TXPCSRESET_TIME=1, - i_TXINHIBIT=~self.txenable.storage, + 
i_TXINHIBIT=~self.txenable, # TX data p_TX_DATA_WIDTH=20, @@ -116,18 +113,23 @@ class GTX_20X(Module, TransceiverInterface): i_TXCHARDISPMODE=Cat(txdata[9], txdata[19]), i_TXCHARDISPVAL=Cat(txdata[8], txdata[18]), i_TXDATA=Cat(txdata[:8], txdata[10:18]), - i_TXUSRCLK=ClockSignal("rtio"), - i_TXUSRCLK2=ClockSignal("rtio"), + i_TXUSRCLK=ClockSignal("rtio_tx"), + i_TXUSRCLK2=ClockSignal("rtio_tx"), # TX electrical i_TXBUFDIFFCTRL=0b100, i_TXDIFFCTRL=0b1000, # RX Startup/Reset + i_RXPHDLYRESET=0, + i_RXDLYBYPASS=0, + i_RXPHALIGNEN=1 if rx_mode != "single" else 0, i_GTRXRESET=rx_init.gtXxreset, o_RXRESETDONE=rx_init.Xxresetdone, i_RXDLYSRESET=rx_init.Xxdlysreset, o_RXDLYSRESETDONE=rx_init.Xxdlysresetdone, + i_RXPHALIGN=rx_init.Xxphalign if rx_mode != "single" else 0, + i_RXDLYEN=rx_init.Xxdlyen if rx_mode != "single" else 0, o_RXPHALIGNDONE=rx_init.Xxphaligndone, i_RXUSERRDY=rx_init.Xxuserrdy, p_RXPMARESET_TIME=1, @@ -158,8 +160,8 @@ class GTX_20X(Module, TransceiverInterface): i_RXSYSCLKSEL=0b00, i_RXOUTCLKSEL=0b010, o_RXOUTCLK=self.rxoutclk, - i_RXUSRCLK=ClockSignal("rtio_rx0"), - i_RXUSRCLK2=ClockSignal("rtio_rx0"), + i_RXUSRCLK=ClockSignal("rtio_rx"), + i_RXUSRCLK2=ClockSignal("rtio_rx"), # RX Clock Correction Attributes p_CLK_CORRECT_USE="FALSE", @@ -240,48 +242,110 @@ class GTX_20X(Module, TransceiverInterface): o_GTXTXN=tx_pads.n, # Other parameters - p_PCS_RSVD_ATTR=0x000, # PCS_RSVD_ATTR[1] = 0: TX Single Lane Auto Mode - # [2] = 0: RX Single Lane Auto Mode - # [8] = 0: OOB is disabled + p_PCS_RSVD_ATTR=( + (tx_mode != "single") << 1 | # PCS_RSVD_ATTR[1] = 0: TX Single Lane Auto Mode + # = 1: TX Manual Mode + (rx_mode != "single") << 2 | # [2] = 0: RX Single Lane Auto Mode + # = 1: RX Manual Mode + 0 << 8 # [8] = 0: OOB is disabled + ), i_RXELECIDLEMODE=0b11, # RXELECIDLEMODE = 0b11: OOB is disabled p_RX_DFE_LPM_HOLD_DURING_EIDLE=0b0, p_ES_EYE_SCAN_EN="TRUE", # Must be TRUE for GTX ) + # TX clocking tx_reset_deglitched = Signal() 
tx_reset_deglitched.attr.add("no_retiming") self.sync += tx_reset_deglitched.eq(~tx_init.done) - self.specials += [ - Instance("BUFG", i_I=self.txoutclk, o_O=self.cd_rtio.clk), - AsyncResetSynchronizer(self.cd_rtio, tx_reset_deglitched) - ] + self.clock_domains.cd_rtio_tx = ClockDomain() + if tx_mode == "single" or tx_mode == "master": + self.specials += Instance("BUFG", i_I=self.txoutclk, o_O=self.cd_rtio_tx.clk) + self.specials += AsyncResetSynchronizer(self.cd_rtio_tx, tx_reset_deglitched) + + # RX clocking rx_reset_deglitched = Signal() rx_reset_deglitched.attr.add("no_retiming") self.sync.rtio += rx_reset_deglitched.eq(~rx_init.done) - self.specials += [ - Instance("BUFG", i_I=self.rxoutclk, o_O=self.cd_rtio_rx0.clk), - AsyncResetSynchronizer(self.cd_rtio_rx0, rx_reset_deglitched) - ] + self.clock_domains.cd_rtio_rx = ClockDomain() + if rx_mode == "single" or rx_mode == "master": + self.specials += Instance("BUFG", i_I=self.rxoutclk, o_O=self.cd_rtio_rx.clk), + self.specials += AsyncResetSynchronizer(self.cd_rtio_rx, rx_reset_deglitched) - chan = self.channels[0] self.comb += [ - txdata.eq(Cat(chan.encoder.output[0], chan.encoder.output[1])), - chan.decoders[0].input.eq(rxdata[:10]), - chan.decoders[1].input.eq(rxdata[10:]) + txdata.eq(Cat(self.encoder.output[0], self.encoder.output[1])), + self.decoders[0].input.eq(rxdata[:10]), + self.decoders[1].input.eq(rxdata[10:]) ] - clock_aligner = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"})( - BruteforceClockAligner(0b0101111100, self.rtio_clk_freq)) + clock_aligner = BruteforceClockAligner(0b0101111100, rtio_clk_freq) self.submodules += clock_aligner self.comb += [ clock_aligner.rxdata.eq(rxdata), rx_init.restart.eq(clock_aligner.restart), - chan.rx_ready.eq(clock_aligner.ready) + self.rx_ready.eq(clock_aligner.ready) ] -class GTX_1000BASE_BX10(GTX_20X): - rtio_clk_freq = 125e6 + +class GTX(Module, TransceiverInterface): + def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq, rtio_clk_freq=125e6, master=0): 
+ assert len(tx_pads) == len(rx_pads) + self.nchannels = nchannels = len(tx_pads) + self.gtxs = [] + self.rtio_clk_freq = rtio_clk_freq + + # # # + + refclk = Signal() + stable_clkin_n = Signal() + self.specials += Instance("IBUFDS_GTE2", + i_CEB=stable_clkin_n, + i_I=clock_pads.p, + i_IB=clock_pads.n, + o_O=refclk + ) + + rtio_tx_clk = Signal() + channel_interfaces = [] + for i in range(nchannels): + if nchannels == 1: + mode = "single" + else: + mode = "master" if i == master else "slave" + # Note: RX phase alignment is to be done on individual lanes, not multi-lane. + gtx = GTX_20X(refclk, tx_pads[i], rx_pads[i], sys_clk_freq, rtio_clk_freq=rtio_clk_freq, tx_mode=mode, rx_mode="single") + # Fan-out (to slave) / Fan-in (from master) of the TXUSRCLK + if mode == "slave": + self.comb += gtx.cd_rtio_tx.clk.eq(rtio_tx_clk) + else: + self.comb += rtio_tx_clk.eq(gtx.cd_rtio_tx.clk) + self.gtxs.append(gtx) + setattr(self.submodules, "gtx"+str(i), gtx) + channel_interface = ChannelInterface(gtx.encoder, gtx.decoders) + self.comb += channel_interface.rx_ready.eq(gtx.rx_ready) + channel_interfaces.append(channel_interface) + + self.submodules.tx_phase_alignment = GTXInitPhaseAlignment([gtx.tx_init for gtx in self.gtxs]) + + TransceiverInterface.__init__(self, channel_interfaces) + for n, gtx in enumerate(self.gtxs): + self.comb += [ + stable_clkin_n.eq(~self.stable_clkin.storage), + gtx.txenable.eq(self.txenable.storage[n]) + ] + + # Connect master's `rtio_tx` clock to `rtio` clock + self.comb += [ + self.cd_rtio.clk.eq(self.gtxs[master].cd_rtio_tx.clk), + self.cd_rtio.rst.eq(reduce(or_, [gtx.cd_rtio_tx.rst for gtx in self.gtxs])) + ] + # Connect slave i's `rtio_rx` clock to `rtio_rxi` clock + for i in range(nchannels): + self.comb += [ + getattr(self, "cd_rtio_rx" + str(i)).clk.eq(self.gtxs[i].cd_rtio_rx.clk), + getattr(self, "cd_rtio_rx" + str(i)).rst.eq(self.gtxs[i].cd_rtio_rx.rst) + ] class RXSynchronizer(Module, AutoCSR): diff --git 
a/artiq/gateware/drtio/transceiver/gtx_7series_init.py b/artiq/gateware/drtio/transceiver/gtx_7series_init.py index 6598f2892..9e3f65a15 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series_init.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series_init.py @@ -10,7 +10,14 @@ from migen.genlib.fsm import FSM class GTXInit(Module): # Based on LiteSATA by Enjoy-Digital - def __init__(self, sys_clk_freq, rx): + # Choose between Auto Mode and Manual Mode for TX/RX phase alignment with buffer bypassed: + # * Auto Mode: When only single lane is involved, as suggested by Xilinx (AR59612) + # * Manual Mode: When only multi-lane is involved, as suggested by Xilinx (AR59612) + def __init__(self, sys_clk_freq, rx, mode="single"): + assert isinstance(rx, bool) + assert mode in ["single", "master", "slave"] + self.mode = mode + self.done = Signal() self.restart = Signal() @@ -23,6 +30,21 @@ class GTXInit(Module): self.Xxdlysresetdone = Signal() self.Xxphaligndone = Signal() self.Xxuserrdy = Signal() + # GTX signals exclusive to multi-lane + if mode != "single": + self.Xxphalign = Signal() + self.Xxdlyen = Signal() + # TX only: + if not rx: + self.txphinit = Signal() + self.txphinitdone = Signal() + + # Strobe from master channel to initialize TX/RX phase alignment on slaves + self.master_phaligndone = Signal() + # Strobe from slave channels to re-enable TX/RX delay alignment on master; + # To be combinatorially AND-ed from all slave's `done` + if mode == "master": + self.slaves_phaligndone = Signal() # # # @@ -37,6 +59,9 @@ class GTXInit(Module): MultiReg(self.Xxdlysresetdone, Xxdlysresetdone), MultiReg(self.Xxphaligndone, Xxphaligndone), ] + if mode != "single": + txphinitdone = Signal() + self.specials += MultiReg(self.txphinitdone, txphinitdone) # Deglitch FSM outputs driving transceiver asynch inputs gtXxreset = Signal() @@ -47,6 +72,16 @@ class GTXInit(Module): self.Xxdlysreset.eq(Xxdlysreset), self.Xxuserrdy.eq(Xxuserrdy) ] + if mode != "single": + Xxphalign = 
Signal() + Xxdlyen = Signal() + self.sync += [ + self.Xxphalign.eq(Xxphalign), + self.Xxdlyen.eq(Xxdlyen) + ] + if not rx: + txphinit = Signal() + self.sync += self.txphinit.eq(txphinit) # After configuration, transceiver resets have to stay low for # at least 500ns (see AR43482) @@ -67,6 +102,7 @@ class GTXInit(Module): cdr_stable_timer = WaitTimer(1024) self.submodules += cdr_stable_timer + # Rising edge detection for phase alignment "done" Xxphaligndone_r = Signal(reset=1) Xxphaligndone_rising = Signal() self.sync += Xxphaligndone_r.eq(Xxphaligndone) @@ -93,34 +129,103 @@ class GTXInit(Module): startup_fsm.act("RELEASE_GTH_RESET", Xxuserrdy.eq(1), cdr_stable_timer.wait.eq(1), - If(Xxresetdone & cdr_stable_timer.done, NextState("ALIGN")) + If(Xxresetdone & cdr_stable_timer.done, NextState("DELAY_ALIGN")) ) else: startup_fsm.act("RELEASE_GTH_RESET", Xxuserrdy.eq(1), - If(Xxresetdone, NextState("ALIGN")) + If(Xxresetdone, NextState("DELAY_ALIGN")) ) - # Start delay alignment (pulse) - startup_fsm.act("ALIGN", - Xxuserrdy.eq(1), - Xxdlysreset.eq(1), - NextState("WAIT_ALIGN") - ) - # Wait for delay alignment - startup_fsm.act("WAIT_ALIGN", - Xxuserrdy.eq(1), - If(Xxdlysresetdone, NextState("WAIT_FIRST_ALIGN_DONE")) - ) - # Wait 2 rising edges of rxphaligndone - # (from UG476 in buffer bypass config) - startup_fsm.act("WAIT_FIRST_ALIGN_DONE", - Xxuserrdy.eq(1), - If(Xxphaligndone_rising, NextState("WAIT_SECOND_ALIGN_DONE")) - ) - startup_fsm.act("WAIT_SECOND_ALIGN_DONE", - Xxuserrdy.eq(1), - If(Xxphaligndone_rising, NextState("READY")) - ) + + # State(s) exclusive to Auto Mode: + if mode == "single": + # Start delay alignment (pulse) + startup_fsm.act("DELAY_ALIGN", + Xxuserrdy.eq(1), + Xxdlysreset.eq(1), + NextState("WAIT_DELAY_ALIGN") + ) + # Wait for delay alignment + startup_fsm.act("WAIT_DELAY_ALIGN", + Xxuserrdy.eq(1), + If(Xxdlysresetdone, NextState("WAIT_FIRST_PHASE_ALIGN_DONE")) + ) + # Wait 2 rising edges of rxphaligndone + # (from UG476 in buffer bypass 
config) + startup_fsm.act("WAIT_FIRST_PHASE_ALIGN_DONE", + Xxuserrdy.eq(1), + If(Xxphaligndone_rising, NextState("WAIT_SECOND_PHASE_ALIGN_DONE")) + ) + startup_fsm.act("WAIT_SECOND_PHASE_ALIGN_DONE", + Xxuserrdy.eq(1), + If(Xxphaligndone_rising, NextState("READY")) + ) + + # State(s) exclusive to Manual Mode: + else: + # Start delay alignment (hold) + startup_fsm.act("DELAY_ALIGN", + Xxuserrdy.eq(1), + Xxdlysreset.eq(1), + If(Xxdlysresetdone, + # TX master: proceed to initialize phase alignment manually + (NextState("PHASE_ALIGN_INIT") if not rx else + # RX master: proceed to start phase alignment manually + NextState("PHASE_ALIGN")) if mode == "master" else + # TX/RX slave: wait for phase alignment "done" on master + NextState("WAIT_MASTER") + ) + ) + if mode == "slave": + # TX/RX slave: Wait for phase alignment "done" on master + startup_fsm.act("WAIT_MASTER", + Xxuserrdy.eq(1), + If(self.master_phaligndone, + # TX slave: proceed to initialize phase alignment manually + NextState("PHASE_ALIGN_INIT") if not rx else + # RX slave: proceed to start phase alignment manually + NextState("PHASE_ALIGN") + ) + ) + if not rx: + # TX master/slave: Initialize phase alignment, wait rising edge on "done" + startup_fsm.act("PHASE_ALIGN_INIT", + Xxuserrdy.eq(1), + txphinit.eq(1), + If(txphinitdone, NextState("PHASE_ALIGN")) + ) + # Do phase alignment, wait rising edge on "done" + startup_fsm.act("PHASE_ALIGN", + Xxuserrdy.eq(1), + Xxphalign.eq(1), + If(Xxphaligndone_rising, + # TX/RX master: proceed to set T/RXDLYEN + NextState("FIRST_DLYEN") if mode == "master" else + # TX/RX slave: proceed to signal master + NextState("READY") + ) + ) + if mode == "master": + # Enable delay alignment in manual mode, wait rising edge on phase alignment "done" + startup_fsm.act("FIRST_DLYEN", + Xxuserrdy.eq(1), + Xxdlyen.eq(1), + If(Xxphaligndone_rising, NextState("WAIT_SLAVES")) + ) + # Wait for phase alignment "done" on all slaves + startup_fsm.act("WAIT_SLAVES", + Xxuserrdy.eq(1), +
self.master_phaligndone.eq(1), + If(self.slaves_phaligndone, NextState("SECOND_DLYEN")) + ) + # Re-enable delay alignment in manual mode, wait rising edge on phase alignment "done" + startup_fsm.act("SECOND_DLYEN", + Xxuserrdy.eq(1), + Xxdlyen.eq(1), + If(Xxphaligndone_rising, NextState("READY")) + ) + + # Transceiver is ready, alignment can be restarted startup_fsm.act("READY", Xxuserrdy.eq(1), self.done.eq(1), @@ -128,6 +233,25 @@ class GTXInit(Module): ) +class GTXInitPhaseAlignment(Module): + # Interconnect of phase alignment "done" signals for Manual Mode multi-lane + def __init__(self, gtx_inits): + master_phaligndone = Signal() # Fan-out to `slave.master_phaligndone`s + slaves_phaligndone = Signal(reset=1) # ANDed from `slave.done`s + # Slave channels + for gtx_init in gtx_inits: + if gtx_init.mode == "slave": + self.comb += gtx_init.master_phaligndone.eq(master_phaligndone) + slaves_phaligndone = slaves_phaligndone & gtx_init.done + # Master channels + for gtx_init in gtx_inits: + if gtx_init.mode == "master": + self.comb += [ + master_phaligndone.eq(gtx_init.master_phaligndone), + gtx_init.slaves_phaligndone.eq(slaves_phaligndone) + ] + + # Changes the phase of the transceiver RX clock to align the comma to # the LSBs of RXDATA, fixing the latency. 
# diff --git a/artiq/gateware/targets/kc705_drtio_master.py b/artiq/gateware/targets/kc705_drtio_master.py index 436d0ada2..7d322e795 100755 --- a/artiq/gateware/targets/kc705_drtio_master.py +++ b/artiq/gateware/targets/kc705_drtio_master.py @@ -30,7 +30,7 @@ class Master(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, gateware_identifier_str=None, **kwargs): + def __init__(self, gateware_identifier_str=None, drtio_sma=False, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -52,11 +52,14 @@ class Master(MiniSoC, AMPSoC): platform = self.platform self.comb += platform.request("sfp_tx_disable_n").eq(1) - tx_pads = platform.request("sfp_tx") - rx_pads = platform.request("sfp_rx") + tx_pads = [platform.request("sfp_tx")] + rx_pads = [platform.request("sfp_rx")] + if drtio_sma: + tx_pads.append(platform.request("user_sma_mgt_tx")) + rx_pads.append(platform.request("user_sma_mgt_rx")) # 1000BASE_BX10 Ethernet compatible, 125MHz RTIO clock - self.submodules.drtio_transceiver = gtx_7series.GTX_1000BASE_BX10( + self.submodules.drtio_transceiver = gtx_7series.GTX( clock_pads=platform.request("si5324_clkout"), tx_pads=tx_pads, rx_pads=rx_pads, @@ -64,24 +67,40 @@ class Master(MiniSoC, AMPSoC): self.csr_devices.append("drtio_transceiver") self.submodules.rtio_tsc = rtio.TSC("async", glbl_fine_ts_width=3) - cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"}) - self.submodules.drtio0 = cdr(DRTIOMaster( - self.rtio_tsc, self.drtio_transceiver.channels[0])) - self.csr_devices.append("drtio0") + drtio_csr_group = [] + drtioaux_csr_group = [] + drtioaux_memory_group = [] + drtio_cri = [] + for i in range(len(self.drtio_transceiver.channels)): + core_name = "drtio" + str(i) + coreaux_name = "drtioaux" + str(i) + memory_name = "drtioaux" + str(i) + "_mem" + drtio_csr_group.append(core_name) + drtioaux_csr_group.append(coreaux_name) + drtioaux_memory_group.append(memory_name) - self.submodules.drtioaux0 = 
cdr(DRTIOAuxController( - self.drtio0.link_layer)) - self.csr_devices.append("drtioaux0") - self.add_wb_slave(self.mem_map["drtioaux"], 0x800, - self.drtioaux0.bus) - self.add_memory_region("drtioaux0_mem", self.mem_map["drtioaux"] | self.shadow_base, 0x800) + cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx" + str(i)}) + core = cdr(DRTIOMaster( + self.rtio_tsc, self.drtio_transceiver.channels[i])) + setattr(self.submodules, core_name, core) + drtio_cri.append(core.cri) + self.csr_devices.append(core_name) + + coreaux = cdr(DRTIOAuxController(core.link_layer)) + setattr(self.submodules, coreaux_name, coreaux) + self.csr_devices.append(coreaux_name) + + memory_address = self.mem_map["drtioaux"] + 0x800*i + self.add_wb_slave(memory_address, 0x800, + coreaux.bus) + self.add_memory_region(memory_name, memory_address | self.shadow_base, 0x800) self.config["HAS_DRTIO"] = None self.config["HAS_DRTIO_ROUTING"] = None - self.add_csr_group("drtio", ["drtio0"]) - self.add_csr_group("drtioaux", ["drtioaux0"]) - self.add_memory_group("drtioaux_mem", ["drtioaux0_mem"]) + self.add_csr_group("drtio", drtio_csr_group) + self.add_csr_group("drtioaux", drtioaux_csr_group) + self.add_memory_group("drtioaux_mem", drtioaux_memory_group) self.config["RTIO_FREQUENCY"] = str(self.drtio_transceiver.rtio_clk_freq/1e6) self.submodules.si5324_rst_n = gpio.GPIOOut(platform.request("si5324").rst_n) @@ -99,11 +118,20 @@ class Master(MiniSoC, AMPSoC): ] rtio_clk_period = 1e9/self.drtio_transceiver.rtio_clk_freq - platform.add_period_constraint(self.drtio_transceiver.txoutclk, rtio_clk_period) - platform.add_period_constraint(self.drtio_transceiver.rxoutclk, rtio_clk_period) + # Constrain TX & RX timing for the first transceiver channel + # (First channel acts as master for phase alignment for all channels' TX) + gtx0 = self.drtio_transceiver.gtxs[0] + platform.add_period_constraint(gtx0.txoutclk, rtio_clk_period) + platform.add_period_constraint(gtx0.rxoutclk, rtio_clk_period) 
platform.add_false_path_constraints( self.crg.cd_sys.clk, - self.drtio_transceiver.txoutclk, self.drtio_transceiver.rxoutclk) + gtx0.txoutclk, gtx0.rxoutclk) + # Constrain RX timing for each remaining transceiver channel + # (Each channel performs single-lane phase alignment for RX) + for gtx in self.drtio_transceiver.gtxs[1:]: + platform.add_period_constraint(gtx.rxoutclk, rtio_clk_period) + platform.add_false_path_constraints( + self.crg.cd_sys.clk, gtx0.txoutclk, gtx.rxoutclk) rtio_channels = [] for i in range(8): @@ -128,7 +156,7 @@ class Master(MiniSoC, AMPSoC): self.register_kernel_cpu_csrdevice("rtio_dma") self.submodules.cri_con = rtio.CRIInterconnectShared( [self.rtio.cri, self.rtio_dma.cri], - [self.rtio_core.cri, self.drtio0.cri], + [self.rtio_core.cri] + drtio_cri, enable_routing=True) self.register_kernel_cpu_csrdevice("cri_con") self.submodules.routing_table = rtio.RoutingTableAccess(self.cri_con) @@ -141,9 +169,14 @@ def main(): builder_args(parser) soc_kc705_args(parser) parser.set_defaults(output_dir="artiq_kc705/master") + parser.add_argument("--drtio-sma", default=False, action="store_true", + help="use the SMA connectors (RX: J17, J18, TX: J19, J20) as 2nd DRTIO channel") args = parser.parse_args() - soc = Master(**soc_kc705_argdict(args)) + argdict = dict() + argdict["drtio_sma"] = args.drtio_sma + + soc = Master(**soc_kc705_argdict(args), **argdict) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/kc705_drtio_satellite.py b/artiq/gateware/targets/kc705_drtio_satellite.py index 048b1153e..4cb502fca 100755 --- a/artiq/gateware/targets/kc705_drtio_satellite.py +++ b/artiq/gateware/targets/kc705_drtio_satellite.py @@ -50,11 +50,11 @@ class Satellite(BaseSoC): platform = self.platform self.comb += platform.request("sfp_tx_disable_n").eq(1) - tx_pads = platform.request("sfp_tx") - rx_pads = platform.request("sfp_rx") + tx_pads = [platform.request("sfp_tx")] + rx_pads = [platform.request("sfp_rx")] # 1000BASE_BX10 Ethernet 
compatible, 125MHz RTIO clock - self.submodules.drtio_transceiver = gtx_7series.GTX_1000BASE_BX10( + self.submodules.drtio_transceiver = gtx_7series.GTX( clock_pads=platform.request("si5324_clkout"), tx_pads=tx_pads, rx_pads=rx_pads, @@ -105,11 +105,20 @@ class Satellite(BaseSoC): ] rtio_clk_period = 1e9/self.drtio_transceiver.rtio_clk_freq - platform.add_period_constraint(self.drtio_transceiver.txoutclk, rtio_clk_period) - platform.add_period_constraint(self.drtio_transceiver.rxoutclk, rtio_clk_period) + # Constrain TX & RX timing for the first transceiver channel + # (First channel acts as master for phase alignment for all channels' TX) + gtx0 = self.drtio_transceiver.gtxs[0] + platform.add_period_constraint(gtx0.txoutclk, rtio_clk_period) + platform.add_period_constraint(gtx0.rxoutclk, rtio_clk_period) platform.add_false_path_constraints( self.crg.cd_sys.clk, - self.drtio_transceiver.txoutclk, self.drtio_transceiver.rxoutclk) + gtx0.txoutclk, gtx0.rxoutclk) + # Constrain RX timing for each remaining transceiver channel + # (Each channel performs single-lane phase alignment for RX) + for gtx in self.drtio_transceiver.gtxs[1:]: + platform.add_period_constraint(gtx.rxoutclk, rtio_clk_period) + platform.add_false_path_constraints( + self.crg.cd_sys.clk, gtx.rxoutclk) rtio_channels = [] for i in range(8): From 9daf77bd58b77069b30a8d19b3bfff2c16d4c7f2 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Wed, 20 Jan 2021 14:51:10 +0800 Subject: [PATCH 5/8] kc705: add multichannel support on satellite * Two DRTIO channels (i.e. satellite and repeater) are enabled by default. * User can choose either the SFP or SMA as the satellite channel (by passing `--drtio-sat sfp` or `--drtio-sat sma` to the argparser), and the connector not chosen becomes the repeater channel. 
--- .../gateware/targets/kc705_drtio_satellite.py | 84 ++++++++++++++----- 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/artiq/gateware/targets/kc705_drtio_satellite.py b/artiq/gateware/targets/kc705_drtio_satellite.py index 4cb502fca..073d81343 100755 --- a/artiq/gateware/targets/kc705_drtio_satellite.py +++ b/artiq/gateware/targets/kc705_drtio_satellite.py @@ -31,13 +31,15 @@ class Satellite(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, gateware_identifier_str=None, **kwargs): + def __init__(self, gateware_identifier_str=None, drtio_sat="sfp", **kwargs): BaseSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, integrated_sram_size=8192, **kwargs) + assert drtio_sat in ["sfp", "sma"] + add_identifier(self, gateware_identifier_str=gateware_identifier_str) if isinstance(self.platform.toolchain, XilinxVivadoToolchain): @@ -50,8 +52,15 @@ class Satellite(BaseSoC): platform = self.platform self.comb += platform.request("sfp_tx_disable_n").eq(1) - tx_pads = [platform.request("sfp_tx")] - rx_pads = [platform.request("sfp_rx")] + tx_pads = [ + platform.request("sfp_tx"), platform.request("user_sma_mgt_tx") + ] + rx_pads = [ + platform.request("sfp_rx"), platform.request("user_sma_mgt_rx") + ] + if drtio_sat == "sma": + tx_pads = tx_pads[::-1] + rx_pads = rx_pads[::-1] # 1000BASE_BX10 Ethernet compatible, 125MHz RTIO clock self.submodules.drtio_transceiver = gtx_7series.GTX( @@ -62,24 +71,49 @@ class Satellite(BaseSoC): self.csr_devices.append("drtio_transceiver") self.submodules.rtio_tsc = rtio.TSC("sync", glbl_fine_ts_width=3) - cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx0"}) - self.submodules.rx_synchronizer = cdr(XilinxRXSynchronizer()) - self.submodules.drtiosat = cdr(DRTIOSatellite( - self.rtio_tsc, self.drtio_transceiver.channels[0], self.rx_synchronizer)) - self.csr_devices.append("drtiosat") + drtioaux_csr_group = [] + drtioaux_memory_group = [] + drtiorep_csr_group = [] + self.drtio_cri = [] 
+ for i in range(len(self.drtio_transceiver.channels)): + coreaux_name = "drtioaux" + str(i) + memory_name = "drtioaux" + str(i) + "_mem" + drtioaux_csr_group.append(coreaux_name) + drtioaux_memory_group.append(memory_name) - self.submodules.drtioaux0 = cdr(DRTIOAuxController( - self.drtiosat.link_layer)) - self.csr_devices.append("drtioaux0") - self.add_wb_slave(self.mem_map["drtioaux"], 0x800, - self.drtioaux0.bus) - self.add_memory_region("drtioaux0_mem", self.mem_map["drtioaux"] | self.shadow_base, 0x800) + cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx" + str(i)}) + # Satellite + if i == 0: + self.submodules.rx_synchronizer = cdr(XilinxRXSynchronizer()) + core = cdr(DRTIOSatellite( + self.rtio_tsc, self.drtio_transceiver.channels[0], self.rx_synchronizer)) + self.submodules.drtiosat = core + self.csr_devices.append("drtiosat") + # Repeaters + else: + corerep_name = "drtiorep" + str(i-1) + drtiorep_csr_group.append(corerep_name) + core = cdr(DRTIORepeater( + self.rtio_tsc, self.drtio_transceiver.channels[i])) + setattr(self.submodules, corerep_name, core) + self.drtio_cri.append(core.cri) + self.csr_devices.append(corerep_name) + + coreaux = cdr(DRTIOAuxController(core.link_layer)) + setattr(self.submodules, coreaux_name, coreaux) + self.csr_devices.append(coreaux_name) + + memory_address = self.mem_map["drtioaux"] + 0x800*i + self.add_wb_slave(memory_address, 0x800, + coreaux.bus) + self.add_memory_region(memory_name, memory_address | self.shadow_base, 0x800) self.config["HAS_DRTIO"] = None - self.add_csr_group("drtio", ["drtiosat"]) - self.add_csr_group("drtioaux", ["drtioaux0"]) - self.add_memory_group("drtioaux_mem", ["drtioaux0_mem"]) + self.config["HAS_DRTIO_ROUTING"] = None + self.add_csr_group("drtioaux", drtioaux_csr_group) + self.add_memory_group("drtioaux_mem", drtioaux_memory_group) + self.add_csr_group("drtiorep", drtiorep_csr_group) self.config["RTIO_FREQUENCY"] = str(self.drtio_transceiver.rtio_clk_freq/1e6) # Si5324 Phaser @@ -135,7 +169,13 @@ 
class Satellite(BaseSoC): self.submodules.local_io = SyncRTIO(self.rtio_tsc, rtio_channels) self.comb += self.drtiosat.async_errors.eq(self.local_io.async_errors) - self.comb += self.drtiosat.cri.connect(self.local_io.cri) + self.submodules.cri_con = rtio.CRIInterconnectShared( + [self.drtiosat.cri], + [self.local_io.cri] + self.drtio_cri, + mode="sync", enable_routing=True) + self.csr_devices.append("cri_con") + self.submodules.routing_table = rtio.RoutingTableAccess(self.cri_con) + self.csr_devices.append("routing_table") def main(): @@ -144,9 +184,15 @@ def main(): builder_args(parser) soc_kc705_args(parser) parser.set_defaults(output_dir="artiq_kc705/satellite") + parser.add_argument("--drtio-sat", default="sfp", + help="use the SFP or the SMA connectors (RX: J17, J18, TX: J19, J20) " + "as DRTIO satellite channel (choices: sfp, sma; default: sfp)") args = parser.parse_args() - soc = Satellite(**soc_kc705_argdict(args)) + argdict = dict() + argdict["drtio_sat"] = args.drtio_sat + + soc = Satellite(**soc_kc705_argdict(args), **argdict) build_artiq_soc(soc, builder_argdict(args)) From 88b14082b623a083b34b342a3b3dbc90425e785a Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Wed, 20 Jan 2021 14:40:09 +0800 Subject: [PATCH 6/8] drtio/transceiver/gtx: delete obsolete modules --- .../gateware/drtio/transceiver/gtx_7series.py | 63 ----------- .../drtio/transceiver/gtx_7series_init.py | 107 ------------------ 2 files changed, 170 deletions(-) diff --git a/artiq/gateware/drtio/transceiver/gtx_7series.py b/artiq/gateware/drtio/transceiver/gtx_7series.py index 5b8828f41..a977d2b4c 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series.py @@ -346,66 +346,3 @@ class GTX(Module, TransceiverInterface): getattr(self, "cd_rtio_rx" + str(i)).clk.eq(self.gtxs[i].cd_rtio_rx.clk), getattr(self, "cd_rtio_rx" + str(i)).rst.eq(self.gtxs[i].cd_rtio_rx.rst) ] - - -class RXSynchronizer(Module, AutoCSR): - """Delays the data received in 
the rtio_rx domain by a configurable amount - so that it meets s/h in the rtio domain, and recapture it in the rtio - domain. This has fixed latency. - - Since Xilinx doesn't provide decent on-chip delay lines, we implement the - delay with MMCM that provides a clock and a finely configurable phase, used - to resample the data. - - The phase has to be determined either empirically or by making sense of the - Xilinx scriptures (when existent) and should be constant for a given design - placement. - """ - def __init__(self, rtio_clk_freq, initial_phase=0.0): - self.phase_shift = CSR() - self.phase_shift_done = CSRStatus() - - self.clock_domains.cd_rtio_delayed = ClockDomain() - - mmcm_output = Signal() - mmcm_fb = Signal() - mmcm_locked = Signal() - # maximize VCO frequency to maximize phase shift resolution - mmcm_mult = 1200e6//rtio_clk_freq - self.specials += [ - Instance("MMCME2_ADV", - p_CLKIN1_PERIOD=1e9/rtio_clk_freq, - i_CLKIN1=ClockSignal("rtio_rx"), - i_RST=ResetSignal("rtio_rx"), - i_CLKINSEL=1, # yes, 1=CLKIN1 0=CLKIN2 - - p_CLKFBOUT_MULT_F=mmcm_mult, - p_CLKOUT0_DIVIDE_F=mmcm_mult, - p_CLKOUT0_PHASE=initial_phase, - p_DIVCLK_DIVIDE=1, - - # According to Xilinx, there is no guarantee of input/output - # phase relationship when using internal feedback. We assume - # here that the input/ouput skew is constant to save BUFGs. 
- o_CLKFBOUT=mmcm_fb, - i_CLKFBIN=mmcm_fb, - - p_CLKOUT0_USE_FINE_PS="TRUE", - o_CLKOUT0=mmcm_output, - o_LOCKED=mmcm_locked, - - i_PSCLK=ClockSignal(), - i_PSEN=self.phase_shift.re, - i_PSINCDEC=self.phase_shift.r, - o_PSDONE=self.phase_shift_done.status, - ), - Instance("BUFR", i_I=mmcm_output, o_O=self.cd_rtio_delayed.clk), - AsyncResetSynchronizer(self.cd_rtio_delayed, ~mmcm_locked) - ] - - def resync(self, signal): - delayed = Signal.like(signal, related=signal) - synchronized = Signal.like(signal, related=signal) - self.sync.rtio_delayed += delayed.eq(signal) - self.sync.rtio += synchronized.eq(delayed) - return synchronized diff --git a/artiq/gateware/drtio/transceiver/gtx_7series_init.py b/artiq/gateware/drtio/transceiver/gtx_7series_init.py index 9e3f65a15..0536cf47a 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series_init.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series_init.py @@ -250,110 +250,3 @@ class GTXInitPhaseAlignment(Module): master_phaligndone.eq(gtx_init.master_phaligndone), gtx_init.slaves_phaligndone.eq(slaves_phaligndone) ] - - -# Changes the phase of the transceiver RX clock to align the comma to -# the LSBs of RXDATA, fixing the latency. -# -# This is implemented by repeatedly resetting the transceiver until it -# gives out the correct phase. Each reset gives a random phase. -# -# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA -# would achieve this faster and in a cleaner way. But: -# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead -# of 1 UI at every pulse. It is unclear what the latency becomes. -# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed. -# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless -# transceiver "feature". -# -# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped -# compared to the usual 8b10b binary representation. 
-class BruteforceClockAligner(Module): - def __init__(self, comma, rtio_clk_freq, check_period=6e-3): - self.rxdata = Signal(20) - self.restart = Signal() - - self.ready = Signal() - - check_max_val = ceil(check_period*rtio_clk_freq) - check_counter = Signal(max=check_max_val+1) - check = Signal() - reset_check_counter = Signal() - self.sync.rtio += [ - check.eq(0), - If(reset_check_counter, - check_counter.eq(check_max_val) - ).Else( - If(check_counter == 0, - check.eq(1), - check_counter.eq(check_max_val) - ).Else( - check_counter.eq(check_counter-1) - ) - ) - ] - - checks_reset = PulseSynchronizer("rtio", "rtio_rx") - self.submodules += checks_reset - - comma_n = ~comma & 0b1111111111 - comma_seen_rxclk = Signal() - comma_seen = Signal() - comma_seen_rxclk.attr.add("no_retiming") - self.specials += MultiReg(comma_seen_rxclk, comma_seen) - self.sync.rtio_rx += \ - If(checks_reset.o, - comma_seen_rxclk.eq(0) - ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n), - comma_seen_rxclk.eq(1) - ) - - error_seen_rxclk = Signal() - error_seen = Signal() - error_seen_rxclk.attr.add("no_retiming") - self.specials += MultiReg(error_seen_rxclk, error_seen) - rx1cnt = Signal(max=11) - self.sync.rtio_rx += [ - rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])), - If(checks_reset.o, - error_seen_rxclk.eq(0) - ).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6), - error_seen_rxclk.eq(1) - ) - ] - - fsm = ClockDomainsRenamer("rtio")(FSM(reset_state="WAIT_COMMA")) - self.submodules += fsm - - fsm.act("WAIT_COMMA", - If(check, - # Errors are still OK at this stage, as the transceiver - # has just been reset and may output garbage data. 
- If(comma_seen, - NextState("WAIT_NOERROR") - ).Else( - self.restart.eq(1) - ), - checks_reset.i.eq(1) - ) - ) - fsm.act("WAIT_NOERROR", - If(check, - If(comma_seen & ~error_seen, - NextState("READY") - ).Else( - self.restart.eq(1), - NextState("WAIT_COMMA") - ), - checks_reset.i.eq(1) - ) - ) - fsm.act("READY", - reset_check_counter.eq(1), - self.ready.eq(1), - If(error_seen, - checks_reset.i.eq(1), - self.restart.eq(1), - NextState("WAIT_COMMA") - ) - ) From 7c4eed7a114f3a47b92d89f036c7b6f1f087148b Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Wed, 20 Jan 2021 14:57:22 +0800 Subject: [PATCH 7/8] kc705: simplify DRTIO master & satellite * KC705 master: user can no longer choose whether or not the SMA acts as the 2nd DRTIO channel; SFP and SMA now act as the 1st and 2nd channel respectively by default. * KC705 satellite: user should now use `--sma` to enable using the SMA as the satellite channel; SFP acts as the satellite channel by default. --- artiq/gateware/targets/kc705_drtio_master.py | 20 ++++++++----------- .../gateware/targets/kc705_drtio_satellite.py | 14 ++++++------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/artiq/gateware/targets/kc705_drtio_master.py b/artiq/gateware/targets/kc705_drtio_master.py index 7d322e795..0cf74562a 100755 --- a/artiq/gateware/targets/kc705_drtio_master.py +++ b/artiq/gateware/targets/kc705_drtio_master.py @@ -30,7 +30,7 @@ class Master(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, gateware_identifier_str=None, drtio_sma=False, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -52,11 +52,12 @@ class Master(MiniSoC, AMPSoC): platform = self.platform self.comb += platform.request("sfp_tx_disable_n").eq(1) - tx_pads = [platform.request("sfp_tx")] - rx_pads = [platform.request("sfp_rx")] - if drtio_sma: - tx_pads.append(platform.request("user_sma_mgt_tx")) - 
rx_pads.append(platform.request("user_sma_mgt_rx")) + tx_pads = [ + platform.request("sfp_tx"), platform.request("user_sma_mgt_tx") + ] + rx_pads = [ + platform.request("sfp_rx"), platform.request("user_sma_mgt_rx") + ] # 1000BASE_BX10 Ethernet compatible, 125MHz RTIO clock self.submodules.drtio_transceiver = gtx_7series.GTX( @@ -169,14 +170,9 @@ def main(): builder_args(parser) soc_kc705_args(parser) parser.set_defaults(output_dir="artiq_kc705/master") - parser.add_argument("--drtio-sma", default=False, action="store_true", - help="use the SMA connectors (RX: J17, J18, TX: J19, J20) as 2nd DRTIO channel") args = parser.parse_args() - argdict = dict() - argdict["drtio_sma"] = args.drtio_sma - - soc = Master(**soc_kc705_argdict(args), **argdict) + soc = Master(**soc_kc705_argdict(args)) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/kc705_drtio_satellite.py b/artiq/gateware/targets/kc705_drtio_satellite.py index 073d81343..4f7fbb361 100755 --- a/artiq/gateware/targets/kc705_drtio_satellite.py +++ b/artiq/gateware/targets/kc705_drtio_satellite.py @@ -31,15 +31,13 @@ class Satellite(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, gateware_identifier_str=None, drtio_sat="sfp", **kwargs): + def __init__(self, gateware_identifier_str=None, sma_as_sat=False, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, integrated_sram_size=8192, **kwargs) - assert drtio_sat in ["sfp", "sma"] - add_identifier(self, gateware_identifier_str=gateware_identifier_str) if isinstance(self.platform.toolchain, XilinxVivadoToolchain): @@ -58,7 +56,7 @@ class Satellite(BaseSoC): rx_pads = [ platform.request("sfp_rx"), platform.request("user_sma_mgt_rx") ] - if drtio_sat == "sma": + if sma_as_sat: tx_pads = tx_pads[::-1] rx_pads = rx_pads[::-1] @@ -184,13 +182,13 @@ def main(): builder_args(parser) soc_kc705_args(parser) parser.set_defaults(output_dir="artiq_kc705/satellite") - 
parser.add_argument("--drtio-sat", default="sfp", - help="use the SFP or the SMA connectors (RX: J17, J18, TX: J19, J20) " - "as DRTIO satellite channel (choices: sfp, sma; default: sfp)") + parser.add_argument("--sma", default=False, action="store_true", + help="use the SMA connectors (RX: J17, J18, TX: J19, J20) " + "as DRTIO satellite channel instead of the SFP") args = parser.parse_args() argdict = dict() - argdict["drtio_sat"] = args.drtio_sat + argdict["sma_as_sat"] = args.sma soc = Satellite(**soc_kc705_argdict(args), **argdict) build_artiq_soc(soc, builder_argdict(args)) From a0fd5261eaaaa132fb176e36f361f8c4555bb94a Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Thu, 21 Jan 2021 11:30:11 +0800 Subject: [PATCH 8/8] kc705: cleanup --- artiq/gateware/drtio/transceiver/gtx_7series.py | 7 ------- artiq/gateware/drtio/transceiver/gtx_7series_init.py | 8 +++----- artiq/gateware/targets/kc705_drtio_master.py | 2 +- artiq/gateware/targets/kc705_drtio_satellite.py | 4 ---- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/artiq/gateware/drtio/transceiver/gtx_7series.py b/artiq/gateware/drtio/transceiver/gtx_7series.py index a977d2b4c..a2da39822 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series.py @@ -228,13 +228,6 @@ class GTX_20X(Module): p_RXCDR_PH_RESET_ON_EIDLE=0b0, p_RXCDR_LOCK_CFG=0b010101, - # # RX Initialization and Reset Attributes - # p_RXCDRFREQRESET_TIME=0b00001, - # p_RXCDRPHRESET_TIME=0b00001, - # p_RXISCANRESET_TIME=0b00001, - # p_RXPCSRESET_TIME=0b00001, - # p_RXPMARESET_TIME=0b00011, - # Pads i_GTXRXP=rx_pads.p, i_GTXRXN=rx_pads.n, diff --git a/artiq/gateware/drtio/transceiver/gtx_7series_init.py b/artiq/gateware/drtio/transceiver/gtx_7series_init.py index 0536cf47a..70c69a19c 100644 --- a/artiq/gateware/drtio/transceiver/gtx_7series_init.py +++ b/artiq/gateware/drtio/transceiver/gtx_7series_init.py @@ -1,9 +1,7 @@ from math import ceil -from functools import reduce -from 
operator import add from migen import * -from migen.genlib.cdc import MultiReg, PulseSynchronizer +from migen.genlib.cdc import MultiReg from migen.genlib.misc import WaitTimer from migen.genlib.fsm import FSM @@ -137,7 +135,7 @@ class GTXInit(Module): If(Xxresetdone, NextState("DELAY_ALIGN")) ) - # State(s) exclusive to Auto Mode: + # States exclusive to Auto Mode: if mode == "single": # Start delay alignment (pulse) startup_fsm.act("DELAY_ALIGN", @@ -161,7 +159,7 @@ class GTXInit(Module): If(Xxphaligndone_rising, NextState("READY")) ) - # State(s) exclusive to Manual Mode: + # States exclusive to Manual Mode: else: # Start delay alignment (hold) startup_fsm.act("DELAY_ALIGN", diff --git a/artiq/gateware/targets/kc705_drtio_master.py b/artiq/gateware/targets/kc705_drtio_master.py index 0cf74562a..78d5d054b 100755 --- a/artiq/gateware/targets/kc705_drtio_master.py +++ b/artiq/gateware/targets/kc705_drtio_master.py @@ -9,8 +9,8 @@ from migen.build.xilinx.ise import XilinxISEToolchain from misoc.cores import spi as spi_csr from misoc.cores import gpio +from misoc.integration.builder import * from misoc.targets.kc705 import MiniSoC, soc_kc705_args, soc_kc705_argdict -from misoc.integration.builder import builder_args, builder_argdict from artiq.gateware.amp import AMPSoC from artiq.gateware import rtio diff --git a/artiq/gateware/targets/kc705_drtio_satellite.py b/artiq/gateware/targets/kc705_drtio_satellite.py index 4f7fbb361..4ff346fdf 100755 --- a/artiq/gateware/targets/kc705_drtio_satellite.py +++ b/artiq/gateware/targets/kc705_drtio_satellite.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import argparse -import os from migen import * from migen.build.generic_platform import * @@ -21,9 +20,6 @@ from artiq.gateware.drtio.rx_synchronizer import XilinxRXSynchronizer from artiq.gateware.drtio import * from artiq.build_soc import * -# DEBUG -from microscope import * - class Satellite(BaseSoC): mem_map = {