From 8fd13061451191745e518dbc6679d8a7f15b0800 Mon Sep 17 00:00:00 2001 From: linuswck Date: Thu, 5 Oct 2023 11:28:45 +0800 Subject: [PATCH 1/3] zynq_clocking: Add sys5x, 208MHz CLK & IDELAYCTRL - Port from artiq repo - Generate sys5x for EEM Serdes, 208MHz REF Clock for IDELAYCTRL - Add IDELAYCTRL for IDELAYE2 in EEM Serdes --- src/gateware/zynq_clocking.py | 70 ++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/src/gateware/zynq_clocking.py b/src/gateware/zynq_clocking.py index 72bb894..b85c365 100644 --- a/src/gateware/zynq_clocking.py +++ b/src/gateware/zynq_clocking.py @@ -69,6 +69,8 @@ class SYSCRG(Module, AutoCSR): # assumes bootstrap clock is same freq as main and sys output self.clock_domains.cd_sys = ClockDomain() self.clock_domains.cd_sys4x = ClockDomain(reset_less=True) + self.clock_domains.cd_sys5x = ClockDomain(reset_less=True) + self.clock_domains.cd_clk200 = ClockDomain() self.current_clock = CSRStatus() @@ -78,11 +80,6 @@ class SYSCRG(Module, AutoCSR): period = 1e9/freq - pll_locked = Signal() - pll_sys = Signal() - pll_sys4x = Signal() - fb_clk = Signal() - self.submodules.clk_sw_fsm = ClockSwitchFSM() if clk_sw is None: @@ -91,32 +88,55 @@ class SYSCRG(Module, AutoCSR): else: self.comb += self.clk_sw_fsm.i_clk_sw.eq(clk_sw) + mmcm_locked = Signal() + mmcm_sys = Signal() + mmcm_sys4x = Signal() + mmcm_sys5x = Signal() + mmcm_clk208 = Signal() + mmcm_fb_clk = Signal() self.specials += [ - Instance("PLLE2_ADV", - p_STARTUP_WAIT="FALSE", o_LOCKED=pll_locked, - p_BANDWIDTH="HIGH", - p_REF_JITTER1=0.001, - p_CLKIN1_PERIOD=period, i_CLKIN1=main_clk, - p_CLKIN2_PERIOD=period, i_CLKIN2=bootstrap_clk, - i_CLKINSEL=self.clk_sw_fsm.o_clk_sw, + Instance("MMCME2_ADV", + p_STARTUP_WAIT="FALSE", o_LOCKED=mmcm_locked, + p_BANDWIDTH="HIGH", + p_REF_JITTER1=0.001, + p_CLKIN1_PERIOD=period, i_CLKIN1=main_clk, + p_CLKIN2_PERIOD=period, i_CLKIN2=bootstrap_clk, + i_CLKINSEL=self.clk_sw_fsm.o_clk_sw, - # VCO @ 1.5GHz 
when using 125MHz input - # 1.2GHz for 100MHz (zc706) - p_CLKFBOUT_MULT=12, p_DIVCLK_DIVIDE=1, - i_CLKFBIN=fb_clk, - i_RST=self.clk_sw_fsm.o_reset, + # VCO @ 1.25GHz + p_CLKFBOUT_MULT_F=10, p_DIVCLK_DIVIDE=1, + i_CLKFBIN=mmcm_fb_clk, + i_RST=self.clk_sw_fsm.o_reset, - o_CLKFBOUT=fb_clk, + o_CLKFBOUT=mmcm_fb_clk, - p_CLKOUT0_DIVIDE=3, p_CLKOUT0_PHASE=0.0, - o_CLKOUT0=pll_sys4x, + p_CLKOUT0_DIVIDE_F=2.5, p_CLKOUT0_PHASE=0.0, o_CLKOUT0=mmcm_sys4x, - p_CLKOUT1_DIVIDE=12, p_CLKOUT1_PHASE=0.0, - o_CLKOUT1=pll_sys), - Instance("BUFG", i_I=pll_sys, o_O=self.cd_sys.clk), - Instance("BUFG", i_I=pll_sys4x, o_O=self.cd_sys4x.clk), + # 125MHz + p_CLKOUT1_DIVIDE=10, p_CLKOUT1_PHASE=0.0, o_CLKOUT1=mmcm_sys, - AsyncResetSynchronizer(self.cd_sys, ~pll_locked), + # 625MHz + p_CLKOUT2_DIVIDE=2, p_CLKOUT2_PHASE=0.0, o_CLKOUT2=mmcm_sys5x, + + # 208MHz + p_CLKOUT3_DIVIDE=6, p_CLKOUT3_PHASE=0.0, o_CLKOUT3=mmcm_clk208, + ), + Instance("BUFG", i_I=mmcm_sys5x, o_O=self.cd_sys5x.clk), + Instance("BUFG", i_I=mmcm_sys, o_O=self.cd_sys.clk), + Instance("BUFG", i_I=mmcm_sys4x, o_O=self.cd_sys4x.clk), + Instance("BUFG", i_I=mmcm_clk208, o_O=self.cd_clk200.clk), + AsyncResetSynchronizer(self.cd_sys, ~mmcm_locked), + AsyncResetSynchronizer(self.cd_clk200, ~mmcm_locked), ] + reset_counter = Signal(4, reset=15) + ic_reset = Signal(reset=1) + self.sync.clk200 += \ + If(reset_counter != 0, + reset_counter.eq(reset_counter - 1) + ).Else( + ic_reset.eq(0) + ) + self.specials += Instance("IDELAYCTRL", i_REFCLK=ClockSignal("clk200"), i_RST=ic_reset) + self.comb += self.current_clock.status.eq(self.clk_sw_fsm.o_clk_sw) -- 2.42.0 From b15322b6baa59fc10ac5ece76994ff93d035487e Mon Sep 17 00:00:00 2001 From: linuswck Date: Tue, 10 Oct 2023 10:41:07 +0800 Subject: [PATCH 2/3] kasli_soc: Add support for shuttler on gateware - Port from artiq repo - Add EEM_DRTIO gateware --- src/gateware/kasli_soc.py | 60 ++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git 
a/src/gateware/kasli_soc.py b/src/gateware/kasli_soc.py index db65bd2..e782b51 100755 --- a/src/gateware/kasli_soc.py +++ b/src/gateware/kasli_soc.py @@ -16,7 +16,7 @@ from artiq.coredevice import jsondesc from artiq.gateware import rtio, eem_7series from artiq.gateware.rtio.xilinx_clocking import fix_serdes_timing_path from artiq.gateware.rtio.phy import ttl_simple -from artiq.gateware.drtio.transceiver import gtx_7series +from artiq.gateware.drtio.transceiver import gtx_7series, eem_serdes from artiq.gateware.drtio.siphaser import SiPhaser7Series from artiq.gateware.drtio.rx_synchronizer import XilinxRXSynchronizer from artiq.gateware.drtio import * @@ -201,6 +201,7 @@ class GenericMaster(SoCCore): def __init__(self, description, acpki=False): clk_freq = description["rtio_frequency"] + has_drtio_over_eem = any(peripheral["type"] == "shuttler" for peripheral in description["peripherals"]) self.acpki = acpki platform = kasli_soc.Platform() @@ -246,6 +247,8 @@ class GenericMaster(SoCCore): self.rtio_channels = [] has_grabber = any(peripheral["type"] == "grabber" for peripheral in description["peripherals"]) + if has_drtio_over_eem: + self.eem_drtio_channels = [] if has_grabber: self.grabber_csr_group = [] eem_7series.add_peripherals(self, description["peripherals"], iostandard=eem_iostandard) @@ -260,17 +263,17 @@ class GenericMaster(SoCCore): self.submodules.rtio_tsc = rtio.TSC(glbl_fine_ts_width=3) - drtio_csr_group = [] - drtioaux_csr_group = [] - drtioaux_memory_group = [] + self.drtio_csr_group = [] + self.drtioaux_csr_group = [] + self.drtioaux_memory_group = [] self.drtio_cri = [] for i in range(len(self.gt_drtio.channels)): core_name = "drtio" + str(i) coreaux_name = "drtioaux" + str(i) memory_name = "drtioaux" + str(i) + "_mem" - drtio_csr_group.append(core_name) - drtioaux_csr_group.append(coreaux_name) - drtioaux_memory_group.append(memory_name) + self.drtio_csr_group.append(core_name) + self.drtioaux_csr_group.append(coreaux_name) + 
self.drtioaux_memory_group.append(memory_name) cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx" + str(i)}) @@ -289,9 +292,10 @@ class GenericMaster(SoCCore): self.add_memory_region(memory_name, self.mem_map["csr"] + memory_address, size * 2) self.config["HAS_DRTIO"] = None self.config["HAS_DRTIO_ROUTING"] = None - self.add_csr_group("drtio", drtio_csr_group) - self.add_csr_group("drtioaux", drtioaux_csr_group) - self.add_memory_group("drtioaux_mem", drtioaux_memory_group) + + if has_drtio_over_eem: + self.add_eem_drtio(self.eem_drtio_channels) + self.add_drtio_cpuif_groups() self.submodules.rtio_core = rtio.Core( self.rtio_tsc, self.rtio_channels, lane_count=description["sed_lanes"] @@ -340,6 +344,42 @@ class GenericMaster(SoCCore): self.comb += [self.virtual_leds.get(i).eq(channel.rx_ready) for i, channel in enumerate(self.gt_drtio.channels)] + def add_eem_drtio(self, eem_drtio_channels): + # Must be called before invoking add_rtio() to construct the CRI + # interconnect properly + self.submodules.eem_transceiver = eem_serdes.EEMSerdes(self.platform, eem_drtio_channels) + self.csr_devices.append("eem_transceiver") + self.config["HAS_DRTIO_EEM"] = None + self.config["EEM_DRTIO_COUNT"] = len(eem_drtio_channels) + + cdr = ClockDomainsRenamer({"rtio_rx": "sys"}) + for i in range(len(self.eem_transceiver.channels)): + channel = i + len(self.gt_drtio.channels) + core_name = "drtio" + str(channel) + coreaux_name = "drtioaux" + str(channel) + memory_name = "drtioaux" + str(channel) + "_mem" + self.drtio_csr_group.append(core_name) + self.drtioaux_csr_group.append(coreaux_name) + self.drtioaux_memory_group.append(memory_name) + + core = cdr(DRTIOMaster(self.rtio_tsc, self.eem_transceiver.channels[i])) + setattr(self.submodules, core_name, core) + self.drtio_cri.append(core.cri) + self.csr_devices.append(core_name) + + coreaux = cdr(drtio_aux_controller.DRTIOAuxControllerBare(core.link_layer)) + setattr(self.submodules, coreaux_name, coreaux) + 
self.csr_devices.append(coreaux_name) + + size = coreaux.get_mem_size() + memory_address = self.axi2csr.register_port(coreaux.get_tx_port(), size) + self.axi2csr.register_port(coreaux.get_rx_port(), size) + self.add_memory_region(memory_name, self.mem_map["csr"] + memory_address, size * 2) + + def add_drtio_cpuif_groups(self): + self.add_csr_group("drtio", self.drtio_csr_group) + self.add_csr_group("drtioaux", self.drtioaux_csr_group) + self.add_memory_group("drtioaux_mem", self.drtioaux_memory_group) class GenericSatellite(SoCCore): -- 2.42.0 From a4d1be00c050b6d03cd7e7e726e7c076d916fbb5 Mon Sep 17 00:00:00 2001 From: linuswck Date: Tue, 10 Oct 2023 10:47:24 +0800 Subject: [PATCH 3/3] Firmware: Add drtio_eem.rs support - Port from Artiq repo - Initialize the drtio_eem on main, rtio_clocking - Driver for eem_transceiver --- src/Cargo.lock | 1 + src/libboard_artiq/Cargo.toml.tpl | 1 + src/libboard_artiq/src/drtio_eem.rs | 233 ++++++++++++++++++++++++++++ src/libboard_artiq/src/lib.rs | 2 + src/runtime/src/main.rs | 5 + src/runtime/src/rtio_clocking.rs | 3 + 6 files changed, 245 insertions(+) create mode 100644 src/libboard_artiq/src/drtio_eem.rs diff --git a/src/Cargo.lock b/src/Cargo.lock index 0d1f74e..01162ee 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -271,6 +271,7 @@ dependencies = [ "libconfig", "libcortex_a9", "libregister", + "libsupport_zynq", "log", "log_buffer", "nb 1.0.0", diff --git a/src/libboard_artiq/Cargo.toml.tpl b/src/libboard_artiq/Cargo.toml.tpl index 3fc8809..9c1ad24 100644 --- a/src/libboard_artiq/Cargo.toml.tpl +++ b/src/libboard_artiq/Cargo.toml.tpl @@ -25,6 +25,7 @@ void = { version = "1", default-features = false } io = { path = "../libio", features = ["byteorder"] } libboard_zynq = { path = "@@ZYNQ_RS@@/libboard_zynq" } +libsupport_zynq = { path = "@@ZYNQ_RS@@/libsupport_zynq", default-features = false, features = ["alloc_core"] } libregister = { path = "@@ZYNQ_RS@@/libregister" } libconfig = { path = "@@ZYNQ_RS@@/libconfig", 
features = ["fat_lfn"] } libcortex_a9 = { path = "@@ZYNQ_RS@@/libcortex_a9" } diff --git a/src/libboard_artiq/src/drtio_eem.rs b/src/libboard_artiq/src/drtio_eem.rs new file mode 100644 index 0000000..90b77e3 --- /dev/null +++ b/src/libboard_artiq/src/drtio_eem.rs @@ -0,0 +1,233 @@ +use crate::pl; +use embedded_hal::prelude::_embedded_hal_blocking_delay_DelayUs; +use libboard_zynq::timer::GlobalTimer; +use libconfig::Config; +use libsupport_zynq::alloc::format; +use log::{debug, error, info}; + +struct SerdesConfig { + pub delay: [u8; 4], +} + +impl SerdesConfig { + pub fn as_bytes(&self) -> &[u8] { + unsafe { + core::slice::from_raw_parts( + (self as *const SerdesConfig) as *const u8, + core::mem::size_of::<SerdesConfig>(), + ) + } + } +} + +fn select_lane(lane_no: u8) { + unsafe { + pl::csr::eem_transceiver::lane_sel_write(lane_no); + } +} + +fn apply_delay(tap: u8, timer: &mut GlobalTimer) { + unsafe { + pl::csr::eem_transceiver::dly_cnt_in_write(tap); + pl::csr::eem_transceiver::dly_ld_write(1); + timer.delay_us(1); + assert!(tap as u8 == pl::csr::eem_transceiver::dly_cnt_out_read()); + } +} + +fn apply_config(config: &SerdesConfig, timer: &mut GlobalTimer) { + for lane_no in 0..4 { + select_lane(lane_no as u8); + apply_delay(config.delay[lane_no], timer); + } +} + +unsafe fn assign_delay(timer: &mut GlobalTimer) -> SerdesConfig { + // Select an appropriate delay for lane 0 + select_lane(0); + + // + + let mut best_dly = None; + + loop { + let mut prev = None; + for curr_dly in 0..32 { + //let read_align = read_align_fn(curr_dly, timer); + let curr_low_rate = read_align(curr_dly, timer); + + if let Some(prev_low_rate) = prev { + // This is potentially a crossover position + if prev_low_rate <= curr_low_rate && curr_low_rate >= 0.5 { + let prev_dev = 0.5 - prev_low_rate; + let curr_dev = curr_low_rate - 0.5; + let selected_idx = if prev_dev < curr_dev { + curr_dly - 1 + } else { + curr_dly + }; + + // The setup/hold calibration timing (even with + // tolerance) might be 
invalid in other lanes due to skew. + // 5 taps is very conservative, generally it is 1 or 2 + if selected_idx < 5 { + prev = None; + continue; + } else { + best_dly = Some(selected_idx); + break; + } + } + } + + // Only rising slope from <= 0.5 can result in a rising low rate + // crossover at 50%. + if curr_low_rate <= 0.5 { + prev = Some(curr_low_rate); + } + } + + if best_dly.is_none() { + error!("setup/hold timing calibration failed, retry in 1s..."); + timer.delay_us(1_000_000); + } else { + break; + } + } + + let best_dly = best_dly.unwrap(); + + apply_delay(best_dly, timer); + let mut delay_list = [best_dly; 4]; + + // Assign delay for other lanes + for lane_no in 1..=3 { + select_lane(lane_no as u8); + + let mut min_deviation = 0.5; + let mut min_idx = 0; + for dly_delta in -3..=3 { + let index = (best_dly as isize + dly_delta) as u8; + let low_rate = read_align(index, timer); + // abs() from f32 is not available in core library + let deviation = if low_rate < 0.5 { + 0.5 - low_rate + } else { + low_rate - 0.5 + }; + + if deviation < min_deviation { + min_deviation = deviation; + min_idx = index; + } + } + + apply_delay(min_idx, timer); + delay_list[lane_no] = min_idx; + } + + debug!("setup/hold timing calibration: {:?}", delay_list); + + SerdesConfig { + delay: delay_list, + } +} + +fn read_align(dly: u8, timer: &mut GlobalTimer) -> f32 { + unsafe { + apply_delay(dly, timer); + pl::csr::eem_transceiver::counter_reset_write(1); + + pl::csr::eem_transceiver::counter_enable_write(1); + timer.delay_us(2000); + pl::csr::eem_transceiver::counter_enable_write(0); + + let (high, low) = ( + pl::csr::eem_transceiver::counter_high_count_read(), + pl::csr::eem_transceiver::counter_low_count_read(), + ); + if pl::csr::eem_transceiver::counter_overflow_read() == 1 { + panic!("Unexpected phase detector counter overflow"); + } + + low as f32 / (low + high) as f32 + } +} + +unsafe fn align_comma(timer: &mut GlobalTimer) { + loop { + for slip in 1..=10 { + // The soft 
transceiver has 2 8b10b decoders, which receives lane + // 0/1 and lane 2/3 respectively. The decoder are time-multiplexed + // to decode exactly 1 lane each sysclk cycle. + // + // The decoder decodes lane 0/2 data on odd sysclk cycles, buffer + // on even cycles, and vice versa for lane 1/3. Data/Clock latency + // could change timing. The extend bit flips the decoding timing, + // so lane 0/2 data are decoded on even cycles, and lane 1/3 data + // are decoded on odd cycles. + // + // This is needed because transmitting/receiving a 8b10b character + // takes 2 sysclk cycles. Adjusting bitslip only via ISERDES + // limits the range to 1 cycle. The wordslip bit extends the range + // to 2 sysclk cycles. + pl::csr::eem_transceiver::wordslip_write((slip > 5) as u8); + + // Apply a double bitslip since the ISERDES is 2x oversampled. + // Bitslip is used for comma alignment purposes once setup/hold + // timing is met. + pl::csr::eem_transceiver::bitslip_write(1); + pl::csr::eem_transceiver::bitslip_write(1); + timer.delay_us(1); + + pl::csr::eem_transceiver::comma_align_reset_write(1); + timer.delay_us(100); + + if pl::csr::eem_transceiver::comma_read() == 1 { + debug!("comma alignment completed after {} bitslips", slip); + return; + } + } + + error!("comma alignment failed, retrying in 1s..."); + timer.delay_us(1_000_000); + } +} + +pub fn init(timer: &mut GlobalTimer, cfg: &Config) { + for trx_no in 0..pl::csr::CONFIG_EEM_DRTIO_COUNT { + unsafe { + pl::csr::eem_transceiver::transceiver_sel_write(trx_no as u8); + } + + let key = format!("eem_drtio_delay{}", trx_no); + + let cfg_read = cfg.read(&key); + match cfg_read { + Ok(record) => { + info!("loading calibrated timing values from sd card"); + unsafe { + apply_config(&*(record.as_ptr() as *const SerdesConfig), timer); + } + } + Err(_) => { + info!("calibrating..."); + let config = unsafe { assign_delay(timer) }; + + match cfg.write(&key, config.as_bytes().to_vec()) { + Ok(()) => { + info!("storing calibration timing 
values into sd card"); + } + Err(e) => { + error!("calibration successful but calibration timing values cannot be stored into sd card. Error:{}", e); + } + }; + } + } + + unsafe { + align_comma(timer); + pl::csr::eem_transceiver::rx_ready_write(1); + } + } +} diff --git a/src/libboard_artiq/src/lib.rs b/src/libboard_artiq/src/lib.rs index 2704e2e..23e6836 100644 --- a/src/libboard_artiq/src/lib.rs +++ b/src/libboard_artiq/src/lib.rs @@ -31,6 +31,8 @@ pub mod mem; pub mod pl; #[cfg(has_si5324)] pub mod si5324; +#[cfg(has_drtio_eem)] +pub mod drtio_eem; use core::{cmp, str}; diff --git a/src/runtime/src/main.rs b/src/runtime/src/main.rs index 982a8e5..a2cbadc 100644 --- a/src/runtime/src/main.rs +++ b/src/runtime/src/main.rs @@ -16,6 +16,8 @@ use libasync::task; #[cfg(feature = "target_kasli_soc")] use libboard_artiq::io_expander; use libboard_artiq::{identifier_read, logger, pl}; +#[cfg(has_drtio_eem)] +use libboard_artiq::drtio_eem; use libboard_zynq::{gic, mpcore, timer::GlobalTimer}; use libconfig::Config; use libcortex_a9::l2c::enable_l2_cache; @@ -109,6 +111,9 @@ pub fn main_core0() { rtio_clocking::init(&mut timer, &cfg); + #[cfg(has_drtio_eem)] + drtio_eem::init(&mut timer, &cfg); + task::spawn(ksupport::report_async_rtio_errors()); #[cfg(feature = "target_kasli_soc")] diff --git a/src/runtime/src/rtio_clocking.rs b/src/runtime/src/rtio_clocking.rs index 783af6d..e6cf632 100644 --- a/src/runtime/src/rtio_clocking.rs +++ b/src/runtime/src/rtio_clocking.rs @@ -104,6 +104,9 @@ fn init_drtio(timer: &mut GlobalTimer) { unsafe { pl::csr::rtio_core::reset_phy_write(1); pl::csr::gt_drtio::txenable_write(0xffffffffu32 as _); + + #[cfg(has_drtio_eem)] + pl::csr::eem_transceiver::txenable_write(0xffffffffu32 as _); } } -- 2.42.0