Shuttler: Add Support on Kasli-Soc Platform #268

Merged
sb10q merged 3 commits from linuswck/artiq-zynq:shuttler_kasli_soc_port into master 2023-10-11 10:20:42 +08:00
8 changed files with 340 additions and 35 deletions

1
src/Cargo.lock generated
View File

@ -271,6 +271,7 @@ dependencies = [
"libconfig",
"libcortex_a9",
"libregister",
"libsupport_zynq",
"log",
"log_buffer",
"nb 1.0.0",

View File

@ -16,7 +16,7 @@ from artiq.coredevice import jsondesc
from artiq.gateware import rtio, eem_7series
from artiq.gateware.rtio.xilinx_clocking import fix_serdes_timing_path
from artiq.gateware.rtio.phy import ttl_simple
from artiq.gateware.drtio.transceiver import gtx_7series
from artiq.gateware.drtio.transceiver import gtx_7series, eem_serdes
from artiq.gateware.drtio.siphaser import SiPhaser7Series
from artiq.gateware.drtio.rx_synchronizer import XilinxRXSynchronizer
from artiq.gateware.drtio import *
@ -201,6 +201,7 @@ class GenericMaster(SoCCore):
def __init__(self, description, acpki=False):
clk_freq = description["rtio_frequency"]
has_drtio_over_eem = any(peripheral["type"] == "shuttler" for peripheral in description["peripherals"])
self.acpki = acpki
platform = kasli_soc.Platform()
@ -246,6 +247,8 @@ class GenericMaster(SoCCore):
self.rtio_channels = []
has_grabber = any(peripheral["type"] == "grabber" for peripheral in description["peripherals"])
if has_drtio_over_eem:
self.eem_drtio_channels = []
if has_grabber:
self.grabber_csr_group = []
eem_7series.add_peripherals(self, description["peripherals"], iostandard=eem_iostandard)
@ -260,17 +263,17 @@ class GenericMaster(SoCCore):
self.submodules.rtio_tsc = rtio.TSC(glbl_fine_ts_width=3)
drtio_csr_group = []
drtioaux_csr_group = []
drtioaux_memory_group = []
self.drtio_csr_group = []
self.drtioaux_csr_group = []
self.drtioaux_memory_group = []
self.drtio_cri = []
for i in range(len(self.gt_drtio.channels)):
core_name = "drtio" + str(i)
coreaux_name = "drtioaux" + str(i)
memory_name = "drtioaux" + str(i) + "_mem"
drtio_csr_group.append(core_name)
drtioaux_csr_group.append(coreaux_name)
drtioaux_memory_group.append(memory_name)
self.drtio_csr_group.append(core_name)
self.drtioaux_csr_group.append(coreaux_name)
self.drtioaux_memory_group.append(memory_name)
cdr = ClockDomainsRenamer({"rtio_rx": "rtio_rx" + str(i)})
@ -289,9 +292,10 @@ class GenericMaster(SoCCore):
self.add_memory_region(memory_name, self.mem_map["csr"] + memory_address, size * 2)
self.config["HAS_DRTIO"] = None
self.config["HAS_DRTIO_ROUTING"] = None
self.add_csr_group("drtio", drtio_csr_group)
self.add_csr_group("drtioaux", drtioaux_csr_group)
self.add_memory_group("drtioaux_mem", drtioaux_memory_group)
if has_drtio_over_eem:
self.add_eem_drtio(self.eem_drtio_channels)
self.add_drtio_cpuif_groups()
self.submodules.rtio_core = rtio.Core(
self.rtio_tsc, self.rtio_channels, lane_count=description["sed_lanes"]
@ -340,6 +344,42 @@ class GenericMaster(SoCCore):
self.comb += [self.virtual_leds.get(i).eq(channel.rx_ready)
for i, channel in enumerate(self.gt_drtio.channels)]
def add_eem_drtio(self, eem_drtio_channels):
# Must be called before invoking add_rtio() to construct the CRI
# interconnect properly
self.submodules.eem_transceiver = eem_serdes.EEMSerdes(self.platform, eem_drtio_channels)
self.csr_devices.append("eem_transceiver")
self.config["HAS_DRTIO_EEM"] = None
self.config["EEM_DRTIO_COUNT"] = len(eem_drtio_channels)
cdr = ClockDomainsRenamer({"rtio_rx": "sys"})
for i in range(len(self.eem_transceiver.channels)):
channel = i + len(self.gt_drtio.channels)
core_name = "drtio" + str(channel)
coreaux_name = "drtioaux" + str(channel)
memory_name = "drtioaux" + str(channel) + "_mem"
self.drtio_csr_group.append(core_name)
self.drtioaux_csr_group.append(coreaux_name)
self.drtioaux_memory_group.append(memory_name)
core = cdr(DRTIOMaster(self.rtio_tsc, self.eem_transceiver.channels[i]))
setattr(self.submodules, core_name, core)
self.drtio_cri.append(core.cri)
self.csr_devices.append(core_name)
coreaux = cdr(drtio_aux_controller.DRTIOAuxControllerBare(core.link_layer))
setattr(self.submodules, coreaux_name, coreaux)
self.csr_devices.append(coreaux_name)
size = coreaux.get_mem_size()
memory_address = self.axi2csr.register_port(coreaux.get_tx_port(), size)
self.axi2csr.register_port(coreaux.get_rx_port(), size)
self.add_memory_region(memory_name, self.mem_map["csr"] + memory_address, size * 2)
def add_drtio_cpuif_groups(self):
self.add_csr_group("drtio", self.drtio_csr_group)
self.add_csr_group("drtioaux", self.drtioaux_csr_group)
self.add_memory_group("drtioaux_mem", self.drtioaux_memory_group)
class GenericSatellite(SoCCore):

View File

@ -69,6 +69,8 @@ class SYSCRG(Module, AutoCSR):
# assumes bootstrap clock is same freq as main and sys output
self.clock_domains.cd_sys = ClockDomain()
self.clock_domains.cd_sys4x = ClockDomain(reset_less=True)
self.clock_domains.cd_sys5x = ClockDomain(reset_less=True)
self.clock_domains.cd_clk200 = ClockDomain()
self.current_clock = CSRStatus()
@ -78,11 +80,6 @@ class SYSCRG(Module, AutoCSR):
period = 1e9/freq
pll_locked = Signal()
pll_sys = Signal()
pll_sys4x = Signal()
fb_clk = Signal()
self.submodules.clk_sw_fsm = ClockSwitchFSM()
if clk_sw is None:
@ -91,32 +88,55 @@ class SYSCRG(Module, AutoCSR):
else:
self.comb += self.clk_sw_fsm.i_clk_sw.eq(clk_sw)
mmcm_locked = Signal()
mmcm_sys = Signal()
mmcm_sys4x = Signal()
mmcm_sys5x = Signal()
mmcm_clk208 = Signal()
mmcm_fb_clk = Signal()
self.specials += [
Instance("PLLE2_ADV",
p_STARTUP_WAIT="FALSE", o_LOCKED=pll_locked,
p_BANDWIDTH="HIGH",
p_REF_JITTER1=0.001,
p_CLKIN1_PERIOD=period, i_CLKIN1=main_clk,
p_CLKIN2_PERIOD=period, i_CLKIN2=bootstrap_clk,
i_CLKINSEL=self.clk_sw_fsm.o_clk_sw,
Instance("MMCME2_ADV",
p_STARTUP_WAIT="FALSE", o_LOCKED=mmcm_locked,
p_BANDWIDTH="HIGH",
p_REF_JITTER1=0.001,
p_CLKIN1_PERIOD=period, i_CLKIN1=main_clk,
p_CLKIN2_PERIOD=period, i_CLKIN2=bootstrap_clk,
i_CLKINSEL=self.clk_sw_fsm.o_clk_sw,
# VCO @ 1.5GHz when using 125MHz input
# 1.2GHz for 100MHz (zc706)
p_CLKFBOUT_MULT=12, p_DIVCLK_DIVIDE=1,
i_CLKFBIN=fb_clk,
i_RST=self.clk_sw_fsm.o_reset,
# VCO @ 1.25GHz
p_CLKFBOUT_MULT_F=10, p_DIVCLK_DIVIDE=1,
i_CLKFBIN=mmcm_fb_clk,
i_RST=self.clk_sw_fsm.o_reset,
o_CLKFBOUT=fb_clk,
o_CLKFBOUT=mmcm_fb_clk,
p_CLKOUT0_DIVIDE=3, p_CLKOUT0_PHASE=0.0,
o_CLKOUT0=pll_sys4x,
p_CLKOUT0_DIVIDE_F=2.5, p_CLKOUT0_PHASE=0.0, o_CLKOUT0=mmcm_sys4x,
p_CLKOUT1_DIVIDE=12, p_CLKOUT1_PHASE=0.0,
o_CLKOUT1=pll_sys),
Instance("BUFG", i_I=pll_sys, o_O=self.cd_sys.clk),
Instance("BUFG", i_I=pll_sys4x, o_O=self.cd_sys4x.clk),
# 125MHz
p_CLKOUT1_DIVIDE=10, p_CLKOUT1_PHASE=0.0, o_CLKOUT1=mmcm_sys,
AsyncResetSynchronizer(self.cd_sys, ~pll_locked),
# 625MHz
p_CLKOUT2_DIVIDE=2, p_CLKOUT2_PHASE=0.0, o_CLKOUT2=mmcm_sys5x,
# 208MHz
p_CLKOUT3_DIVIDE=6, p_CLKOUT3_PHASE=0.0, o_CLKOUT3=mmcm_clk208,
),
Instance("BUFG", i_I=mmcm_sys5x, o_O=self.cd_sys5x.clk),
Instance("BUFG", i_I=mmcm_sys, o_O=self.cd_sys.clk),
Instance("BUFG", i_I=mmcm_sys4x, o_O=self.cd_sys4x.clk),
Instance("BUFG", i_I=mmcm_clk208, o_O=self.cd_clk200.clk),
AsyncResetSynchronizer(self.cd_sys, ~mmcm_locked),
AsyncResetSynchronizer(self.cd_clk200, ~mmcm_locked),
]
reset_counter = Signal(4, reset=15)
ic_reset = Signal(reset=1)
self.sync.clk200 += \
If(reset_counter != 0,
reset_counter.eq(reset_counter - 1)
).Else(
ic_reset.eq(0)
)
self.specials += Instance("IDELAYCTRL", i_REFCLK=ClockSignal("clk200"), i_RST=ic_reset)
self.comb += self.current_clock.status.eq(self.clk_sw_fsm.o_clk_sw)

View File

@ -25,6 +25,7 @@ void = { version = "1", default-features = false }
io = { path = "../libio", features = ["byteorder"] }
libboard_zynq = { path = "@@ZYNQ_RS@@/libboard_zynq" }
libsupport_zynq = { path = "@@ZYNQ_RS@@/libsupport_zynq", default-features = false, features = ["alloc_core"] }
libregister = { path = "@@ZYNQ_RS@@/libregister" }
libconfig = { path = "@@ZYNQ_RS@@/libconfig", features = ["fat_lfn"] }
libcortex_a9 = { path = "@@ZYNQ_RS@@/libcortex_a9" }

View File

@ -0,0 +1,233 @@
use crate::pl;
use embedded_hal::prelude::_embedded_hal_blocking_delay_DelayUs;
use libboard_zynq::timer::GlobalTimer;
use libconfig::Config;
use libsupport_zynq::alloc::format;
use log::{debug, error, info};
struct SerdesConfig {
pub delay: [u8; 4],
}
impl SerdesConfig {
pub fn as_bytes(&self) -> &[u8] {
unsafe {
core::slice::from_raw_parts(
(self as *const SerdesConfig) as *const u8,
core::mem::size_of::<SerdesConfig>(),
)
}
}
}
fn select_lane(lane_no: u8) {
unsafe {
pl::csr::eem_transceiver::lane_sel_write(lane_no);
}
}
fn apply_delay(tap: u8, timer: &mut GlobalTimer) {
unsafe {
pl::csr::eem_transceiver::dly_cnt_in_write(tap);
pl::csr::eem_transceiver::dly_ld_write(1);
timer.delay_us(1);
assert!(tap as u8 == pl::csr::eem_transceiver::dly_cnt_out_read());
}
}
fn apply_config(config: &SerdesConfig, timer: &mut GlobalTimer) {
for lane_no in 0..4 {
select_lane(lane_no as u8);
apply_delay(config.delay[lane_no], timer);
}
}
unsafe fn assign_delay(timer: &mut GlobalTimer) -> SerdesConfig {
// Select an appropriate delay for lane 0
select_lane(0);
//
let mut best_dly = None;
loop {
let mut prev = None;
for curr_dly in 0..32 {
//let read_align = read_align_fn(curr_dly, timer);
let curr_low_rate = read_align(curr_dly, timer);
if let Some(prev_low_rate) = prev {
// This is potentially a crossover position
if prev_low_rate <= curr_low_rate && curr_low_rate >= 0.5 {
let prev_dev = 0.5 - prev_low_rate;
let curr_dev = curr_low_rate - 0.5;
let selected_idx = if prev_dev < curr_dev {
curr_dly - 1
} else {
curr_dly
};
// The setup setup/hold calibration timing (even with
// tolerance) might be invalid in other lanes due to skew.
// 5 taps is very conservative, generally it is 1 or 2
if selected_idx < 5 {
prev = None;
continue;
} else {
best_dly = Some(selected_idx);
break;
}
}
}
// Only rising slope from <= 0.5 can result in a rising low rate
// crossover at 50%.
if curr_low_rate <= 0.5 {
prev = Some(curr_low_rate);
}
}
if best_dly.is_none() {
error!("setup/hold timing calibration failed, retry in 1s...");
timer.delay_us(1_000_000);
} else {
break;
}
}
let best_dly = best_dly.unwrap();
apply_delay(best_dly, timer);
let mut delay_list = [best_dly; 4];
// Assign delay for other lanes
for lane_no in 1..=3 {
select_lane(lane_no as u8);
let mut min_deviation = 0.5;
let mut min_idx = 0;
for dly_delta in -3..=3 {
let index = (best_dly as isize + dly_delta) as u8;
let low_rate = read_align(index, timer);
// abs() from f32 is not available in core library
let deviation = if low_rate < 0.5 {
0.5 - low_rate
} else {
low_rate - 0.5
};
if deviation < min_deviation {
min_deviation = deviation;
min_idx = index;
}
}
apply_delay(min_idx, timer);
delay_list[lane_no] = min_idx;
}
debug!("setup/hold timing calibration: {:?}", delay_list);
SerdesConfig {
delay: delay_list,
}
}
fn read_align(dly: u8, timer: &mut GlobalTimer) -> f32 {
unsafe {
apply_delay(dly, timer);
pl::csr::eem_transceiver::counter_reset_write(1);
pl::csr::eem_transceiver::counter_enable_write(1);
timer.delay_us(2000);
pl::csr::eem_transceiver::counter_enable_write(0);
let (high, low) = (
pl::csr::eem_transceiver::counter_high_count_read(),
pl::csr::eem_transceiver::counter_low_count_read(),
);
if pl::csr::eem_transceiver::counter_overflow_read() == 1 {
panic!("Unexpected phase detector counter overflow");
}
low as f32 / (low + high) as f32
}
}
unsafe fn align_comma(timer: &mut GlobalTimer) {
loop {
for slip in 1..=10 {
// The soft transceiver has 2 8b10b decoders, which receives lane
// 0/1 and lane 2/3 respectively. The decoder are time-multiplexed
// to decode exactly 1 lane each sysclk cycle.
//
// The decoder decodes lane 0/2 data on odd sysclk cycles, buffer
// on even cycles, and vice versa for lane 1/3. Data/Clock latency
// could change timing. The extend bit flips the decoding timing,
// so lane 0/2 data are decoded on even cycles, and lane 1/3 data
// are decoded on odd cycles.
//
// This is needed because transmitting/receiving a 8b10b character
// takes 2 sysclk cycles. Adjusting bitslip only via ISERDES
// limits the range to 1 cycle. The wordslip bit extends the range
// to 2 sysclk cycles.
pl::csr::eem_transceiver::wordslip_write((slip > 5) as u8);
// Apply a double bitslip since the ISERDES is 2x oversampled.
// Bitslip is used for comma alignment purposes once setup/hold
// timing is met.
pl::csr::eem_transceiver::bitslip_write(1);
pl::csr::eem_transceiver::bitslip_write(1);
timer.delay_us(1);
pl::csr::eem_transceiver::comma_align_reset_write(1);
timer.delay_us(100);
if pl::csr::eem_transceiver::comma_read() == 1 {
debug!("comma alignment completed after {} bitslips", slip);
return;
}
}
error!("comma alignment failed, retrying in 1s...");
timer.delay_us(1_000_000);
}
}
pub fn init(timer: &mut GlobalTimer, cfg: &Config) {
for trx_no in 0..pl::csr::CONFIG_EEM_DRTIO_COUNT {
unsafe {
pl::csr::eem_transceiver::transceiver_sel_write(trx_no as u8);
}
let key = format!("eem_drtio_delay{}", trx_no);
let cfg_read = cfg.read(&key);
match cfg_read {
Ok(record) => {
info!("loading calibrated timing values from sd card");
unsafe {
apply_config(&*(record.as_ptr() as *const SerdesConfig), timer);
}
}
Err(_) => {
info!("calibrating...");
let config = unsafe { assign_delay(timer) };
match cfg.write(&key, config.as_bytes().to_vec()) {
Ok(()) => {
info!("storing calibration timing values into sd card");
}
Err(e) => {
error!("calibration successful but calibration timing values cannot be stored into sd card. Error:{}", e);
}
};
}
}
unsafe {
align_comma(timer);
pl::csr::eem_transceiver::rx_ready_write(1);
}
}
}

View File

@ -31,6 +31,8 @@ pub mod mem;
pub mod pl;
#[cfg(has_si5324)]
pub mod si5324;
#[cfg(has_drtio_eem)]
pub mod drtio_eem;
use core::{cmp, str};

View File

@ -16,6 +16,8 @@ use libasync::task;
#[cfg(feature = "target_kasli_soc")]
use libboard_artiq::io_expander;
use libboard_artiq::{identifier_read, logger, pl};
#[cfg(has_drtio_eem)]
use libboard_artiq::drtio_eem;
use libboard_zynq::{gic, mpcore, timer::GlobalTimer};
use libconfig::Config;
use libcortex_a9::l2c::enable_l2_cache;
@ -109,6 +111,9 @@ pub fn main_core0() {
rtio_clocking::init(&mut timer, &cfg);
#[cfg(has_drtio_eem)]
drtio_eem::init(&mut timer, &cfg);
task::spawn(ksupport::report_async_rtio_errors());
#[cfg(feature = "target_kasli_soc")]

View File

@ -104,6 +104,9 @@ fn init_drtio(timer: &mut GlobalTimer) {
unsafe {
pl::csr::rtio_core::reset_phy_write(1);
pl::csr::gt_drtio::txenable_write(0xffffffffu32 as _);
#[cfg(has_drtio_eem)]
pl::csr::eem_transceiver::txenable_write(0xffffffffu32 as _);
}
}