From 64d3f867a0026ce62efc2e8a7c83fb1a337baf55 Mon Sep 17 00:00:00 2001
From: occheung <54844539+occheung@users.noreply.github.com>
Date: Wed, 9 Aug 2023 16:59:40 -0700
Subject: [PATCH] add DRTIO-over-EEM PHY for EFC and perhaps Phaser

---
 artiq/firmware/libboard_artiq/Cargo.toml      |   1 +
 artiq/firmware/libboard_artiq/drtio_eem.rs    | 238 +++++++++
 artiq/firmware/libboard_artiq/lib.rs          |   5 +
 artiq/firmware/runtime/Cargo.toml             |   2 +-
 artiq/firmware/runtime/main.rs                |   5 +
 artiq/firmware/satman/Cargo.toml              |   2 +-
 artiq/firmware/satman/main.rs                 |   7 +-
 .../gateware/drtio/transceiver/eem_serdes.py  | 481 ++++++++++++++++++
 artiq/gateware/eem.py                         |  28 +
 9 files changed, 766 insertions(+), 3 deletions(-)
 create mode 100644 artiq/firmware/libboard_artiq/drtio_eem.rs
 create mode 100644 artiq/gateware/drtio/transceiver/eem_serdes.py

diff --git a/artiq/firmware/libboard_artiq/Cargo.toml b/artiq/firmware/libboard_artiq/Cargo.toml
index 0f13ca2de..8405ee892 100644
--- a/artiq/firmware/libboard_artiq/Cargo.toml
+++ b/artiq/firmware/libboard_artiq/Cargo.toml
@@ -24,3 +24,4 @@ proto_artiq = { path = "../libproto_artiq" }
 
 [features]
 uart_console = []
+alloc = []
diff --git a/artiq/firmware/libboard_artiq/drtio_eem.rs b/artiq/firmware/libboard_artiq/drtio_eem.rs
new file mode 100644
index 000000000..4434f8cb1
--- /dev/null
+++ b/artiq/firmware/libboard_artiq/drtio_eem.rs
@@ -0,0 +1,238 @@
+use board_misoc::{csr, clock, config};
+#[cfg(feature = "alloc")]
+use alloc::format;
+
+
+/// Per-lane IDELAY tap values of one transceiver, stored in the flash
+/// configuration as four raw bytes (one per lane).
+#[repr(C)]
+struct SerdesConfig {
+    pub delay: [u8; 4],
+}
+
+impl SerdesConfig {
+    /// Returns the raw byte representation written to the config store.
+    pub fn as_bytes(&self) -> &[u8] {
+        // SAFETY: SerdesConfig is repr(C) and consists solely of u8s.
+        unsafe {
+            core::slice::from_raw_parts(
+                (self as *const SerdesConfig) as *const u8,
+                core::mem::size_of::<SerdesConfig>(),
+            )
+        }
+    }
+}
+
+/// Selects the lane targeted by subsequent delay and counter accesses.
+fn select_lane(lane_no: u8) {
+    unsafe {
+        csr::eem_transceiver::lane_sel_write(lane_no);
+    }
+}
+
+/// Loads an IDELAY tap value into the selected lane and checks the readback.
+fn apply_delay(tap: u8) {
+    unsafe {
+        csr::eem_transceiver::dly_cnt_in_write(tap);
+        csr::eem_transceiver::dly_ld_write(1);
+        clock::spin_us(1);
+        assert!(tap == csr::eem_transceiver::dly_cnt_out_read());
+    }
+}
+
+/// Applies a stored set of delays to all four lanes.
+fn apply_config(config: &SerdesConfig) {
+    for lane_no in 0..4 {
+        select_lane(lane_no as u8);
+        apply_delay(config.delay[lane_no]);
+    }
+}
+
+/// Calibrates the setup/hold timing of the selected transceiver: finds the
+/// 50% crossover of the phase detector on lane 0, then fine-tunes lanes 1-3
+/// within +/-3 taps of that delay.
+unsafe fn assign_delay() -> SerdesConfig {
+    // Select an appropriate delay for lane 0
+    select_lane(0);
+
+    let read_align = |dly: u8| -> f32 {
+        apply_delay(dly);
+        csr::eem_transceiver::counter_reset_write(1);
+
+        csr::eem_transceiver::counter_enable_write(1);
+        clock::spin_us(2000);
+        csr::eem_transceiver::counter_enable_write(0);
+
+        let (high, low) = (
+            csr::eem_transceiver::counter_high_count_read(),
+            csr::eem_transceiver::counter_low_count_read(),
+        );
+        if csr::eem_transceiver::counter_overflow_read() == 1 {
+            panic!("Unexpected phase detector counter overflow");
+        }
+
+        low as f32 / (low + high) as f32
+    };
+
+    let mut best_dly = None;
+
+    loop {
+        let mut prev = None;
+        for curr_dly in 0..32 {
+            let curr_low_rate = read_align(curr_dly);
+
+            if let Some(prev_low_rate) = prev {
+                // This is potentially a crossover position
+                if prev_low_rate <= curr_low_rate && curr_low_rate >= 0.5 {
+                    let prev_dev = 0.5 - prev_low_rate;
+                    let curr_dev = curr_low_rate - 0.5;
+                    let selected_idx = if prev_dev < curr_dev {
+                        curr_dly - 1
+                    } else {
+                        curr_dly
+                    };
+
+                    // The setup/hold calibration timing (even with
+                    // tolerance) might be invalid in other lanes due to skew.
+                    // 5 taps is very conservative, generally it is 1 or 2
+                    if selected_idx < 5 {
+                        prev = None;
+                        continue;
+                    } else {
+                        best_dly = Some(selected_idx);
+                        break;
+                    }
+                }
+            }
+
+            // Only rising slope from <= 0.5 can result in a rising low rate
+            // crossover at 50%.
+            if curr_low_rate <= 0.5 {
+                prev = Some(curr_low_rate);
+            }
+        }
+
+        if best_dly.is_none() {
+            error!("setup/hold timing calibration failed, retry in 1s...");
+            clock::spin_us(1_000_000);
+        } else {
+            break;
+        }
+    }
+
+    let best_dly = best_dly.unwrap();
+
+    apply_delay(best_dly);
+    let mut delay_list = [best_dly; 4];
+
+    // Assign delay for other lanes
+    for lane_no in 1..=3 {
+        select_lane(lane_no as u8);
+
+        let mut min_deviation = 0.5;
+        // Fall back to the lane 0 delay if no candidate tap is any better
+        let mut min_idx = best_dly;
+        // Search +/-3 taps around the lane 0 delay, staying within the 32
+        // taps of the delay line (best_dly >= 5, so the lower bound is safe).
+        for index in (best_dly - 3)..=(best_dly + 3).min(31) {
+            let low_rate = read_align(index);
+            // abs() from f32 is not available in core library
+            let deviation = if low_rate < 0.5 {
+                0.5 - low_rate
+            } else {
+                low_rate - 0.5
+            };
+
+            if deviation < min_deviation {
+                min_deviation = deviation;
+                min_idx = index;
+            }
+        }
+
+        apply_delay(min_idx);
+        delay_list[lane_no] = min_idx;
+    }
+
+    debug!("setup/hold timing calibration: {:?}", delay_list);
+
+    SerdesConfig {
+        delay: delay_list,
+    }
+}
+
+/// Steps bitslip/wordslip until the selected transceiver reports a comma.
+unsafe fn align_comma() {
+    loop {
+        for slip in 1..=10 {
+            // The soft transceiver has 2 8b10b decoders, which receives lane
+            // 0/1 and lane 2/3 respectively. The decoder are time-multiplexed
+            // to decode exactly 1 lane each sysclk cycle.
+            //
+            // The decoder decodes lane 0/2 data on odd sysclk cycles, buffer
+            // on even cycles, and vice versa for lane 1/3. Data/Clock latency
+            // could change timing. The extend bit flips the decoding timing,
+            // so lane 0/2 data are decoded on even cycles, and lane 1/3 data
+            // are decoded on odd cycles.
+            //
+            // This is needed because transmitting/receiving a 8b10b character
+            // takes 2 sysclk cycles. Adjusting bitslip only via ISERDES
+            // limits the range to 1 cycle. The wordslip bit extends the range
+            // to 2 sysclk cycles.
+            csr::eem_transceiver::wordslip_write((slip > 5) as u8);
+
+            // Apply a double bitslip since the ISERDES is 2x oversampled.
+            // Bitslip is used for comma alignment purposes once setup/hold
+            // timing is met.
+            csr::eem_transceiver::bitslip_write(1);
+            csr::eem_transceiver::bitslip_write(1);
+            clock::spin_us(1);
+
+            csr::eem_transceiver::comma_align_reset_write(1);
+            clock::spin_us(100);
+
+            if csr::eem_transceiver::comma_read() == 1 {
+                debug!("comma alignment completed after {} bitslips", slip);
+                return;
+            }
+        }
+
+        error!("comma alignment failed, retrying in 1s...");
+        clock::spin_us(1_000_000);
+    }
+}
+
+/// Calibrates (or restores from flash) and aligns every EEM transceiver,
+/// marking each receiver ready once its comma alignment has completed.
+pub fn init() {
+    for trx_no in 0..csr::CONFIG_EEM_TRANSCEIVERS {
+        unsafe {
+            csr::eem_transceiver::transceiver_sel_write(trx_no as u8);
+        }
+
+        let key = format!("eem_drtio_delay{}", trx_no);
+        config::read(&key, |r| {
+            match r {
+                // Only trust records of the expected size; a shorter record
+                // would be read out of bounds by the cast below.
+                Ok(record) if record.len() == core::mem::size_of::<SerdesConfig>() => {
+                    info!("loading calibrated timing values from flash");
+                    unsafe {
+                        apply_config(&*(record.as_ptr() as *const SerdesConfig));
+                    }
+                },
+
+                // Missing or malformed record: recalibrate and store the result
+                _ => {
+                    info!("calibrating...");
+                    let config = unsafe { assign_delay() };
+                    config::write(&key, config.as_bytes()).unwrap();
+                }
+            }
+        });
+
+        unsafe {
+            align_comma();
+            csr::eem_transceiver::rx_ready_write(1);
+        }
+    }
+}
diff --git a/artiq/firmware/libboard_artiq/lib.rs b/artiq/firmware/libboard_artiq/lib.rs
index 209da3048..9579acaaa 100644
--- a/artiq/firmware/libboard_artiq/lib.rs
+++ b/artiq/firmware/libboard_artiq/lib.rs
@@ -12,6 +12,8 @@ extern crate log;
 extern crate io;
 extern crate board_misoc;
 extern crate proto_artiq;
+#[cfg(feature = "alloc")]
+extern crate alloc;
 
 pub mod spi;
 
@@ -29,3 +31,6 @@ pub mod grabber;
 #[cfg(has_drtio)]
 pub mod drtioaux;
 pub mod drtio_routing;
+
+#[cfg(all(has_drtio_eem, feature = "alloc"))]
+pub mod drtio_eem;
diff --git a/artiq/firmware/runtime/Cargo.toml b/artiq/firmware/runtime/Cargo.toml
index 86db5ed08..16477707b 100644
--- a/artiq/firmware/runtime/Cargo.toml
+++ b/artiq/firmware/runtime/Cargo.toml
@@ -26,7 +26,7 @@ io = { path = "../libio", features = ["byteorder"] }
 alloc_list = { path = "../liballoc_list" }
 board_misoc = { path = "../libboard_misoc", features = ["uart_console", "smoltcp"] }
 logger_artiq = { path = "../liblogger_artiq" }
-board_artiq = { path = "../libboard_artiq" }
+board_artiq = { path = "../libboard_artiq", features = ["alloc"] }
 proto_artiq = { path = "../libproto_artiq", features = ["log", "alloc"] }
 riscv = { version = "0.6.0", features = ["inline-asm"] }
 
diff --git a/artiq/firmware/runtime/main.rs b/artiq/firmware/runtime/main.rs
index 4809efa46..94cfd23c5 100644
--- a/artiq/firmware/runtime/main.rs
+++ b/artiq/firmware/runtime/main.rs
@@ -40,6 +40,8 @@ use board_artiq::drtioaux;
 use board_artiq::drtio_routing;
 use board_artiq::{mailbox, rpc_queue};
 use proto_artiq::{mgmt_proto, moninj_proto, rpc_proto, session_proto, kernel_proto};
+#[cfg(has_drtio_eem)]
+use board_artiq::drtio_eem;
 #[cfg(has_rtio_analyzer)]
 use proto_artiq::analyzer_proto;
 
@@ -126,6 +128,9 @@ fn startup() {
     }
     rtio_clocking::init();
 
+    #[cfg(has_drtio_eem)]
+    drtio_eem::init();
+
     let mut net_device = unsafe { ethmac::EthernetDevice::new() };
     net_device.reset_phy_if_any();
 
diff --git a/artiq/firmware/satman/Cargo.toml b/artiq/firmware/satman/Cargo.toml
index 0b219c15f..20dec311f 100644
--- a/artiq/firmware/satman/Cargo.toml
+++ b/artiq/firmware/satman/Cargo.toml
@@ -15,7 +15,7 @@ build_misoc = { path = "../libbuild_misoc" }
 [dependencies]
 log = { version = "0.4", default-features = false }
 board_misoc = { path = "../libboard_misoc", features = ["uart_console", "log"] }
-board_artiq = { path = "../libboard_artiq" }
+board_artiq = { path = "../libboard_artiq", features = ["alloc"] }
 alloc_list = { path = "../liballoc_list" }
 riscv = { version = "0.6.0", features = ["inline-asm"] }
 proto_artiq = { path = "../libproto_artiq", features = ["log", "alloc"] }
diff --git a/artiq/firmware/satman/main.rs b/artiq/firmware/satman/main.rs
index cd101d28c..553b2b3fd 100644
--- a/artiq/firmware/satman/main.rs
+++ b/artiq/firmware/satman/main.rs
@@ -17,6 +17,8 @@ use board_artiq::si5324;
 use board_artiq::{spi, drtioaux};
 use board_artiq::drtio_routing;
 use proto_artiq::drtioaux_proto::ANALYZER_MAX_SIZE;
+#[cfg(has_drtio_eem)]
+use board_artiq::drtio_eem;
 use riscv::register::{mcause, mepc, mtval};
 use dma::Manager as DmaManager;
 use analyzer::Analyzer;
@@ -541,13 +543,16 @@ pub extern fn main() -> i32 {
         io_expander.service().unwrap();
     }
 
-    #[cfg(not(soc_platform = "efc"))]
+    #[cfg(not(has_drtio_eem))]
     unsafe {
         csr::drtio_transceiver::txenable_write(0xffffffffu32 as _);
     }
 
     init_rtio_crg();
 
+    #[cfg(has_drtio_eem)]
+    drtio_eem::init();
+
     #[cfg(has_drtio_routing)]
     let mut repeaters = [repeater::Repeater::default(); csr::DRTIOREP.len()];
     #[cfg(not(has_drtio_routing))]
diff --git a/artiq/gateware/drtio/transceiver/eem_serdes.py b/artiq/gateware/drtio/transceiver/eem_serdes.py
new file mode 100644
index 000000000..60fadc065
--- /dev/null
+++ b/artiq/gateware/drtio/transceiver/eem_serdes.py
@@ -0,0 +1,481 @@
+from migen import *
+from misoc.interconnect.csr import *
+from misoc.cores.code_8b10b import SingleEncoder, Decoder
+from artiq.gateware.drtio.core import TransceiverInterface, ChannelInterface
+
+
+class RXSerdes(Module):
+    def __init__(self, i_pads):
+        self.rxdata = [ Signal(10) for _ in range(4) ]
+        self.ld = [ Signal() for _ in range(4) ]
+        self.cnt_in = [ Signal(5) for _ in range(4) ]
+        self.cnt_out = [ Signal(5) for _ in range(4) ]
+        self.bitslip = [ Signal() for _ in range(4) ]
+
+        ser_in_no_dly = [ Signal() for _ in range(4) ]
+        ser_in = [ Signal() for _ in range(4) ]
+        shifts = [ Signal(2) for _ in range(4) ]
+
+        for i in range(4):
+            self.specials += [
+                # Master deserializer
+                Instance("ISERDESE2",
+                    p_DATA_RATE="DDR",
+                    p_DATA_WIDTH=10,
+                    p_INTERFACE_TYPE="NETWORKING",
+                    p_NUM_CE=1,
+                    p_SERDES_MODE="MASTER",
+                    p_IOBDELAY="IFD",
+                    o_Q1=self.rxdata[i][9],
+                    o_Q2=self.rxdata[i][8],
+                    o_Q3=self.rxdata[i][7],
+                    o_Q4=self.rxdata[i][6],
+                    o_Q5=self.rxdata[i][5],
+                    o_Q6=self.rxdata[i][4],
+                    o_Q7=self.rxdata[i][3],
+                    o_Q8=self.rxdata[i][2],
+                    o_SHIFTOUT1=shifts[i][0],
+                    o_SHIFTOUT2=shifts[i][1],
+                    i_DDLY=ser_in[i],
+                    i_BITSLIP=self.bitslip[i],
+                    i_CLK=ClockSignal("sys5x"),
+                    i_CLKB=~ClockSignal("sys5x"),
+                    i_CE1=1,
+                    i_RST=ResetSignal(),
+                    i_CLKDIV=ClockSignal()),
+
+                # Slave deserializer
+                Instance("ISERDESE2",
+                    p_DATA_RATE="DDR",
+                    p_DATA_WIDTH=10,
+                    p_INTERFACE_TYPE="NETWORKING",
+                    p_NUM_CE=1,
+                    p_SERDES_MODE="SLAVE",
+                    p_IOBDELAY="IFD",
+                    o_Q3=self.rxdata[i][1],
+                    o_Q4=self.rxdata[i][0],
+                    i_BITSLIP=self.bitslip[i],
+                    i_CLK=ClockSignal("sys5x"),
+                    i_CLKB=~ClockSignal("sys5x"),
+                    i_CE1=1,
+                    i_RST=ResetSignal(),
+                    i_CLKDIV=ClockSignal(),
+                    i_SHIFTIN1=shifts[i][0],
+                    i_SHIFTIN2=shifts[i][1]),
+
+                # Tunable delay
+                # IDELAYCTRL is with the clocking
+                Instance("IDELAYE2",
+                    p_DELAY_SRC="IDATAIN",
+                    p_SIGNAL_PATTERN="DATA",
+                    p_CINVCTRL_SEL="FALSE",
+                    p_HIGH_PERFORMANCE_MODE="TRUE",
+                    # REFCLK refers to the clock source of IDELAYCTRL
+                    p_REFCLK_FREQUENCY=200.0,
+                    p_PIPE_SEL="FALSE",
+                    p_IDELAY_TYPE="VAR_LOAD",
+                    p_IDELAY_VALUE=0,
+
+                    i_C=ClockSignal(),
+                    i_LD=self.ld[i],
+                    i_CE=0,
+                    i_LDPIPEEN=0,
+                    i_INC=1,  # Always increment
+
+                    # Set the optimal delay tap via the aligner
+                    i_CNTVALUEIN=self.cnt_in[i],
+                    # Allow the aligner to check the tap value
+                    o_CNTVALUEOUT=self.cnt_out[i],
+
+                    i_IDATAIN=ser_in_no_dly[i],
+                    o_DATAOUT=ser_in[i]
+                ),
+
+                # IOB
+                Instance("IBUFDS",
+                    p_DIFF_TERM="TRUE",
+                    i_I=i_pads.p[i],
+                    i_IB=i_pads.n[i],
+                    o_O=ser_in_no_dly[i],
+                )
+            ]
+
+
+class TXSerdes(Module):
+    def __init__(self, o_pads):
+        self.txdata = [ Signal(5) for _ in range(4) ]
+        ser_out = [ Signal() for _ in range(4) ]
+        t_out = [ Signal() for _ in range(4) ]
+
+        for i in range(4):
+            self.specials += [
+                # Serializer
+                Instance("OSERDESE2",
+                    p_DATA_RATE_OQ="SDR", p_DATA_RATE_TQ="BUF",
+                    p_DATA_WIDTH=5, p_TRISTATE_WIDTH=1,
+                    p_INIT_OQ=0b00000,
+                    o_OQ=ser_out[i],
+                    o_TQ=t_out[i],
+                    i_RST=ResetSignal(),
+                    i_CLK=ClockSignal("sys5x"),
+                    i_CLKDIV=ClockSignal(),
+                    i_D1=self.txdata[i][0],
+                    i_D2=self.txdata[i][1],
+                    i_D3=self.txdata[i][2],
+                    i_D4=self.txdata[i][3],
+                    i_D5=self.txdata[i][4],
+                    i_TCE=1, i_OCE=1,
+                    i_T1=0
+                ),
+
+                # IOB
+                Instance("OBUFTDS",
+                    i_I=ser_out[i],
+                    o_O=o_pads.p[i],
+                    o_OB=o_pads.n[i],
+                    i_T=t_out[i],
+                )
+            ]
+
+
+# This module owns 2 8b10b encoders, each encoder route codewords to 2 lanes,
+# through time multiplexing. The scheduler releases 2 bytes every clock cycle,
+# and the encoders each encode 1 byte.
+#
+# Since each lane only transmits 5 bits per sysclk cycle, the encoder selects
+# a lane to first transmit the least significant word (LSW, 5 bits), and send
+# the rest in the next cycle using the same lane. It takes advantage of the
+# arrival sequence of bytes from the scrambler to achieve the transmission
+# pattern shown in the MultiDecoder module.
+class MultiEncoder(Module):
+    def __init__(self):
+        # Keep the link layer interface identical to standard encoders
+        self.d = [ Signal(8) for _ in range(2) ]
+        self.k = [ Signal() for _ in range(2) ]
+
+        # Output interface
+        self.output = [ [ Signal(5) for _ in range(2) ] for _ in range(2) ]
+
+        # Clock enable signal
+        # Alternate between sending encoded character to EEM 0/2 and EEM 1/3
+        # every cycle
+        self.clk_div2 = Signal()
+
+        # Intermediate registers for output and disparity
+        # More significant bits are buffered due to channel geometry
+        # Disparity bit is delayed. The same encoder is shared by 2 SERDES
+        output_bufs = [ Signal(5) for _ in range(2) ]
+        disp_bufs = [ Signal() for _ in range(2) ]
+
+        encoders = [ SingleEncoder() for _ in range(2) ]
+        self.submodules += encoders
+
+        # Encoded characters are routed to the EEM pairs:
+        # The first character goes through EEM 0/2
+        # The second character goes through EEM 1/3, and repeat...
+        # Lower order bits go first, so higher order bits are buffered and
+        # transmitted in the next cycle.
+        for d, k, output, output_buf, disp_buf, encoder in \
+                zip(self.d, self.k, self.output, output_bufs, disp_bufs, encoders):
+            self.comb += [
+                encoder.d.eq(d),
+                encoder.k.eq(k),
+
+                If(self.clk_div2,
+                    output[0].eq(encoder.output[0:5]),
+                    output[1].eq(output_buf),
+                ).Else(
+                    output[0].eq(output_buf),
+                    output[1].eq(encoder.output[0:5]),
+                ),
+            ]
+            # Handle intermediate registers
+            self.sync += [
+                disp_buf.eq(encoder.disp_out),
+                encoder.disp_in.eq(disp_buf),
+                output_buf.eq(encoder.output[5:10]),
+            ]
+
+
+# Owns 2 8b10b decoders, each decodes data from lane 0/1 and lane 2/3
+# respectively. The decoders are time multiplexed among the 2 lanes, and
+# each decoder decodes exactly 1 lane per sysclk cycle.
+#
+# The transmitter could send the following data pattern over the 4 lanes.
+# Capital letters denote the most significant word (MSW); The lowercase denote
+# the least significant word (LSW) of the same 8b10b character.
+#
+# Cycle \ Lane  0   1   2   3
+#     0         a   Y   b   Z
+#     1         A   c   B   d
+#     2         a'  C   b'  D
+#     3         A'  c'  B'  d'
+#
+# Lane 0/2 and lane 1/3 transmit word of different significance by design (see
+# MultiEncoder).
+#
+# This module buffers the LSW, and immediately send the whole 8b10b character
+# to the corresponding decoder once the MSW is also received.
+class MultiDecoder(Module):
+    def __init__(self):
+        self.raw_input = [ Signal(5) for _ in range(2) ]
+        self.d = Signal(8)
+        self.k = Signal()
+
+        # Clock enable signal
+        # Alternate between decoding encoded character from EEM 0/2 and
+        # EEM 1/3 every cycle
+        self.clk_div2 = Signal()
+
+        # Extended bitslip mechanism. ISERDESE2 bitslip can only adjust bit
+        # position by 5 bits (1 cycle). However, an encoded character takes 2
+        # cycles to transmit/receive. The module needs to correctly reassemble
+        # the 8b10b character. This is useful when the received waveform is the
+        # 1-cycle delayed version of the above waveform. The same scheme would
+        # incorrectly buffer words and create wrong symbols.
+        #
+        # Hence, wordslip put LSW as MSW and vice versa, effectively injects
+        # an additional 5 bit positions worth of bitslips.
+        self.wordslip = Signal()
+
+        # Intermediate register for input
+        buffer = Signal(5)
+
+        self.submodules.decoder = Decoder()
+
+        # The decoder does the following actions:
+        # - Process received characters from EEM 0/2
+        # - Same, but from EEM 1/3
+        #
+        # Wordslipping is equivalent to swapping task between clock cycles.
+        # (i.e. Swap processing target. Instead of processing EEM 0/2, process
+        # EEM 1/3, and vice versa on the next cycle.) This effectively shifts
+        # the processing time of any encoded character by 1 clock cycle (5
+        # bitslip equivalent without considering oversampling, 10 otherwise).
+        self.sync += [
+            If(self.clk_div2 ^ self.wordslip,
+                buffer.eq(self.raw_input[1])
+            ).Else(
+                buffer.eq(self.raw_input[0])
+            )
+        ]
+
+        self.comb += [
+            If(self.clk_div2 ^ self.wordslip,
+                self.decoder.input.eq(Cat(buffer, self.raw_input[0]))
+            ).Else(
+                self.decoder.input.eq(Cat(buffer, self.raw_input[1]))
+            )
+        ]
+
+        self.comb += [
+            self.d.eq(self.decoder.d),
+            self.k.eq(self.decoder.k),
+        ]
+
+
+class BangBangPhaseDetector(Module):
+    def __init__(self):
+        self.s = Signal(3)
+
+        self.high = Signal()
+        self.low = Signal()
+
+        self.comb += If(~self.s[0] & self.s[2],
+            self.high.eq(self.s[1]),
+            self.low.eq(~self.s[1]),
+        ).Else(
+            self.high.eq(0),
+            self.low.eq(0),
+        )
+
+
+class PhaseErrorCounter(Module, AutoCSR):
+    def __init__(self):
+        self.high_count = CSRStatus(18)
+        self.low_count = CSRStatus(18)
+
+        # Odd indices are always oversampled bits
+        self.rxdata = Signal(10)
+
+        # Measure setup/hold timing, count phase error in the following
+        self.submodules.detector = BangBangPhaseDetector()
+        self.comb += self.detector.s.eq(self.rxdata[:3])
+
+        self.reset = CSR()
+        self.enable = CSRStorage()
+
+        self.overflow = CSRStatus()
+        high_carry = Signal()
+        low_carry = Signal()
+
+        self.sync += [
+            If(self.reset.re,
+                self.high_count.status.eq(0),
+                self.low_count.status.eq(0),
+                high_carry.eq(0),
+                low_carry.eq(0),
+                self.overflow.status.eq(0),
+            ).Elif(self.enable.storage,
+                Cat(self.high_count.status, high_carry).eq(
+                    self.high_count.status + self.detector.high),
+                Cat(self.low_count.status, low_carry).eq(
+                    self.low_count.status + self.detector.low),
+                If(high_carry | low_carry, self.overflow.status.eq(1)),
+            )
+        ]
+
+
+class SerdesSingle(Module):
+    def __init__(self, i_pads, o_pads):
+        # Serdes modules
+        self.submodules.rx_serdes = RXSerdes(i_pads)
+        self.submodules.tx_serdes = TXSerdes(o_pads)
+
+        self.lane_sel = Signal(2)
+
+        self.bitslip = Signal()
+
+        for i in range(4):
+            self.comb += self.rx_serdes.bitslip[i].eq(self.bitslip)
+
+        self.dly_cnt_in = Signal(5)
+        self.dly_ld = Signal()
+
+        for i in range(4):
+            self.comb += [
+                self.rx_serdes.cnt_in[i].eq(self.dly_cnt_in),
+                self.rx_serdes.ld[i].eq((self.lane_sel == i) & self.dly_ld),
+            ]
+
+        self.dly_cnt_out = Signal(5)
+
+        self.comb += Case(self.lane_sel, {
+            idx: self.dly_cnt_out.eq(self.rx_serdes.cnt_out[idx]) for idx in range(4)
+        })
+
+        self.wordslip = Signal()
+
+        # Encoder/Decoder interfaces
+        self.submodules.encoder = MultiEncoder()
+        self.submodules.decoders = decoders = Array(MultiDecoder() for _ in range(2))
+
+        self.comb += [
+            decoders[0].wordslip.eq(self.wordslip),
+            decoders[1].wordslip.eq(self.wordslip),
+        ]
+
+        # Route encoded symbols to TXSerdes, decoded symbols from RXSerdes
+        for i in range(4):
+            self.comb += [
+                self.tx_serdes.txdata[i].eq(self.encoder.output[i//2][i%2]),
+                decoders[i//2].raw_input[i%2].eq(self.rx_serdes.rxdata[i][0::2]),
+            ]
+
+        self.clk_div2 = Signal()
+        self.comb += [
+            self.encoder.clk_div2.eq(self.clk_div2),
+            self.decoders[0].clk_div2.eq(self.clk_div2),
+            self.decoders[1].clk_div2.eq(self.clk_div2),
+        ]
+
+        # Monitor lane 0 decoder output for bitslip alignment
+        self.comma_align_reset = Signal()
+        self.comma = Signal()
+
+        self.sync += If(self.comma_align_reset,
+            self.comma.eq(0),
+        ).Elif(~self.comma,
+            self.comma.eq(
+                ((decoders[0].d == 0x3C) | (decoders[0].d == 0xBC))
+                & decoders[0].k))
+
+
+class EEMSerdes(Module, TransceiverInterface, AutoCSR):
+    def __init__(self, platform, data_pads):
+        self.rx_ready = CSRStorage()
+
+        self.transceiver_sel = CSRStorage(max(1, log2_int(len(data_pads))))
+        self.lane_sel = CSRStorage(2)
+
+        self.bitslip = CSR()
+
+        self.dly_cnt_in = CSRStorage(5)
+        self.dly_ld = CSR()
+        self.dly_cnt_out = CSRStatus(5)
+
+        # Slide a word back/forward by 1 cycle, shared by all lanes of the
+        # same transceiver. This is to determine if this cycle should decode
+        # lane 0/2 or lane 1/3. See MultiEncoder/MultiDecoder for the full
+        # scheme & timing.
+        self.wordslip = CSRStorage()
+
+        # Monitor lane 0 decoder output for bitslip alignment
+        self.comma_align_reset = CSR()
+        self.comma = CSRStatus()
+
+        clk_div2 = Signal()
+        self.sync += clk_div2.eq(~clk_div2)
+
+        channel_interfaces = []
+        serdes_list = []
+        for i_pads, o_pads in data_pads:
+            serdes = SerdesSingle(i_pads, o_pads)
+            self.comb += serdes.clk_div2.eq(clk_div2)
+            serdes_list.append(serdes)
+
+            chan_if = ChannelInterface(serdes.encoder, serdes.decoders)
+            self.comb += chan_if.rx_ready.eq(self.rx_ready.storage)
+            channel_interfaces.append(chan_if)
+
+        # Route CSR signals using transceiver_sel
+        self.comb += Case(self.transceiver_sel.storage, {
+            trx_no: [
+                serdes.bitslip.eq(self.bitslip.re),
+                serdes.dly_ld.eq(self.dly_ld.re),
+
+                self.dly_cnt_out.status.eq(serdes.dly_cnt_out),
+                self.comma.status.eq(serdes.comma),
+            ] for trx_no, serdes in enumerate(serdes_list)
+        })
+
+        # Wordslip needs to be latched. It needs to hold when calibrating
+        # other transceivers and/or after calibration.
+        self.sync += If(self.wordslip.re,
+            Case(self.transceiver_sel.storage, {
+                trx_no: [
+                    serdes.wordslip.eq(self.wordslip.storage)
+                ] for trx_no, serdes in enumerate(serdes_list)
+            })
+        )
+
+        for serdes in serdes_list:
+            self.comb += [
+                # Delay counter write only comes into effect after dly_ld
+                # So, just MUX dly_ld instead.
+                serdes.dly_cnt_in.eq(self.dly_cnt_in.storage),
+
+                # Comma align reset & lane selection can be broadcasted
+                # without MUXing. Transceivers are aligned one-by-one
+                serdes.lane_sel.eq(self.lane_sel.storage),
+                serdes.comma_align_reset.eq(self.comma_align_reset.re),
+            ]
+
+        # Setup/hold timing calibration module
+        self.submodules.counter = PhaseErrorCounter()
+        self.comb += Case(self.transceiver_sel.storage, {
+            trx_no: Case(self.lane_sel.storage, {
+                lane_idx: self.counter.rxdata.eq(serdes.rx_serdes.rxdata[lane_idx])
+                    for lane_idx in range(4)
+            }) for trx_no, serdes in enumerate(serdes_list)
+        })
+
+        self.submodules += serdes_list
+
+        TransceiverInterface.__init__(self, channel_interfaces)
+
+        for i in range(len(serdes_list)):
+            self.comb += [
+                getattr(self, "cd_rtio_rx" + str(i)).clk.eq(ClockSignal()),
+                getattr(self, "cd_rtio_rx" + str(i)).rst.eq(ResetSignal())
+            ]
diff --git a/artiq/gateware/eem.py b/artiq/gateware/eem.py
index bcb3db7be..8b611e803 100644
--- a/artiq/gateware/eem.py
+++ b/artiq/gateware/eem.py
@@ -757,3 +757,31 @@ class HVAmp(_EEM):
             phy = ttl_out_cls(pads.p, pads.n)
             target.submodules += phy
             target.rtio_channels.append(rtio.Channel.from_phy(phy))
+
+
+class EFC(_EEM):
+    @staticmethod
+    def io(eem, iostandard=default_iostandard):
+        # Master: Pair 0~3 data IN, 4~7 OUT
+        data_in = ("efc{}_drtio_rx".format(eem), 0,
+            Subsignal("p", Pins("{} {} {} {}".format(*[
+                _eem_pin(eem, i, "p") for i in range(4)
+            ]))),
+            Subsignal("n", Pins("{} {} {} {}".format(*[
+                _eem_pin(eem, i, "n") for i in range(4)
+            ]))),
+            iostandard(eem),
+            Misc("DIFF_TERM=TRUE"),
+        )
+
+        data_out = ("efc{}_drtio_tx".format(eem), 0,
+            Subsignal("p", Pins("{} {} {} {}".format(*[
+                _eem_pin(eem, i, "p") for i in range(4, 8)
+            ]))),
+            Subsignal("n", Pins("{} {} {} {}".format(*[
+                _eem_pin(eem, i, "n") for i in range(4, 8)
+            ]))),
+            iostandard(eem),
+        )
+
+        return [data_in, data_out]