# serdes-transceiver/sync_serdes.py
#
# 764 lines
# 23 KiB
# Python

from migen import *
from migen.genlib.misc import WaitTimer
from migen.genlib.fifo import SyncFIFO
from util import PriorityEncoderMSB
class SingleLineTX(Module):
    """One-line transmitter built around a single OSERDESE2 primitive.

    Attributes:
        txdata: 5-bit parallel input; bit 0 drives D1 and bit 4 drives D5.
        ser_out: output wired to the OQ pin of the OSERDESE2.
        t_out: output wired to the TQ pin of the OSERDESE2.

    The primitive is clocked by the "sys5x" domain on CLK and by the default
    clock domain on CLKDIV; it is reset by the default domain's reset.
    """

    def __init__(self):
        self.txdata = Signal(5)
        self.ser_out = Signal()
        self.t_out = Signal()

        # D1..D5 are fed from txdata[0]..txdata[4] respectively.
        parallel_inputs = {
            "i_D{}".format(pin): self.txdata[pin - 1]
            for pin in range(1, 6)
        }

        # TX SERDES
        self.specials += Instance(
            "OSERDESE2",
            # Static configuration
            p_DATA_RATE_OQ="SDR",
            p_DATA_RATE_TQ="BUF",
            p_DATA_WIDTH=5,
            p_TRISTATE_WIDTH=1,
            p_INIT_OQ=0b00000,
            # Clocking and reset
            i_CLK=ClockSignal("sys5x"),
            i_CLKDIV=ClockSignal(),
            i_RST=ResetSignal(),
            # Both clock enables are tied high
            i_OCE=1,
            i_TCE=1,
            # TODO: Hardcode t_in? Output disable is always unnecessary?
            i_T1=0,
            # Serial and 3-state outputs
            o_OQ=self.ser_out,
            o_TQ=self.t_out,
            **parallel_inputs
        )
class SingleLineRX(Module):
    """One-line receiver: IDELAYE2 feeding a master/slave ISERDESE2 pair.

    Attributes:
        rxdata: 10-bit deserialized word. Bits 9..2 come from Q1..Q8 of the
            master ISERDESE2, bits 1..0 from Q3..Q4 of the slave.
        ser_in_no_dly: undelayed serial input (IDATAIN of the IDELAYE2).
        ld: drives the LD pin of the IDELAYE2.
        ce: drives the CE pin of the IDELAYE2; INC is tied to 1, so the
            delay is only ever incremented.
        cnt_in: 5-bit value presented on CNTVALUEIN.
        cnt_out: 5-bit value read from CNTVALUEOUT.
        master_bitslip: BITSLIP input of the master ISERDESE2.
        slave_bitslip: BITSLIP input of the slave ISERDESE2.

    The primitives use the "rx_sys5x" and "rx_sys" clock domains. The
    matching IDELAYCTRL is not instantiated here.
    """

    def __init__(self):
        self.rxdata = Signal(10)
        self.ser_in_no_dly = Signal()
        self.ld = Signal()
        self.ce = Signal()
        self.cnt_in = Signal(5)
        self.cnt_out = Signal(5)
        self.master_bitslip = Signal()
        self.slave_bitslip = Signal()

        # Delayed serial stream and the master -> slave cascade wires
        ser_in = Signal()
        shifts = Signal(2)

        def iserdes(mode, **ports):
            """Build an ISERDESE2 with the settings shared by both halves."""
            return Instance(
                "ISERDESE2",
                p_DATA_RATE="DDR",
                p_DATA_WIDTH=10,
                p_INTERFACE_TYPE="NETWORKING",
                p_NUM_CE=1,
                p_SERDES_MODE=mode,
                p_IOBDELAY="IFD",
                i_CLK=ClockSignal("rx_sys5x"),
                i_CLKB=~ClockSignal("rx_sys5x"),
                i_CE1=1,
                i_RST=ResetSignal("rx_sys"),
                i_CLKDIV=ClockSignal("rx_sys"),
                **ports
            )

        # Master outputs Q1..Q8 land on rxdata[9]..rxdata[2]
        master_outputs = {
            "o_Q{}".format(q): self.rxdata[10 - q] for q in range(1, 9)
        }

        # Master deserializer
        master = iserdes(
            "MASTER",
            o_SHIFTOUT1=shifts[0],
            o_SHIFTOUT2=shifts[1],
            i_DDLY=ser_in,
            i_BITSLIP=self.master_bitslip,
            **master_outputs
        )

        # Slave deserializer
        slave = iserdes(
            "SLAVE",
            o_Q3=self.rxdata[1],
            o_Q4=self.rxdata[0],
            # i_DDLY=ser_in,
            i_BITSLIP=self.slave_bitslip,
            i_SHIFTIN1=shifts[0],
            i_SHIFTIN2=shifts[1],
        )

        # Tunable delay
        delay = Instance(
            "IDELAYE2",
            p_DELAY_SRC="IDATAIN",
            p_SIGNAL_PATTERN="DATA",
            p_CINVCTRL_SEL="FALSE",
            p_HIGH_PERFORMANCE_MODE="TRUE",
            # REFCLK refers to the clock source of IDELAYCTRL
            p_REFCLK_FREQUENCY=200.0,
            p_PIPE_SEL="FALSE",
            p_IDELAY_TYPE="VAR_LOAD",
            p_IDELAY_VALUE=0,
            i_C=ClockSignal("rx_sys"),
            i_LD=self.ld,
            i_CE=self.ce,
            i_LDPIPEEN=0,
            # Always increment
            i_INC=1,
            # Set the optimal delay tap via the aligner
            i_CNTVALUEIN=self.cnt_in,
            # Allow the aligner to check the tap value
            o_CNTVALUEOUT=self.cnt_out,
            i_IDATAIN=self.ser_in_no_dly,
            o_DATAOUT=ser_in,
        )

        # IDELAYCTRL is with the clocking
        self.specials += [master, slave, delay]
class BitSlipReader(Module):
    """Capture the received word after each of five double-bitslip steps.

    Once `start` is seen, the FSM repeatedly waits for the stabilization
    timer, stores `loopback_rxdata` into `data_result[slip_count]`,
    increments `slip_count`, and pulses `bitslip` twice. After the fifth
    sample it parks in TERMINATE with `done` asserted.

    Attributes:
        loopback_rxdata: IN, 10-bit word to be sampled.
        start: IN, leaves the idle state when high.
        done: OUT, high while the FSM sits in TERMINATE.
        bitslip: OUT, bitslip pulse.
        data_result: OUT, array of five 10-bit captured words.
        slip_count: OUT, number of samples taken so far.
    """

    def __init__(self):
        # IN
        self.loopback_rxdata = Signal(10)
        self.start = Signal()

        # Wait for stabilization after bitslip
        self.submodules.stab_timer = WaitTimer(511)

        # OUT
        self.done = Signal()
        self.bitslip = Signal()
        self.data_result = Array(Signal(10) for _ in range(5))
        self.slip_count = Signal(3)

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        timer = self.stab_timer
        current_slot = self.data_result[self.slip_count]
        all_sampled = self.slip_count == 5

        fsm.act(
            "WAIT_START",
            If(self.start, NextState("WAIT_TIMER")).Else(NextState("WAIT_START")),
        )

        fsm.act(
            "WAIT_TIMER",
            timer.wait.eq(1),
            If(timer.done, NextState("SAMPLE")),
        )

        # The timer's wait input is no longer driven here: a combinatorial
        # signal falls back to the default value when not driven, so no
        # explicit deassertion is needed.
        # The stored result is kept alive until reset.
        fsm.act(
            "SAMPLE",
            NextValue(current_slot, self.loopback_rxdata),
            NextValue(self.slip_count, self.slip_count + 1),
            NextState("HIGH_BITSLIP_FIRST"),
        )

        # Pulsing BITSLIP alternates between 1 right shift and 3 left shifts.
        # We are trying to figure out which 2 bits the slave is copying from,
        # hence we only want shifts by 2. Pulsing twice does exactly that.
        fsm.act(
            "HIGH_BITSLIP_FIRST",
            self.bitslip.eq(1),
            NextState("LOW_BITSLIP"),
        )

        # The bitslip signal is auto-reset in this state.
        fsm.act("LOW_BITSLIP", NextState("HIGH_BITSLIP_SECOND"))

        fsm.act(
            "HIGH_BITSLIP_SECOND",
            self.bitslip.eq(1),
            If(all_sampled, NextState("TERMINATE")).Else(NextState("WAIT_TIMER")),
        )

        fsm.act(
            "TERMINATE",
            self.done.eq(1),
            NextState("TERMINATE"),
        )
class SlaveAligner(Module):
    """Bitslip controller for a master/slave deserializer pair.

    The FSM runs three phases after `start` goes high:

    1. Slip master and slave together (two pulses per step) until either of
       the two lowest bits of `loopback_rxdata` is set, or five steps have
       been taken without seeing one.
    2. If a low bit was seen, slip only the master (two pulses per step)
       while the master bits of the same parity as the recorded low bit(s)
       are non-zero.
    3. Slip both again until bit 3 or 4 is set while bit 5 is clear, then
       park in TERMINATE with `done` asserted.

    Attributes:
        loopback_rxdata: IN, 10-bit deserialized word under observation.
        start: IN, leaves the idle state when high.
        done: OUT, high while the FSM sits in TERMINATE.
        master_bitslip: OUT, bitslip pulse for the master deserializer.
        slave_bitslip: OUT, bitslip pulse for the slave deserializer.
    """

    def __init__(self):
        # IN
        self.loopback_rxdata = Signal(10)
        self.start = Signal()

        # Wait for stabilization after bitslip
        self.submodules.stab_timer = WaitTimer(511)

        # OUT
        self.done = Signal()
        self.master_bitslip = Signal()
        self.slave_bitslip = Signal()

        slip_count = Signal(3)
        check_odd = Signal()
        check_even = Signal()

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        rxdata = self.loopback_rxdata
        timer = self.stab_timer

        def settle_then(state, target):
            """Declare `state`: run the timer, go to `target` when it is done."""
            fsm.act(
                state,
                timer.wait.eq(1),
                If(timer.done, NextState(target)),
            )

        def slip_both():
            return [self.master_bitslip.eq(1), self.slave_bitslip.eq(1)]

        def slip_master():
            return [self.master_bitslip.eq(1)]

        def double_pulse(prefix, pulse, *final):
            """Declare <prefix>HIGH_BITSLIP_FIRST / LOW_BITSLIP / HIGH_BITSLIP_SECOND.

            Pulsing BITSLIP alternates between 1 right shift and 3 left
            shifts. We are trying to figure out which 2 bits the slave is
            copying from, hence we only want shifts by 2. Pulsing twice does
            exactly that. `final` holds the transition statements of the
            last state.
            """
            fsm.act(
                prefix + "HIGH_BITSLIP_FIRST",
                *(pulse() + [NextState(prefix + "LOW_BITSLIP")])
            )
            # The bitslip signals are auto-reset in the LOW state.
            fsm.act(
                prefix + "LOW_BITSLIP",
                NextState(prefix + "HIGH_BITSLIP_SECOND"),
            )
            fsm.act(
                prefix + "HIGH_BITSLIP_SECOND",
                *(pulse() + list(final))
            )

        fsm.act("WAIT_START", If(self.start, NextState("WAIT_TIMER")))

        settle_then("WAIT_TIMER", "SAMPLE")

        # The timer's wait input is no longer driven here: a combinatorial
        # signal falls back to the default value when not driven.
        # Detect the last 2 bits. If a signal is received, remember which of
        # the two bits carried it so the master bitslip can be detuned if
        # necessary.
        fsm.act(
            "SAMPLE",
            If(
                rxdata[0] | rxdata[1],
                NextValue(check_odd, rxdata[1]),
                NextValue(check_even, rxdata[0]),
                NextState("CHECK_MASTER_BITSLIP"),
            ).Else(
                NextValue(slip_count, slip_count + 1),
                NextState("HIGH_BITSLIP_FIRST"),
            ),
        )

        # States HIGH_BITSLIP_FIRST, LOW_BITSLIP, HIGH_BITSLIP_SECOND
        double_pulse(
            "",
            slip_both,
            If(slip_count == 5, NextState("SHIFT_WAIT_TIMER")).Else(
                NextState("WAIT_TIMER")
            ),
        )

        odd_master_rxdata = rxdata[3::2]
        even_master_rxdata = rxdata[2::2]
        master_overlaps = (check_odd & (odd_master_rxdata != 0)) | (
            check_even & (even_master_rxdata != 0)
        )

        # Alternatively, we align the master with the slave.
        # As soon as the checked odd and/or even master bits read 0, the
        # detuning is complete and the pattern shift can begin.
        fsm.act(
            "CHECK_MASTER_BITSLIP",
            If(
                master_overlaps,
                NextState("MASTER_HIGH_BITSLIP_FIRST"),
            ).Else(
                NextState("SHIFT_WAIT_TIMER"),
            ),
        )

        # States MASTER_HIGH_BITSLIP_FIRST, MASTER_LOW_BITSLIP,
        # MASTER_HIGH_BITSLIP_SECOND
        double_pulse("MASTER_", slip_master, NextState("MASTER_WAIT_TIMER"))

        settle_then("MASTER_WAIT_TIMER", "CHECK_MASTER_BITSLIP")

        # After eliminating the potentially duplicated pattern, shift the
        # entire output pattern for delay tap optimization. Ideally the
        # optimized first edge would be the middle pair, so shift until
        # bit 3/4 is set but bit 5 is not.
        settle_then("SHIFT_WAIT_TIMER", "SHIFT_SAMPLE_PATTERN")

        fsm.act(
            "SHIFT_SAMPLE_PATTERN",
            If(
                (rxdata[3:5] != 0) & ~rxdata[5],
                NextState("TERMINATE"),
            ).Else(
                NextState("SHIFT_HIGH_BITSLIP_FIRST"),
            ),
        )

        # States SHIFT_HIGH_BITSLIP_FIRST, SHIFT_LOW_BITSLIP,
        # SHIFT_HIGH_BITSLIP_SECOND
        double_pulse("SHIFT_", slip_both, NextState("SHIFT_WAIT_TIMER"))

        fsm.act(
            "TERMINATE",
            self.done.eq(1),
            NextState("TERMINATE"),
        )
class PhaseReader(Module):
    """Record the received word observed at every delay tap value.

    After `start`, the FSM loops: wait for the stabilization timer, store
    `loopback_rxdata` into `data_result[delay_tap]`, pulse `inc_en` for one
    cycle, then read `delay_tap` back. The loop ends, with `done` asserted,
    when `delay_tap` reads 0 after an increment.

    Attributes:
        inc_en: OUT, one-cycle pulse meant to drive the IDELAYE2 CE pin.
        loopback_rxdata: IN, 10-bit word to be sampled.
        delay_tap: IN, 5-bit current delay tap value.
        start: IN, pull up to start the phase reader.
        data_result: OUT, array of 32 captured 10-bit words.
        done: OUT, high while the FSM sits in PROBE_FIN.
    """

    def __init__(self):
        # Drive IDELAYE2 CE pin to increment delay.
        # The signal should only last for 1 cycle.
        self.inc_en = Signal()
        self.loopback_rxdata = Signal(10)
        self.delay_tap = Signal(5)
        # Pull up to start the phase reader
        self.start = Signal()
        self.data_result = Array(Signal(10) for _ in range(32))
        self.done = Signal()

        # Wait for stabilization after increment
        self.submodules.stab_timer = WaitTimer(511)

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        timer = self.stab_timer
        tap_slot = self.data_result[self.delay_tap]

        fsm.act(
            "WAIT_START",
            If(self.start, NextState("WAIT_TIMER")).Else(NextState("WAIT_START")),
        )

        fsm.act(
            "WAIT_TIMER",
            timer.wait.eq(1),
            If(timer.done, NextState("SAMPLE")),
        )

        # The timer's wait input is no longer driven here: a combinatorial
        # signal falls back to the default value when not driven.
        # The stored result is kept alive until reset.
        fsm.act(
            "SAMPLE",
            NextValue(tap_slot, self.loopback_rxdata),
            NextState("HIGH_CE"),
        )

        fsm.act(
            "HIGH_CE",
            self.inc_en.eq(1),
            NextState("LOW_CE"),
        )

        # TAP OUT is available 1 cycle after the pulse. No explicit reset of
        # inc_en is needed: a signal assigned by combinatorial logic in the
        # FSM is released after leaving the state that sets it.
        fsm.act("LOW_CE", NextState("READ_TAP"))

        fsm.act(
            "READ_TAP",
            If(self.delay_tap != 0, NextState("WAIT_TIMER")).Else(
                NextState("PROBE_FIN")
            ),
        )

        fsm.act(
            "PROBE_FIN",
            self.done.eq(1),
            NextState("PROBE_FIN"),
        )
class DelayOptimizer(Module):
    """Search the delay taps for a pulse window and load its midpoint.

    After `start`, the FSM:

    1. Samples the word at the current tap and derives `expected_pulse`, the
       bit index one above the position reported by the priority encoder
       (wrapping 9 -> 0; forced to 1 when bits 0 and 9 are both set).
    2. Pulses `inc_en` until bit `expected_pulse` of the word becomes set and
       records the tap value as `min_delay`.
    3. Keeps pulsing `inc_en`, counting steps in `max_offset`, until that bit
       clears again.
    4. Sets `opt_delay_tap` to `min_delay + max_offset // 2`, pulses `ld`,
       waits for `delay_tap` to equal it, then asserts `done`.

    Attributes:
        loopback_rxdata: IN, 10-bit deserialized word from the channel.
        delay_tap: IN, 5-bit current delay tap of the channel.
        start: IN, starts the calculation.
        ld: OUT, load strobe for the channel delay tap.
        inc_en: OUT, one-cycle increment strobe for the channel delay tap.
        opt_delay_tap: OUT, 5-bit optimal delay tap.
        select_odd: OUT, bit 0 of `expected_pulse`, driven in TERMINATE;
            tells which index parity to keep when decimating.
        done: OUT, high once the optimal delay has been loaded.
    """

    def __init__(self):
        # IN: signals from the channel
        self.loopback_rxdata = Signal(10)
        self.delay_tap = Signal(5)
        # IN: signal to start the calculation
        self.start = Signal()

        # OUT: signals for controlling the channel delay tap
        self.ld = Signal()
        self.inc_en = Signal()
        # OUT: the optimal delay
        self.opt_delay_tap = Signal(5)
        # OUT: keep even/odd indices, decimate the other
        self.select_odd = Signal()
        # OUT: optimal delay is calculated
        self.done = Signal()

        # Priority encoder for finding the pulse location
        self.submodules.pulse_encoder = PriorityEncoderMSB(10)
        # Wait for stabilization after increment
        self.submodules.stab_timer = WaitTimer(511)

        # Intermediate signals.
        # expected_pulse indexes the 10-entry rxdata_array and may be
        # assigned pulse_encoder.o + 1, so its range must cover 0..9.
        # `max` is an exclusive bound, hence max=10 (previously max=9, which
        # declared 0..8 only; the bit width is 4 in both cases).
        self.expected_pulse = Signal(max=10)
        self.min_delay = Signal(5)
        self.max_offset = Signal(5)

        # Translate rxdata into an array to allow indexing by a signal
        self.rxdata_array = Array(Signal() for _ in range(10))
        self.comb += [
            bit.eq(self.loopback_rxdata[i])
            for i, bit in enumerate(self.rxdata_array)
        ]

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        rxdata = self.loopback_rxdata
        timer = self.stab_timer
        encoder = self.pulse_encoder
        pulse_bit = self.rxdata_array[self.expected_pulse]

        def settle_then(state, target):
            """Declare `state`: run the timer, go to `target` when it is done."""
            fsm.act(
                state,
                timer.wait.eq(1),
                If(timer.done, NextState(target)),
            )

        def step_tap_then(state, target):
            """Declare `state`: pulse inc_en for one cycle, then go to `target`.

            inc_en is automatically deasserted after leaving the state.
            """
            fsm.act(
                state,
                self.inc_en.eq(1),
                NextState(target),
            )

        fsm.act(
            "WAIT_START",
            If(self.start, NextState("WAIT_ZERO")).Else(NextState("WAIT_START")),
        )

        settle_then("WAIT_ZERO", "SAMPLE_ZERO")

        # Oversampling should guarantee the detection. However, the priority
        # encoder itself does not wrap around, so a pulse that wraps from
        # bit 9 to bit 0 must not be passed into it.
        fsm.act(
            "SAMPLE_ZERO",
            If(
                rxdata[0] & rxdata[-1],
                NextValue(self.expected_pulse, 1),
            ).Else(
                encoder.i.eq(rxdata),
                If(
                    encoder.o == 9,
                    NextValue(self.expected_pulse, 0),
                ).Else(
                    NextValue(self.expected_pulse, encoder.o + 1),
                ),
            ),
            # Go to the next delay tap and wait for the pulse.
            NextState("INC_PULSE_DELAY_IN"),
        )

        settle_then("WAIT_PULSE_IN", "SAMPLE_PULSE_IN")

        # The pulse reaching the expected bit marks the start of the window.
        fsm.act(
            "SAMPLE_PULSE_IN",
            If(
                pulse_bit,
                NextValue(self.min_delay, self.delay_tap),
                NextState("INC_PULSE_DELAY_OUT"),
            ).Else(
                NextState("INC_PULSE_DELAY_IN"),
            ),
        )

        step_tap_then("INC_PULSE_DELAY_IN", "WAIT_PULSE_IN")

        settle_then("WAIT_PULSE_OUT", "SAMPLE_PULSE_OUT")

        # The pulse leaving the expected bit marks the end of the window;
        # the optimal tap is the start plus half the measured width.
        fsm.act(
            "SAMPLE_PULSE_OUT",
            If(
                ~pulse_bit,
                NextValue(
                    self.opt_delay_tap,
                    self.min_delay + (self.max_offset >> 1),
                ),
                NextState("LOAD_OPT_DELAY"),
            ).Else(
                NextValue(self.max_offset, self.max_offset + 1),
                NextState("INC_PULSE_DELAY_OUT"),
            ),
        )

        step_tap_then("INC_PULSE_DELAY_OUT", "WAIT_PULSE_OUT")

        # The optimal delay tap was prepared in the SAMPLE_PULSE_OUT state.
        fsm.act(
            "LOAD_OPT_DELAY",
            self.ld.eq(1),
            NextState("WAIT_DELAY_LOAD"),
        )

        # Hold here until the channel reports the loaded tap value.
        fsm.act(
            "WAIT_DELAY_LOAD",
            If(self.delay_tap == self.opt_delay_tap, NextState("TERMINATE")),
        )

        fsm.act(
            "TERMINATE",
            self.done.eq(1),
            self.select_odd.eq(self.expected_pulse[0]),
            NextState("TERMINATE"),
        )
class SyncSingleRX(Module):
    """Self-aligning single-line receiver.

    Combines a SingleLineRX with a SlaveAligner and a DelayOptimizer. A small
    FSM runs the aligner first and the delay optimizer second, then latches
    the optimizer's `select_odd` result and asserts `align_done`.

    Attributes:
        ser_in_no_dly: IN, undelayed serial signal.
        start: IN, start RX alignment; the sequence begins once this is high
            and the deserialized word is non-zero.
        rxdata: OUT, 5-bit received data after decimation of the 10-bit
            oversampled word (odd or even bit positions, per `select_odd`).
        align_done: OUT, RXDATA from this channel is self-aligned.
    """

    def __init__(self):
        # Ports
        # IN: Undelayed serial signal
        self.ser_in_no_dly = Signal()
        # IN: Start RX alignment signal
        self.start = Signal()
        # OUT: Received data after self-alignment, decimation
        self.rxdata = Signal(5)
        # OUT: RXDATA from this channel is self-aligned
        self.align_done = Signal()

        # Components
        self.submodules.rx = SingleLineRX()
        self.submodules.slave_aligner = SlaveAligner()
        self.submodules.delay_solver = DelayOptimizer()

        rx = self.rx
        aligner = self.slave_aligner
        solver = self.delay_solver

        # Sample decimation
        select_odd = Signal()
        decimated_rxdata = Signal(5)

        # Dataflow
        self.comb += [
            # Delay and oversample the original signal
            rx.ser_in_no_dly.eq(self.ser_in_no_dly),
            # Use the deserialized signals for alignment
            aligner.loopback_rxdata.eq(rx.rxdata),
            solver.loopback_rxdata.eq(rx.rxdata),
            # Decimate the oversampled signals: odd bits when select_odd is
            # set, even bits otherwise
            decimated_rxdata.eq(
                Mux(select_odd, rx.rxdata[1::2], rx.rxdata[::2])
            ),
            # Send it to the output
            self.rxdata.eq(decimated_rxdata),
        ]

        # Control signals
        self.comb += [
            # Bitslip alignment
            rx.master_bitslip.eq(aligner.master_bitslip),
            rx.slave_bitslip.eq(aligner.slave_bitslip),
            # Tap delay optimization
            rx.ce.eq(solver.inc_en),
            rx.ld.eq(solver.ld),
            rx.cnt_in.eq(solver.opt_delay_tap),
            solver.delay_tap.eq(rx.cnt_out),
        ]

        self.submodules.fsm = FSM(reset_state="WAIT_SIGNAL")

        # Stay idle until something is received and alignment is requested.
        signal_present = rx.rxdata != 0
        self.fsm.act(
            "WAIT_SIGNAL",
            If(signal_present & self.start, NextState("WAIT_ALIGNER")),
        )

        self.fsm.act(
            "WAIT_ALIGNER",
            aligner.start.eq(1),
            If(aligner.done, NextState("WAIT_DELAY_OPT")),
        )

        self.fsm.act(
            "WAIT_DELAY_OPT",
            solver.start.eq(1),
            If(
                solver.done,
                NextValue(select_odd, solver.select_odd),
                NextState("INTRA_ALIGN_DONE"),
            ),
        )

        self.fsm.act(
            "INTRA_ALIGN_DONE",
            self.align_done.eq(1),
            NextState("INTRA_ALIGN_DONE"),
        )
class MultiLineTX(Module):
    """Four SingleLineTX lanes side by side.

    Attributes:
        txdata: IN, 20-bit unserialized data; lane `i` transmits bits
            `5*i` to `5*i + 4`.
        ser_out: OUT, 4-bit serialized data, one bit per lane.
        t_out: OUT, 4-bit 3-state signal output, one bit per lane.
    """

    def __init__(self):
        # Ports
        # IN: Unserialized data
        self.txdata = Signal(20)
        # OUT: Serialized data
        self.ser_out = Signal(4)
        # OUT: 3-state signal output
        self.t_out = Signal(4)

        for idx in range(4):
            single_tx = SingleLineTX()
            # This lane's 5-bit slice of the parallel word
            low = 5 * idx
            high = low + 5
            self.comb += [
                single_tx.txdata.eq(self.txdata[low:high]),
                self.ser_out[idx].eq(single_tx.ser_out),
                self.t_out[idx].eq(single_tx.t_out),
            ]
            self.submodules += single_tx
class MultiLineRX(Module):
    """Four SyncSingleRX lanes with per-lane FIFOs for group delay.

    Each lane's decimated output is written into its own SyncFIFO on every
    cycle. Once all lanes report alignment, the FSM waits for an all-zero
    output word and then holds back the read side of every lane whose FIFO
    output is already non-zero, until the whole 20-bit word reads all ones.

    Attributes:
        ser_in_no_dly: IN, 4-bit undelayed serial signal, one bit per lane.
        start: IN, start alignment process of all channels.
        rxdata: OUT, 20-bit received data; lane `i` occupies bits `5*i` to
            `5*i + 4`.
        align_done: OUT, RXDATA from all channels are self-aligned.
        delay_done: OUT, group delay compensated.
        err: OUT, group delay adjustment failed; set when any lane FIFO is
            not writable and not cleared by this module.
    """

    def __init__(self):
        # Ports
        # IN: Undelayed serial signal
        self.ser_in_no_dly = Signal(4)
        # IN: Start alignment process of all channels
        self.start = Signal()
        # OUT: Received data after self-alignment, decimation
        self.rxdata = Signal(20)
        # OUT: RXDATA from all channels are self-aligned
        self.align_done = Signal()
        # OUT: Group delay compensated
        self.delay_done = Signal()
        # OUT: Group delay adjustment failed
        self.err = Signal()

        channel_align_done = Signal(4)
        self.comb += self.align_done.eq(channel_align_done == 0b1111)

        # Per-lane FIFO read enables. All lanes flow by default; the FSM
        # overrides individual bits in WAIT_PULSE.
        buffer_outflow = Signal(4)
        self.comb += buffer_outflow.eq(0b1111)

        for idx in range(4):
            single_rx = SyncSingleRX()
            # FIFOs for handling group delay.
            # Signal from each OSERDES group can have a different delay,
            # so add delay to the groups that receive the pulse early.
            # Maximum delay = 8
            channel_buffer = SyncFIFO(5, 16)
            low = 5 * idx
            high = low + 5

            self.comb += [
                single_rx.ser_in_no_dly.eq(self.ser_in_no_dly[idx]),
                channel_align_done[idx].eq(single_rx.align_done),
                # Propagate start alignment signal to all channels
                single_rx.start.eq(self.start),
            ]

            self.comb += [
                # Allow data to go through the FIFO unless aligning.
                # Pay the memory delay cost.
                channel_buffer.we.eq(1),
                channel_buffer.re.eq(buffer_outflow[idx]),
                # Data always flows from the individual RX to the rxdata port
                channel_buffer.din.eq(single_rx.rxdata),
                self.rxdata[low:high].eq(channel_buffer.dout),
            ]

            # If at any point the FIFO fills up, group delay can no longer
            # be determined and compensated.
            self.sync += If(~channel_buffer.writable, self.err.eq(1))

            self.submodules += [single_rx, channel_buffer]

        self.submodules.fsm = FSM(reset_state="WAIT_ALIGN_DONE")

        self.fsm.act(
            "WAIT_ALIGN_DONE",
            If(self.align_done, NextState("WAIT_ZERO")),
        )

        self.fsm.act(
            "WAIT_ZERO",
            If(self.rxdata == 0, NextState("WAIT_PULSE")),
        )

        # A lane keeps reading only while its own 5-bit output is still zero.
        hold_early_lanes = [
            buffer_outflow[lane].eq(self.rxdata[5 * lane:5 * (lane + 1)] == 0)
            for lane in range(4)
        ]
        all_ones = (1 << 20) - 1

        # Control outflow until all channels find the pulse
        self.fsm.act(
            "WAIT_PULSE",
            If(
                self.rxdata == all_ones,
                buffer_outflow.eq(0b1111),
                self.delay_done.eq(1),
                NextState("GROUP_DELAY_DONE"),
            ).Else(*hold_early_lanes),
        )

        self.fsm.act(
            "GROUP_DELAY_DONE",
            self.delay_done.eq(1),
            NextState("GROUP_DELAY_DONE"),
        )