from migen import *
from migen.genlib.misc import WaitTimer
from migen.genlib.fifo import SyncFIFO
from util import PriorityEncoderMSB


class SingleLineTX(Module):
    """Serialize a 5-bit word onto one line through an OSERDESE2.

    Attributes:
        txdata: 5-bit parallel word; bit ``n`` drives pin ``D(n+1)``.
        ser_out: serialized output (``OQ``).
        t_out: 3-state control output (``TQ``).
    """

    def __init__(self):
        self.txdata = Signal(5)
        self.ser_out = Signal()
        self.t_out = Signal()

        # D1..D5 are fed from txdata[0]..txdata[4].
        data_pins = {
            "i_D{}".format(bit + 1): self.txdata[bit] for bit in range(5)
        }

        # TX SERDES
        self.specials += Instance(
            "OSERDESE2",
            p_DATA_RATE_OQ="SDR",
            p_DATA_RATE_TQ="BUF",
            p_DATA_WIDTH=5,
            p_TRISTATE_WIDTH=1,
            p_INIT_OQ=0b00000,
            o_OQ=self.ser_out,
            o_TQ=self.t_out,
            i_RST=ResetSignal(),
            i_CLK=ClockSignal("sys5x"),
            i_CLKDIV=ClockSignal(),
            i_TCE=1,
            i_OCE=1,
            # TODO: Hardcode t_in? Output disable is always unnecessary?
            i_T1=0,
            **data_pins
        )


class SingleLineRX(Module):
    """Deserialize one line into 10 bits with a tunable input delay.

    A master/slave ISERDESE2 pair (DDR, width 10) produces ``rxdata``;
    an IDELAYE2 in ``VAR_LOAD`` mode sits between ``ser_in_no_dly`` and
    the master's ``DDLY`` input.

    Attributes:
        rxdata: 10-bit deserialized word (master Q1..Q8 -> bits 9..2,
            slave Q3/Q4 -> bits 1/0).
        ser_in_no_dly: undelayed serial input (IDELAYE2 ``IDATAIN``).
        ld: IDELAYE2 ``LD`` (load ``cnt_in``).
        ce: IDELAYE2 ``CE`` (``INC`` is tied to 1).
        cnt_in: tap value presented on ``CNTVALUEIN``.
        cnt_out: tap value read from ``CNTVALUEOUT``.
        master_bitslip: ``BITSLIP`` of the master deserializer.
        slave_bitslip: ``BITSLIP`` of the slave deserializer.
    """

    def __init__(self):
        self.rxdata = Signal(10)
        self.ser_in_no_dly = Signal()
        self.ld = Signal()
        self.ce = Signal()
        self.cnt_in = Signal(5)
        self.cnt_out = Signal(5)
        self.master_bitslip = Signal()
        self.slave_bitslip = Signal()

        ser_in = Signal()
        shifts = Signal(2)

        def iserdes_common():
            # Settings shared by both deserializers. A fresh dict (and
            # fresh clock/reset expressions) is built for each primitive.
            return dict(
                p_DATA_RATE="DDR",
                p_DATA_WIDTH=10,
                p_INTERFACE_TYPE="NETWORKING",
                p_NUM_CE=1,
                p_IOBDELAY="IFD",
                i_CLK=ClockSignal("rx_sys5x"),
                i_CLKB=~ClockSignal("rx_sys5x"),
                i_CE1=1,
                i_RST=ResetSignal("rx_sys"),
                i_CLKDIV=ClockSignal("rx_sys"),
            )

        # Master deserializer: Q1..Q8 map onto rxdata[9]..rxdata[2].
        master_ports = iserdes_common()
        master_ports.update(
            ("o_Q{}".format(pin), self.rxdata[10 - pin])
            for pin in range(1, 9)
        )
        master = Instance(
            "ISERDESE2",
            p_SERDES_MODE="MASTER",
            o_SHIFTOUT1=shifts[0],
            o_SHIFTOUT2=shifts[1],
            i_DDLY=ser_in,
            i_BITSLIP=self.master_bitslip,
            **master_ports
        )

        # Slave deserializer: supplies the two lowest bits of rxdata.
        # DDLY is not connected here; the slave gets its data through
        # SHIFTIN1/SHIFTIN2 from the master.
        slave = Instance(
            "ISERDESE2",
            p_SERDES_MODE="SLAVE",
            o_Q3=self.rxdata[1],
            o_Q4=self.rxdata[0],
            # i_DDLY=ser_in,
            i_BITSLIP=self.slave_bitslip,
            i_SHIFTIN1=shifts[0],
            i_SHIFTIN2=shifts[1],
            **iserdes_common()
        )

        # Tunable delay
        delay = Instance(
            "IDELAYE2",
            p_DELAY_SRC="IDATAIN",
            p_SIGNAL_PATTERN="DATA",
            p_CINVCTRL_SEL="FALSE",
            p_HIGH_PERFORMANCE_MODE="TRUE",
            # REFCLK refers to the clock source of IDELAYCTRL
            p_REFCLK_FREQUENCY=200.0,
            p_PIPE_SEL="FALSE",
            p_IDELAY_TYPE="VAR_LOAD",
            p_IDELAY_VALUE=0,
            i_C=ClockSignal("rx_sys"),
            i_LD=self.ld,
            i_CE=self.ce,
            i_LDPIPEEN=0,
            # Always increment
            i_INC=1,
            # Set the optimal delay tap via the aligner
            i_CNTVALUEIN=self.cnt_in,
            # Allow the aligner to check the tap value
            o_CNTVALUEOUT=self.cnt_out,
            i_IDATAIN=self.ser_in_no_dly,
            o_DATAOUT=ser_in,
        )

        # IDELAYCTRL is with the clocking
        self.specials += [master, slave, delay]


class BitSlipReader(Module):
    """Record ``loopback_rxdata`` for successive double-bitslip settings.

    After ``start`` the FSM waits for the stabilization timer, stores the
    current word in ``data_result[slip_count]``, increments ``slip_count``
    and pulses ``bitslip`` twice (one idle cycle in between). This repeats
    until ``slip_count`` equals 5, after which ``done`` stays asserted.
    """

    def __init__(self):
        # IN
        self.loopback_rxdata = Signal(10)
        self.start = Signal()

        # Wait for stabilization after bitslip
        self.submodules.stab_timer = WaitTimer(511)

        # OUT
        self.done = Signal()
        self.bitslip = Signal()
        self.data_result = Array(Signal(10) for _ in range(5))
        self.slip_count = Signal(3)

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        fsm.act(
            "WAIT_START",
            If(
                self.start,
                NextState("WAIT_TIMER"),
            ).Else(
                NextState("WAIT_START"),
            ),
        )

        fsm.act(
            "WAIT_TIMER",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SAMPLE"),
            ),
        )

        fsm.act(
            "SAMPLE",
            # The timer's wait input is not driven in this state; being a
            # combinatorial FSM output it falls back to its default value,
            # so no explicit deassertion is needed.
            # The captured result is kept until reset.
            NextValue(
                self.data_result[self.slip_count], self.loopback_rxdata
            ),
            NextValue(self.slip_count, self.slip_count + 1),
            NextState("HIGH_BITSLIP_FIRST"),
        )

        # Pulsing BITSLIP alternates between 1 right shift and 3 left
        # shifts. We are trying to figure out which 2 bits the slave is
        # copying from, hence we only want shifts by 2. Pulsing twice does
        # exactly that.
        fsm.act(
            "HIGH_BITSLIP_FIRST",
            self.bitslip.eq(1),
            NextState("LOW_BITSLIP"),
        )

        fsm.act(
            "LOW_BITSLIP",
            # bitslip is not driven here, so it returns to 0 by itself
            NextState("HIGH_BITSLIP_SECOND"),
        )

        fsm.act(
            "HIGH_BITSLIP_SECOND",
            self.bitslip.eq(1),
            If(
                self.slip_count == 5,
                NextState("TERMINATE"),
            ).Else(
                NextState("WAIT_TIMER"),
            ),
        )

        fsm.act(
            "TERMINATE",
            self.done.eq(1),
            NextState("TERMINATE"),
        )


class SlaveAligner(Module):
    """Drive master/slave bitslip until the received pattern is placed.

    Phases, in FSM order:

    1. Slip master and slave together (two pulses per step) until either
       of ``loopback_rxdata[0:2]`` is set, or five steps have been taken.
    2. If the low bits were hit, slip only the master while the matching
       odd/even master bits are non-zero.
    3. Slip both again until ``loopback_rxdata[3:5]`` is non-zero while
       bit 5 is clear, then assert ``done``.
    """

    def __init__(self):
        # IN
        self.loopback_rxdata = Signal(10)
        self.start = Signal()

        # Wait for stabilization after bitslip
        self.submodules.stab_timer = WaitTimer(511)

        # OUT
        self.done = Signal()
        self.master_bitslip = Signal()
        self.slave_bitslip = Signal()

        slip_count = Signal(3)
        check_odd = Signal()
        check_even = Signal()

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        fsm.act(
            "WAIT_START",
            If(
                self.start,
                NextState("WAIT_TIMER"),
            ),
        )

        fsm.act(
            "WAIT_TIMER",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SAMPLE"),
            ),
        )

        fsm.act(
            "SAMPLE",
            # The timer's wait input is not driven in this state; being a
            # combinatorial FSM output it falls back to its default value.
            # Detect the last 2 bits. If a signal is received there,
            # detune the master bitslip if necessary.
            If(
                self.loopback_rxdata[0] | self.loopback_rxdata[1],
                NextValue(check_odd, self.loopback_rxdata[1]),
                NextValue(check_even, self.loopback_rxdata[0]),
                NextState("CHECK_MASTER_BITSLIP"),
            ).Else(
                NextValue(slip_count, slip_count + 1),
                NextState("HIGH_BITSLIP_FIRST"),
            ),
        )

        # Pulsing BITSLIP alternates between 1 right shift and 3 left
        # shifts. We are trying to figure out which 2 bits the slave is
        # copying from, hence we only want shifts by 2. Pulsing twice does
        # exactly that.
        fsm.act(
            "HIGH_BITSLIP_FIRST",
            self.master_bitslip.eq(1),
            self.slave_bitslip.eq(1),
            NextState("LOW_BITSLIP"),
        )

        fsm.act(
            "LOW_BITSLIP",
            # bitslip outputs are not driven here, so they return to 0
            NextState("HIGH_BITSLIP_SECOND"),
        )

        fsm.act(
            "HIGH_BITSLIP_SECOND",
            self.master_bitslip.eq(1),
            self.slave_bitslip.eq(1),
            If(
                slip_count == 5,
                NextState("SHIFT_WAIT_TIMER"),
            ).Else(
                NextState("WAIT_TIMER"),
            ),
        )

        # Odd / even bits that come from the master deserializer.
        odd_master_rxdata = self.loopback_rxdata[3::2]
        even_master_rxdata = self.loopback_rxdata[2::2]

        # Alternatively, we align the master with the slave
        fsm.act(
            "CHECK_MASTER_BITSLIP",
            # Default transition: once the checked odd and/or even master
            # bits read 0, detuning is complete. The conditional
            # transitions below are listed later and so take precedence.
            NextState("SHIFT_WAIT_TIMER"),
            If(
                check_odd & (odd_master_rxdata != 0),
                NextState("MASTER_HIGH_BITSLIP_FIRST"),
            ),
            If(
                check_even & (even_master_rxdata != 0),
                NextState("MASTER_HIGH_BITSLIP_FIRST"),
            ),
        )

        fsm.act(
            "MASTER_HIGH_BITSLIP_FIRST",
            self.master_bitslip.eq(1),
            NextState("MASTER_LOW_BITSLIP"),
        )

        fsm.act(
            "MASTER_LOW_BITSLIP",
            # master_bitslip is not driven here, so it returns to 0
            NextState("MASTER_HIGH_BITSLIP_SECOND"),
        )

        fsm.act(
            "MASTER_HIGH_BITSLIP_SECOND",
            self.master_bitslip.eq(1),
            NextState("MASTER_WAIT_TIMER"),
        )

        fsm.act(
            "MASTER_WAIT_TIMER",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("CHECK_MASTER_BITSLIP"),
            ),
        )

        # After eliminating the potentially duplicated pattern, shift the
        # entire output pattern for delay tap optimization. Ideally the
        # optimized first edge would be the middle pair, so shift until
        # bit 3/4 is set but bit 5 is not.
        fsm.act(
            "SHIFT_WAIT_TIMER",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SHIFT_SAMPLE_PATTERN"),
            ),
        )

        fsm.act(
            "SHIFT_SAMPLE_PATTERN",
            If(
                (self.loopback_rxdata[3:5] != 0) & ~self.loopback_rxdata[5],
                NextState("TERMINATE"),
            ).Else(
                NextState("SHIFT_HIGH_BITSLIP_FIRST"),
            ),
        )

        fsm.act(
            "SHIFT_HIGH_BITSLIP_FIRST",
            self.master_bitslip.eq(1),
            self.slave_bitslip.eq(1),
            NextState("SHIFT_LOW_BITSLIP"),
        )

        fsm.act(
            "SHIFT_LOW_BITSLIP",
            # bitslip outputs are not driven here, so they return to 0
            NextState("SHIFT_HIGH_BITSLIP_SECOND"),
        )

        fsm.act(
            "SHIFT_HIGH_BITSLIP_SECOND",
            self.master_bitslip.eq(1),
            self.slave_bitslip.eq(1),
            NextState("SHIFT_WAIT_TIMER"),
        )

        fsm.act(
            "TERMINATE",
            self.done.eq(1),
            NextState("TERMINATE"),
        )


class PhaseReader(Module):
    """Record ``loopback_rxdata`` for every delay tap value.

    After ``start`` the FSM repeatedly waits for the stabilization timer,
    stores the word in ``data_result[delay_tap]`` and pulses ``inc_en``
    for one cycle. It finishes (``done`` asserted) when ``delay_tap``
    reads 0 after an increment.
    """

    def __init__(self):
        # Drives the IDELAYE2 CE pin to increment the delay.
        # The pulse lasts exactly one cycle.
        self.inc_en = Signal()

        self.loopback_rxdata = Signal(10)
        self.delay_tap = Signal(5)

        # Pull up to start the phase reader
        self.start = Signal()

        self.data_result = Array(Signal(10) for _ in range(32))
        self.done = Signal()

        # Wait for stabilization after increment
        self.submodules.stab_timer = WaitTimer(511)

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        fsm.act(
            "WAIT_START",
            If(
                self.start,
                NextState("WAIT_TIMER"),
            ).Else(
                NextState("WAIT_START"),
            ),
        )

        fsm.act(
            "WAIT_TIMER",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SAMPLE"),
            ),
        )

        fsm.act(
            "SAMPLE",
            # The timer's wait input is not driven in this state; being a
            # combinatorial FSM output it falls back to its default value.
            # The captured result is kept until reset.
            NextValue(
                self.data_result[self.delay_tap], self.loopback_rxdata
            ),
            NextState("HIGH_CE"),
        )

        fsm.act(
            "HIGH_CE",
            self.inc_en.eq(1),
            NextState("LOW_CE"),
        )

        fsm.act(
            "LOW_CE",
            # The tap output is available 1 cycle after the pulse.
            # inc_en needs no explicit reset: it is only driven
            # combinatorially inside HIGH_CE.
            NextState("READ_TAP"),
        )

        fsm.act(
            "READ_TAP",
            If(
                self.delay_tap != 0,
                NextState("WAIT_TIMER"),
            ).Else(
                NextState("PROBE_FIN"),
            ),
        )

        fsm.act(
            "PROBE_FIN",
            self.done.eq(1),
            NextState("PROBE_FIN"),
        )


class DelayOptimizer(Module):
    """Search the delay taps for the window in which a pulse bit is seen.

    Sequence, as implemented by the FSM:

    1. Sample the word at the current tap and derive ``expected_pulse``,
       the bit index following the highest set bit (with wrap-around).
    2. Increment the tap until ``expected_pulse`` is set; store that tap
       in ``min_delay``.
    3. Keep incrementing, counting ``max_offset``, until the bit clears.
    4. Load ``min_delay + max_offset / 2`` as ``opt_delay_tap`` and wait
       for ``delay_tap`` to read it back, then assert ``done``.
    """

    def __init__(self):
        # IN
        # Signals from the channel
        self.loopback_rxdata = Signal(10)
        self.delay_tap = Signal(5)

        # IN
        # Signal to start the calculation
        self.start = Signal()

        # OUT
        # Signals for controlling the channel delay tap
        self.ld = Signal()
        self.inc_en = Signal()

        # OUT
        # The optimal delay
        self.opt_delay_tap = Signal(5)

        # OUT
        # Keep even/odd indices, decimate the other
        self.select_odd = Signal()

        # OUT
        # Optimal delay is calculated
        self.done = Signal()

        # Priority encoder for finding the pulse location
        self.submodules.pulse_encoder = PriorityEncoderMSB(10)

        # Wait for stabilization after increment
        self.submodules.stab_timer = WaitTimer(511)

        # Intermediate signals
        self.expected_pulse = Signal(max=9)
        self.min_delay = Signal(5)
        self.max_offset = Signal(5)

        # Translate rxdata into an array to allow dynamic indexing
        self.rxdata_array = Array(Signal() for _ in range(10))
        self.comb += [
            bit.eq(self.loopback_rxdata[pos])
            for pos, bit in enumerate(self.rxdata_array)
        ]

        fsm = FSM(reset_state="WAIT_START")
        self.submodules += fsm

        fsm.act(
            "WAIT_START",
            If(
                self.start,
                NextState("WAIT_ZERO"),
            ).Else(
                NextState("WAIT_START"),
            ),
        )

        fsm.act(
            "WAIT_ZERO",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SAMPLE_ZERO"),
            ),
        )

        fsm.act(
            "SAMPLE_ZERO",
            # Oversampling should guarantee the detection. However, the
            # priority encoder itself does not wrap around, so a pulse
            # that wraps (bit 0 and the top bit both set) must not be
            # passed into the priority encoder.
            If(
                self.loopback_rxdata[0] & self.loopback_rxdata[-1],
                NextValue(self.expected_pulse, 1),
            ).Else(
                self.pulse_encoder.i.eq(self.loopback_rxdata),
                If(
                    self.pulse_encoder.o == 9,
                    NextValue(self.expected_pulse, 0),
                ).Else(
                    NextValue(self.expected_pulse, self.pulse_encoder.o + 1),
                ),
            ),
            # Go to the next delay tap and wait for the pulse.
            NextState("INC_PULSE_DELAY_IN"),
        )

        fsm.act(
            "WAIT_PULSE_IN",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SAMPLE_PULSE_IN"),
            ),
        )

        fsm.act(
            "SAMPLE_PULSE_IN",
            If(
                self.rxdata_array[self.expected_pulse],
                NextValue(self.min_delay, self.delay_tap),
                NextState("INC_PULSE_DELAY_OUT"),
            ).Else(
                NextState("INC_PULSE_DELAY_IN"),
            ),
        )

        fsm.act(
            "INC_PULSE_DELAY_IN",
            # inc_en is only driven in this state, so it drops afterwards
            self.inc_en.eq(1),
            NextState("WAIT_PULSE_IN"),
        )

        fsm.act(
            "WAIT_PULSE_OUT",
            self.stab_timer.wait.eq(1),
            If(
                self.stab_timer.done,
                NextState("SAMPLE_PULSE_OUT"),
            ),
        )

        fsm.act(
            "SAMPLE_PULSE_OUT",
            If(
                ~self.rxdata_array[self.expected_pulse],
                NextValue(
                    self.opt_delay_tap,
                    self.min_delay + (self.max_offset >> 1),
                ),
                NextState("LOAD_OPT_DELAY"),
            ).Else(
                NextValue(self.max_offset, self.max_offset + 1),
                NextState("INC_PULSE_DELAY_OUT"),
            ),
        )

        fsm.act(
            "INC_PULSE_DELAY_OUT",
            # inc_en is only driven in this state, so it drops afterwards
            self.inc_en.eq(1),
            NextState("WAIT_PULSE_OUT"),
        )

        fsm.act(
            "LOAD_OPT_DELAY",
            self.ld.eq(1),
            # The optimal delay tap was prepared in SAMPLE_PULSE_OUT
            NextState("WAIT_DELAY_LOAD"),
        )

        fsm.act(
            "WAIT_DELAY_LOAD",
            If(
                self.delay_tap == self.opt_delay_tap,
                NextState("TERMINATE"),
            ),
        )

        fsm.act(
            "TERMINATE",
            self.done.eq(1),
            self.select_odd.eq(self.expected_pulse[0]),
            NextState("TERMINATE"),
        )


class SyncSingleRX(Module):
    """One RX line with bitslip alignment, delay tuning and decimation.

    Wires a ``SingleLineRX`` to a ``SlaveAligner`` and a
    ``DelayOptimizer`` and sequences them: once ``start`` is high and
    ``rxdata`` of the deserializer is non-zero, the aligner runs, then the
    delay optimizer; afterwards ``align_done`` stays asserted. The output
    is every other bit of the 10-bit word, odd or even as latched from
    the optimizer's ``select_odd``.
    """

    def __init__(self):
        # Ports
        # IN: Undelayed serial signal
        self.ser_in_no_dly = Signal()

        # IN: Start RX alignment signal
        self.start = Signal()

        # OUT: Received data after self-alignment, decimation
        self.rxdata = Signal(5)

        # OUT: RXDATA from this channel is self-aligned
        self.align_done = Signal()

        # Components
        self.submodules.rx = SingleLineRX()
        self.submodules.slave_aligner = SlaveAligner()
        self.submodules.delay_solver = DelayOptimizer()

        # Sample decimation
        select_odd = Signal()
        decimated_rxdata = Signal(5)

        # Dataflow
        self.comb += [
            # Delay and oversample the original signal
            self.rx.ser_in_no_dly.eq(self.ser_in_no_dly),

            # Use the deserialized signals for alignment
            self.slave_aligner.loopback_rxdata.eq(self.rx.rxdata),
            self.delay_solver.loopback_rxdata.eq(self.rx.rxdata),

            # Decimate the oversampled signals
            If(
                select_odd,
                decimated_rxdata.eq(self.rx.rxdata[1::2]),
            ).Else(
                decimated_rxdata.eq(self.rx.rxdata[::2]),
            ),

            # Send it to the output
            self.rxdata.eq(decimated_rxdata),
        ]

        # Control signals
        self.comb += [
            # Bitslip alignment
            self.rx.master_bitslip.eq(self.slave_aligner.master_bitslip),
            self.rx.slave_bitslip.eq(self.slave_aligner.slave_bitslip),

            # Tap delay optimization
            self.rx.ce.eq(self.delay_solver.inc_en),
            self.rx.ld.eq(self.delay_solver.ld),
            self.rx.cnt_in.eq(self.delay_solver.opt_delay_tap),
            self.delay_solver.delay_tap.eq(self.rx.cnt_out),
        ]

        self.submodules.fsm = FSM(reset_state="WAIT_SIGNAL")

        self.fsm.act(
            "WAIT_SIGNAL",
            If(
                (self.rx.rxdata != 0) & self.start,
                NextState("WAIT_ALIGNER"),
            ),
        )

        self.fsm.act(
            "WAIT_ALIGNER",
            self.slave_aligner.start.eq(1),
            If(
                self.slave_aligner.done,
                NextState("WAIT_DELAY_OPT"),
            ),
        )

        self.fsm.act(
            "WAIT_DELAY_OPT",
            self.delay_solver.start.eq(1),
            If(
                self.delay_solver.done,
                NextValue(select_odd, self.delay_solver.select_odd),
                NextState("INTRA_ALIGN_DONE"),
            ),
        )

        self.fsm.act(
            "INTRA_ALIGN_DONE",
            self.align_done.eq(1),
            NextState("INTRA_ALIGN_DONE"),
        )


class MultiLineTX(Module):
    """Four ``SingleLineTX`` lanes fed from one 20-bit word.

    Lane ``idx`` serializes ``txdata[5*idx : 5*(idx+1)]`` and drives bit
    ``idx`` of ``ser_out`` and ``t_out``.
    """

    def __init__(self):
        # Ports
        # IN: Unserialized data
        self.txdata = Signal(20)

        # OUT: Serialized data
        self.ser_out = Signal(4)

        # OUT: 3-state signal output
        self.t_out = Signal(4)

        for idx in range(4):
            lane_bits = slice(5 * idx, 5 * (idx + 1))
            single_tx = SingleLineTX()

            self.comb += [
                self.ser_out[idx].eq(single_tx.ser_out),
                self.t_out[idx].eq(single_tx.t_out),
                single_tx.txdata.eq(self.txdata[lane_bits]),
            ]
            self.submodules += single_tx


class MultiLineRX(Module):
    """Four ``SyncSingleRX`` lanes with per-lane FIFOs for group delay.

    Each lane's 5-bit output passes through a ``SyncFIFO(5, 16)`` into
    ``rxdata[5*idx : 5*(idx+1)]``. Once every lane reports alignment, the
    FSM waits for an all-zero word and then, until all 20 bits read 1,
    only lets a lane's FIFO be read while that lane's output is zero.
    ``err`` latches if any FIFO ever becomes non-writable.
    """

    def __init__(self):
        # Ports
        # IN: Undelayed serial signal
        self.ser_in_no_dly = Signal(4)

        # IN: Start alignment process of all channels
        self.start = Signal()

        # OUT: Received data after self-alignment, decimation
        self.rxdata = Signal(20)

        # OUT: RXDATA from all channels are self-aligned
        self.align_done = Signal()

        # OUT: Group delay compensated
        self.delay_done = Signal()

        # OUT: Group delay adjustment failed
        self.err = Signal()

        channel_align_done = Signal(4)
        self.comb += self.align_done.eq(channel_align_done == 0b1111)

        # Read-enable of every lane FIFO; all lanes flow by default and
        # the FSM below overrides this while compensating.
        buffer_outflow = Signal(4)
        self.comb += buffer_outflow.eq(0b1111)

        def lane_bits(idx):
            # Slice of rxdata that belongs to lane idx
            return slice(5 * idx, 5 * (idx + 1))

        for idx in range(4):
            single_rx = SyncSingleRX()
            self.comb += [
                single_rx.ser_in_no_dly.eq(self.ser_in_no_dly[idx]),
                channel_align_done[idx].eq(single_rx.align_done),
                # Propagate start alignment signal to all channels
                single_rx.start.eq(self.start),
            ]

            # FIFOs for handling group delay.
            # The signal from each OSERDES group can have a different
            # delay, so add delay to the groups that receive the pulse
            # early.
            # Maximum delay = 8
            channel_buffer = SyncFIFO(5, 16)
            self.comb += [
                # Allow data to go through the FIFO unless aligning.
                # Pay the memory delay cost.
                channel_buffer.we.eq(1),
                channel_buffer.re.eq(buffer_outflow[idx]),
                # Data always flows from the individual RX to rxdata
                channel_buffer.din.eq(single_rx.rxdata),
                self.rxdata[lane_bits(idx)].eq(channel_buffer.dout),
            ]

            # If at any point the FIFO fills up, group delay can no longer
            # be determined and compensated.
            self.sync += [
                If(
                    ~channel_buffer.writable,
                    self.err.eq(1),
                ),
            ]

            self.submodules += [single_rx, channel_buffer]

        self.submodules.fsm = FSM(reset_state="WAIT_ALIGN_DONE")

        self.fsm.act(
            "WAIT_ALIGN_DONE",
            If(
                self.align_done,
                NextState("WAIT_ZERO"),
            ),
        )

        self.fsm.act(
            "WAIT_ZERO",
            If(
                self.rxdata == 0,
                NextState("WAIT_PULSE"),
            ),
        )

        all_ones = (1 << 20) - 1

        self.fsm.act(
            "WAIT_PULSE",
            # Control outflow until all channels find the pulse
            If(
                self.rxdata == all_ones,
                buffer_outflow.eq(0b1111),
                self.delay_done.eq(1),
                NextState("GROUP_DELAY_DONE"),
            ).Else(
                # A lane keeps draining only while its output is zero
                *[
                    buffer_outflow[ch].eq(self.rxdata[lane_bits(ch)] == 0)
                    for ch in range(4)
                ]
            ),
        )

        self.fsm.act(
            "GROUP_DELAY_DONE",
            self.delay_done.eq(1),
            NextState("GROUP_DELAY_DONE"),
        )