diff --git a/src/gateware/cxp_downconn.py b/src/gateware/cxp_downconn.py
index 40e9938..9288bbe 100644
--- a/src/gateware/cxp_downconn.py
+++ b/src/gateware/cxp_downconn.py
@@ -7,29 +7,20 @@
 from misoc.interconnect.csr import *
 from misoc.interconnect import stream
 from artiq.gateware.drtio.transceiver.gtx_7series_init import *
-from cxp_pipeline import downconn_layout
+from cxp_pipeline import word_layout
 
 from functools import reduce
 from operator import add
 
-class CXP_DownConn_PHY(Module, AutoCSR):
+class CXP_DownConn_PHYS(Module, AutoCSR):
     def __init__(self, refclk, pads, sys_clk_freq, debug_sma, pmod_pads):
-        nconn = len(pads)
-        self.rx_start_init = CSRStorage()
-        self.rx_restart = CSR()
-
-        self.tx_start_init = CSRStorage()
-        self.tx_restart = CSR()
-        self.txenable = CSRStorage()
-
-        self.rx_ready = CSRStatus(nconn)
-
         self.qpll_reset = CSR()
         self.qpll_locked = CSRStatus()
-        self.gtxs = []
+        self.rx_phys = []
 
         # # #
 
+        # For speeds higher than 6.6 Gbps, the QPLL needs to be used instead of the CPLL
         self.submodules.qpll = qpll = QPLL(refclk, sys_clk_freq)
         self.sync += [
             qpll.reset.eq(self.qpll_reset.re),
@@ -37,13 +28,10 @@ class CXP_DownConn_PHY(Module, AutoCSR):
         ]
 
-
-        for i in range(nconn):
-            if i != 0:
-                break
-            gtx = GTX(self.qpll, pads[i], sys_clk_freq, tx_mode="single", rx_mode="single")
-            self.gtxs.append(gtx)
-            setattr(self.submodules, "gtx"+str(i), gtx)
+        for i, pad in enumerate(pads):
+            rx = Receiver(qpll, pad, sys_clk_freq, "single", "single", debug_sma, pmod_pads)
+            self.rx_phys.append(rx)
+            setattr(self.submodules, "rx"+str(i), rx)
 
         # TODO: add extension gtx connections
         # TODO: add connection interface
@@ -52,191 +40,83 @@ class CXP_DownConn_PHY(Module, AutoCSR):
         # checkout channel interfaces & drtio_gtx
         # GTPTXPhaseAlignement for inspiration
 
-        # Connect all GTX connections' DRP
-        self.gtx_daddr = CSRStorage(9)
-        self.gtx_dread = CSR()
-        self.gtx_din_stb = CSR()
-        self.gtx_din = CSRStorage(16)
+class Receiver(Module):
+    def __init__(self, qpll, pad, sys_clk_freq, tx_mode, rx_mode, debug_sma, pmod_pads):
+        self.submodules.gtx = gtx = GTX(qpll, pad, sys_clk_freq, tx_mode="single", rx_mode="single")
+
+        # DEBUG: remove cdc rx fifo
+        # gtx rx -> fifo out -> cdc out
+
+        rx_fifo = stream.AsyncFIFO(word_layout, 512)
+        self.submodules += ClockDomainsRenamer({"write": "cxp_gtx_rx", "read": "sys"})(rx_fifo)
+        self.source = rx_fifo.source
 
-        self.gtx_dout = CSRStatus(16)
-        self.gtx_dready = CSR()
-
-        for gtx in self.gtxs:
-            self.sync += [
-
-                gtx.txenable.eq(self.txenable.storage[0]),
-                gtx.tx_restart.eq(self.tx_restart.re),
-                gtx.rx_restart.eq(self.rx_restart.re),
-                gtx.tx_init.clk_path_ready.eq(self.tx_start_init.storage),
-                gtx.rx_init.clk_path_ready.eq(self.rx_start_init.storage),
-            ]
-
-            self.comb += gtx.dclk.eq(ClockSignal("sys"))
-            self.sync += [
-                gtx.den.eq(0),
-                gtx.dwen.eq(0),
-                If(self.gtx_dread.re,
-                    gtx.den.eq(1),
-                    gtx.daddr.eq(self.gtx_daddr.storage),
-                ).Elif(self.gtx_din_stb.re,
-                    gtx.den.eq(1),
-                    gtx.dwen.eq(1),
-                    gtx.daddr.eq(self.gtx_daddr.storage),
-                    gtx.din.eq(self.gtx_din.storage),
+        for i in range(4):
+            self.sync.cxp_gtx_rx += [
+                rx_fifo.sink.stb.eq(0),
+                # don't store idle words in the fifo
+                If((gtx.rx_ready & rx_fifo.sink.ack & ~((gtx.decoders[0].d == 0xBC) & (gtx.decoders[0].k == 1))),
+                    rx_fifo.sink.stb.eq(1),
+                    rx_fifo.sink.data[i*8:(i*8)+8].eq(gtx.decoders[i].d),
+                    rx_fifo.sink.k[i].eq(gtx.decoders[i].k),
                 ),
             ]
 
-        # TODO: deal with 4 GTX instance of outpus
-        for n, gtx in enumerate(self.gtxs):
-            self.sync += [
-                self.rx_ready.status[n].eq(gtx.rx_ready),
-                If(gtx.dready,
-                    self.gtx_dready.w.eq(1),
-                    self.gtx_dout.status.eq(gtx.dout),
-                ),
-                If(self.gtx_dready.re,
-                    self.gtx_dready.w.eq(0),
-                ),
-            ]
-
-        self.sources = []
+        # DEBUG: tx fifos for loopback
+        # fw -> fifo (sys) -> cdc fifo -> gtx tx
 
-        for n, gtx in enumerate(self.gtxs):
-            # DEBUG: remove cdc fifo
-            # gtx rx -> fifo out -> cdc out
-
-            fifo_out = stream.AsyncFIFO(downconn_layout, 512)
-            self.submodules += ClockDomainsRenamer({"write": "cxp_gtx_rx", "read": "sys"})(fifo_out)
-            self.sources.append(fifo_out)
+        tx_fifo = stream.AsyncFIFO(word_layout, 512)
+        self.submodules += ClockDomainsRenamer({"write": "sys", "read": "cxp_gtx_tx"})(tx_fifo)
+        self.sink = tx_fifo.sink
+
+        self.tx_stb_sys = Signal()
+        txstb = Signal()
+        self.specials += MultiReg(self.tx_stb_sys, txstb, odomain="cxp_gtx_tx")
 
-            for i in range(4):
-                self.sync.cxp_gtx_rx += [
-                    fifo_out.sink.stb.eq(0),
-                    # don't store idle word in fifo
-                    If((gtx.rx_ready & fifo_out.sink.ack & ~((gtx.decoders[0].d == 0xBC) & (gtx.decoders[0].k == 1))),
-                        fifo_out.sink.stb.eq(1),
-                        fifo_out.sink.data[i*8:(i*8)+8].eq(gtx.decoders[i].d),
-                        fifo_out.sink.k[i].eq(gtx.decoders[i].k),
-                    ),
-                ]
-
+        word_count = Signal(max=100)
+        # JANK: workaround for every 98th word getting eaten
+        # cnt   97        98    99    0
+        # out   fifo[97]  IDLE  IDLE  fifo[99]
+        # ack   1         0     0     1
+        self.sync.cxp_gtx_tx += [
+            tx_fifo.source.ack.eq(0),
 
-            # DEBUG: tx of gtx is not used in CXP
-            # DEBUG: txusrclk PLL DRG
-
-            self.txpll_reset = CSRStorage()
-            self.pll_daddr = CSRStorage(7)
-            self.pll_dclk = CSRStorage()
-            self.pll_den = CSRStorage()
-            self.pll_din = CSRStorage(16)
-            self.pll_dwen = CSRStorage()
-
-            self.txpll_locked = CSRStatus()
-            self.pll_dout = CSRStatus(16)
-            self.pll_dready = CSRStatus()
-
-            self.txinit_phaligndone = CSRStatus()
-            self.rxinit_phaligndone = CSRStatus()
-
-            self.tx_stb = CSRStorage()
-            self.sinks = []
-
-        for n, gtx in enumerate(self.gtxs):
-            self.comb += [
-                gtx.txpll_reset.eq(self.txpll_reset.storage),
-                gtx.pll_daddr.eq(self.pll_daddr.storage),
-                gtx.pll_dclk.eq(self.pll_dclk.storage),
-                gtx.pll_den.eq(self.pll_den.storage),
-                gtx.pll_din.eq(self.pll_din.storage),
-                gtx.pll_dwen.eq(self.pll_dwen.storage),
-
-                self.txinit_phaligndone.status.eq(gtx.tx_init.Xxphaligndone),
-                self.rxinit_phaligndone.status.eq(gtx.rx_init.Xxphaligndone),
-                self.txpll_locked.status.eq(gtx.txpll_locked),
-                self.pll_dout.status.eq(gtx.pll_dout),
-                self.pll_dready.status.eq(gtx.pll_dready),
-            ]
-
-            # DEBUG:loopback
-            self.loopback_mode = CSRStorage(3)
-            self.comb += gtx.loopback_mode.eq(self.loopback_mode.storage)
-
-            # DEBUG: datain
-            # fw -> fifo (sys) -> cdc fifo -> gtx tx
-
-            fifo_in = stream.AsyncFIFO(downconn_layout, 512)
-            self.submodules += ClockDomainsRenamer({"write": "sys", "read": "cxp_gtx_tx"})(fifo_in)
-            self.sinks.append(fifo_in)
-
-            # TODO: why there this send an extra 0xFB word
-            txstb = Signal()
-            self.specials += MultiReg(self.tx_stb.storage, txstb, odomain="cxp_gtx_tx")
-
-            word_count = Signal(max=100)
-
-            # JANK: fix the every 98th word got eaten
-            # cnt 97 98 99 0
-            # out fifo[97] IDLE IDLE fifo[99]
-            # ack 1 0 0 1
-            self.sync.cxp_gtx_tx += [
-                fifo_in.source.ack.eq(0),
-
-                If(word_count == 99,
-                    word_count.eq(word_count.reset),
-                ).Else(
-                    If(fifo_in.source.stb & txstb,
-                        If(word_count != 98, fifo_in.source.ack.eq(1)),
-                        word_count.eq(word_count + 1),
-                    )
-                )
-            ]
+            If(word_count == 99,
+                word_count.eq(word_count.reset),
+            ).Else(
+                If(tx_fifo.source.stb & txstb,
+                    If(word_count != 98, tx_fifo.source.ack.eq(1)),
+                    word_count.eq(word_count + 1),
+                )
+            )
+        ]
 
-            # NOTE: prevent the first word send twice due to stream stb delay
-            self.comb += [
-                If((fifo_in.source.stb & fifo_in.source.ack & (word_count != 99)),
-                    gtx.encoder.d[0].eq(fifo_in.source.data[:8]),
-                    gtx.encoder.d[1].eq(fifo_in.source.data[8:16]),
-                    gtx.encoder.d[2].eq(fifo_in.source.data[16:24]),
-                    gtx.encoder.d[3].eq(fifo_in.source.data[24:]),
-                    gtx.encoder.k[0].eq(fifo_in.source.k[0]),
-                    gtx.encoder.k[1].eq(fifo_in.source.k[1]),
-                    gtx.encoder.k[2].eq(fifo_in.source.k[2]),
-                    gtx.encoder.k[3].eq(fifo_in.source.k[3]),
-                ).Else(
-                    # NOTE: IDLE WORD
-                    gtx.encoder.d[0].eq(0xBC),
-                    gtx.encoder.k[0].eq(1),
-                    gtx.encoder.d[1].eq(0x3C),
-                    gtx.encoder.k[1].eq(1),
-                    gtx.encoder.d[2].eq(0x3C),
-                    gtx.encoder.k[2].eq(1),
-                    gtx.encoder.d[3].eq(0xB5),
-                    gtx.encoder.k[3].eq(0),
-                )
-            ]
+        # NOTE: prevent the first word from being sent twice due to the stream stb delay
+        self.comb += [
+            If((tx_fifo.source.stb & tx_fifo.source.ack & (word_count != 99)),
+                gtx.encoder.d[0].eq(tx_fifo.source.data[:8]),
+                gtx.encoder.d[1].eq(tx_fifo.source.data[8:16]),
+                gtx.encoder.d[2].eq(tx_fifo.source.data[16:24]),
+                gtx.encoder.d[3].eq(tx_fifo.source.data[24:]),
+                gtx.encoder.k[0].eq(tx_fifo.source.k[0]),
+                gtx.encoder.k[1].eq(tx_fifo.source.k[1]),
+                gtx.encoder.k[2].eq(tx_fifo.source.k[2]),
+                gtx.encoder.k[3].eq(tx_fifo.source.k[3]),
+            ).Else(
+                # NOTE: IDLE WORD
+                gtx.encoder.d[0].eq(0xBC),
+                gtx.encoder.k[0].eq(1),
+                gtx.encoder.d[1].eq(0x3C),
+                gtx.encoder.k[1].eq(1),
+                gtx.encoder.d[2].eq(0x3C),
+                gtx.encoder.k[2].eq(1),
+                gtx.encoder.d[3].eq(0xB5),
+                gtx.encoder.k[3].eq(0),
+            )
+        ]
 
-            # DEBUG: IO SMA & PMOD
-            if n == 0:
-                self.specials += [
-                    # Instance("OBUF", i_I=gtx.cd_cxp_gtx_rx.clk, o_O=debug_sma.p_tx),
-                    # Instance("OBUF", i_I=gtx.cd_cxp_gtx_tx.clk, o_O=debug_sma.n_rx),
-
-                    # # pmod 0-7 pin
-                    # Instance("OBUF", i_I=txstb, o_O=pmod_pads[0]),
-                    # Instance("OBUF", i_I=fifo_in.source.stb, o_O=pmod_pads[1]),
-                    # Instance("OBUF", i_I=fifo_in.source.ack, o_O=pmod_pads[2]),
-                    # Instance("OBUF", i_I=gtx.comma_checker.aligner_en, o_O=pmod_pads[3]),
-                    # Instance("OBUF", i_I=gtx.comma_checker.check_reset, o_O=pmod_pads[4]),
-                    # Instance("OBUF", i_I=gtx.comma_checker.has_comma, o_O=pmod_pads[5]),
-                    # Instance("OBUF", i_I=gtx.comma_checker.has_error, o_O=pmod_pads[6]),
-                    # Instance("OBUF", i_I=gtx.comma_checker.ready_sys, o_O=pmod_pads[7]),
-
-                    # Instance("OBUF", i_I=gtx.dclk, o_O=pmod_pads[0]),
-                    # Instance("OBUF", i_I=gtx.den, o_O=pmod_pads[1]),
-                    # Instance("OBUF", i_I=gtx.dwen, o_O=pmod_pads[2]),
-                    # Instance("OBUF", i_I=gtx.dready, o_O=pmod_pads[3]),
-                ]
 
 class QPLL(Module, AutoCSR):
     def __init__(self, refclk, sys_clk_freq):
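
The tx pacing added above is easiest to see outside the gateware. Below is a minimal, software-only sketch (plain Python, not Migen) of what the new self.sync.cxp_gtx_tx block does under the stated assumptions: the FIFO is not acked on counts 98 and 99 of every 100-cycle window, so the encoder falls back to the CXP idle word (0xBC, 0x3C, 0x3C, 0xB5 with K flags 1, 1, 1, 0) on those cycles. The names tx_schedule, IDLE and period are illustrative only and are not part of the gateware.

# Software-only sketch of the Receiver tx word pacing (illustrative, not gateware).
IDLE = ((0xBC, 0x3C, 0x3C, 0xB5), (1, 1, 1, 0))  # idle word bytes and their K flags

def tx_schedule(words, period=100):
    """Yield one (data, k) word per cxp_gtx_tx cycle until `words` is drained."""
    fifo = list(words)
    count = 0
    while fifo:
        if count == period - 1:      # count 99: reset the counter, hold the FIFO, send IDLE
            count = 0
            yield IDLE
        elif count == period - 2:    # count 98: hold the FIFO, send IDLE
            count += 1
            yield IDLE
        else:                        # counts 0..97: ack and forward one FIFO word
            count += 1
            yield fifo.pop(0)

# Example: 200 data words drain in 204 cycles, with 4 IDLE words inserted.
if __name__ == "__main__":
    data = [((i & 0xFF,) * 4, (0, 0, 0, 0)) for i in range(200)]
    out = list(tx_schedule(data))
    assert len(out) == 204 and sum(w == IDLE for w in out) == 4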