From 20e079a381b777550ce7872151b17fb4a11cd7ae Mon Sep 17 00:00:00 2001 From: Peter Drmota <49479443+pmldrmota@users.noreply.github.com> Date: Mon, 15 Nov 2021 05:09:16 +0100 Subject: [PATCH] AD9910 driver feature extension and SUServo IIR readability (#1500) * coredevice.ad9910: Add set_cfr2 function and extend arguments of set_cfr1 and set_sync * SUServo: Wrap CPLD and DDS devices in a list * SUServo: Refactor [nfc] Co-authored-by: drmota Co-authored-by: David Nadlinger --- RELEASE_NOTES.rst | 3 + artiq/coredevice/ad9910.py | 72 +++++-- artiq/coredevice/suservo.py | 56 ++--- artiq/examples/kasli_suservo/device_db.py | 6 +- artiq/frontend/artiq_ddb_template.py | 15 +- artiq/gateware/suservo/__init__.py | 10 + artiq/gateware/suservo/iir.py | 246 +++++++++++++--------- artiq/gateware/suservo/servo.py | 66 +++++- artiq/gateware/targets/kasli.py | 6 +- 9 files changed, 301 insertions(+), 179 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index c743f2005..467e4e9c4 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -104,6 +104,9 @@ Breaking changes: * ``quamash`` has been replaced with ``qasync``. * Protocols are updated to use device endian. * Analyzer dump format includes a byte for device endianness. +* To support variable numbers of Urukul cards in the future, the + ``artiq.coredevice.suservo.SUServo`` constructor now accepts two device name lists, + ``cpld_devices`` and ``dds_devices``, rather than four individual arguments. * Experiment classes with underscore-prefixed names are now ignored when ``artiq_client`` determines which experiment to submit (consistent with ``artiq_run``). diff --git a/artiq/coredevice/ad9910.py b/artiq/coredevice/ad9910.py index 95ad66896..49bfe9a90 100644 --- a/artiq/coredevice/ad9910.py +++ b/artiq/coredevice/ad9910.py @@ -374,18 +374,25 @@ class AD9910: data[(n - preload) + i] = self.bus.read() @kernel - def set_cfr1(self, power_down: TInt32 = 0b0000, + def set_cfr1(self, + power_down: TInt32 = 0b0000, phase_autoclear: TInt32 = 0, - drg_load_lrr: TInt32 = 0, drg_autoclear: TInt32 = 0, - internal_profile: TInt32 = 0, ram_destination: TInt32 = 0, - ram_enable: TInt32 = 0, manual_osk_external: TInt32 = 0, - osk_enable: TInt32 = 0, select_auto_osk: TInt32 = 0): + drg_load_lrr: TInt32 = 0, + drg_autoclear: TInt32 = 0, + phase_clear: TInt32 = 0, + internal_profile: TInt32 = 0, + ram_destination: TInt32 = 0, + ram_enable: TInt32 = 0, + manual_osk_external: TInt32 = 0, + osk_enable: TInt32 = 0, + select_auto_osk: TInt32 = 0): """Set CFR1. See the AD9910 datasheet for parameter meanings. This method does not pulse IO_UPDATE. :param power_down: Power down bits. :param phase_autoclear: Autoclear phase accumulator. + :param phase_clear: Asynchronous, static reset of the phase accumulator. :param drg_load_lrr: Load digital ramp generator LRR. :param drg_autoclear: Autoclear digital ramp generator. :param internal_profile: Internal profile control. @@ -405,11 +412,41 @@ class AD9910: (drg_load_lrr << 15) | (drg_autoclear << 14) | (phase_autoclear << 13) | + (phase_clear << 11) | (osk_enable << 9) | (select_auto_osk << 8) | (power_down << 4) | 2) # SDIO input only, MSB first + @kernel + def set_cfr2(self, + asf_profile_enable: TInt32 = 1, + drg_enable: TInt32 = 0, + effective_ftw: TInt32 = 1, + sync_validation_disable: TInt32 = 0, + matched_latency_enable: TInt32 = 0): + """Set CFR2. See the AD9910 datasheet for parameter meanings. + + This method does not pulse IO_UPDATE. + + :param asf_profile_enable: Enable amplitude scale from single tone profiles. + :param drg_enable: Digital ramp enable. + :param effective_ftw: Read effective FTW. + :param sync_validation_disable: Disable the SYNC_SMP_ERR pin indicating + (active high) detection of a synchronization pulse sampling error. + :param matched_latency_enable: Simultaneous application of amplitude, + phase, and frequency changes to the DDS arrive at the output + + * matched_latency_enable = 0: in the order listed + * matched_latency_enable = 1: simultaneously. + """ + self.write32(_AD9910_REG_CFR2, + (asf_profile_enable << 24) | + (drg_enable << 19) | + (effective_ftw << 16) | + (matched_latency_enable << 7) | + (sync_validation_disable << 5)) + @kernel def init(self, blind: TBool = False): """Initialize and configure the DDS. @@ -442,7 +479,7 @@ class AD9910: # enable amplitude scale from profiles # read effective FTW # sync timing validation disable (enabled later) - self.write32(_AD9910_REG_CFR2, 0x01010020) + self.set_cfr2(sync_validation_disable=1) self.cpld.io_update.pulse(1 * us) cfr3 = (0x0807c000 | (self.pll_vco << 24) | (self.pll_cp << 19) | (self.pll_en << 8) | @@ -465,7 +502,7 @@ class AD9910: if i >= 100 - 1: raise ValueError("PLL lock timeout") delay(10 * us) # slack - if self.sync_data.sync_delay_seed >= 0: + if self.sync_data.sync_delay_seed >= 0 and not blind: self.tune_sync_delay(self.sync_data.sync_delay_seed) delay(1 * ms) @@ -875,20 +912,26 @@ class AD9910: self.cpld.cfg_sw(self.chip_select - 4, state) @kernel - def set_sync(self, in_delay: TInt32, window: TInt32): + def set_sync(self, + in_delay: TInt32, + window: TInt32, + en_sync_gen: TInt32 = 0): """Set the relevant parameters in the multi device synchronization register. See the AD9910 datasheet for details. The SYNC clock generator preset value is set to zero, and the SYNC_OUT generator is - disabled. + disabled by default. :param in_delay: SYNC_IN delay tap (0-31) in steps of ~75ps :param window: Symmetric SYNC_IN validation window (0-15) in steps of ~75ps for both hold and setup margin. + :param en_sync_gen: Whether to enable the DDS-internal sync generator + (SYNC_OUT, cf. sync_sel == 1). Should be left off for the normal + use case, where the SYNC clock is supplied by the core device. """ self.write32(_AD9910_REG_SYNC, (window << 28) | # SYNC S/H validation delay (1 << 27) | # SYNC receiver enable - (0 << 26) | # SYNC generator disable + (en_sync_gen << 26) | # SYNC generator enable (0 << 25) | # SYNC generator SYS rising edge (0 << 18) | # SYNC preset (0 << 11) | # SYNC output delay @@ -904,9 +947,10 @@ class AD9910: Also modifies CFR2. """ - self.write32(_AD9910_REG_CFR2, 0x01010020) # clear SMP_ERR + self.set_cfr2(sync_validation_disable=1) # clear SMP_ERR self.cpld.io_update.pulse(1 * us) - self.write32(_AD9910_REG_CFR2, 0x01010000) # enable SMP_ERR + delay(10 * us) # slack + self.set_cfr2(sync_validation_disable=0) # enable SMP_ERR self.cpld.io_update.pulse(1 * us) @kernel @@ -984,7 +1028,7 @@ class AD9910: # set up DRG self.set_cfr1(drg_load_lrr=1, drg_autoclear=1) # DRG -> FTW, DRG enable - self.write32(_AD9910_REG_CFR2, 0x01090000) + self.set_cfr2(drg_enable=1) # no limits self.write64(_AD9910_REG_RAMP_LIMIT, -1, 0) # DRCTL=0, dt=1 t_SYNC_CLK @@ -1005,7 +1049,7 @@ class AD9910: ftw = self.read32(_AD9910_REG_FTW) # read out effective FTW delay(100 * us) # slack # disable DRG - self.write32(_AD9910_REG_CFR2, 0x01010000) + self.set_cfr2(drg_enable=0) self.cpld.io_update.pulse_mu(8) return ftw & 1 diff --git a/artiq/coredevice/suservo.py b/artiq/coredevice/suservo.py index 932adf35b..1d0a72dad 100644 --- a/artiq/coredevice/suservo.py +++ b/artiq/coredevice/suservo.py @@ -57,32 +57,26 @@ class SUServo: :param channel: RTIO channel number :param pgia_device: Name of the Sampler PGIA gain setting SPI bus - :param cpld0_device: Name of the first Urukul CPLD SPI bus - :param cpld1_device: Name of the second Urukul CPLD SPI bus - :param dds0_device: Name of the AD9910 device for the DDS on the first - Urukul - :param dds1_device: Name of the AD9910 device for the DDS on the second - Urukul + :param cpld_devices: Names of the Urukul CPLD SPI buses + :param dds_devices: Names of the AD9910 devices :param gains: Initial value for PGIA gains shift register (default: 0x0000). Knowledge of this state is not transferred between experiments. :param core_device: Core device name """ - kernel_invariants = {"channel", "core", "pgia", "cpld0", "cpld1", - "dds0", "dds1", "ref_period_mu"} + kernel_invariants = {"channel", "core", "pgia", "cplds", "ddses", + "ref_period_mu"} def __init__(self, dmgr, channel, pgia_device, - cpld0_device, cpld1_device, - dds0_device, dds1_device, + cpld_devices, dds_devices, gains=0x0000, core_device="core"): self.core = dmgr.get(core_device) self.pgia = dmgr.get(pgia_device) self.pgia.update_xfer_duration_mu(div=4, length=16) - self.dds0 = dmgr.get(dds0_device) - self.dds1 = dmgr.get(dds1_device) - self.cpld0 = dmgr.get(cpld0_device) - self.cpld1 = dmgr.get(cpld1_device) + assert len(dds_devices) == len(cpld_devices) + self.ddses = [dmgr.get(dds) for dds in dds_devices] + self.cplds = [dmgr.get(cpld) for cpld in cpld_devices] self.channel = channel self.gains = gains self.ref_period_mu = self.core.seconds_to_mu( @@ -109,17 +103,15 @@ class SUServo: sampler.SPI_CONFIG | spi.SPI_END, 16, 4, sampler.SPI_CS_PGIA) - self.cpld0.init(blind=True) - cfg0 = self.cpld0.cfg_reg - self.cpld0.cfg_write(cfg0 | (0xf << urukul.CFG_MASK_NU)) - self.dds0.init(blind=True) - self.cpld0.cfg_write(cfg0) + for i in range(len(self.cplds)): + cpld = self.cplds[i] + dds = self.ddses[i] - self.cpld1.init(blind=True) - cfg1 = self.cpld1.cfg_reg - self.cpld1.cfg_write(cfg1 | (0xf << urukul.CFG_MASK_NU)) - self.dds1.init(blind=True) - self.cpld1.cfg_write(cfg1) + cpld.init(blind=True) + prev_cpld_cfg = cpld.cfg_reg + cpld.cfg_write(prev_cpld_cfg | (0xf << urukul.CFG_MASK_NU)) + dds.init(blind=True) + cpld.cfg_write(prev_cpld_cfg) @kernel def write(self, addr, value): @@ -257,9 +249,11 @@ class Channel: self.servo = dmgr.get(servo_device) self.core = self.servo.core self.channel = channel - # FIXME: this assumes the mem channel is right after the control - # channels - self.servo_channel = self.channel + 8 - self.servo.channel + # This assumes the mem channel is right after the control channels + # Make sure this is always the case in eem.py + self.servo_channel = (self.channel + 4 * len(self.servo.cplds) - + self.servo.channel) + self.dds = self.servo.ddses[self.servo_channel // 4] @kernel def set(self, en_out, en_iir=0, profile=0): @@ -311,12 +305,8 @@ class Channel: see :meth:`dds_offset_to_mu` :param phase: DDS phase in turns """ - if self.servo_channel < 4: - dds = self.servo.dds0 - else: - dds = self.servo.dds1 - ftw = dds.frequency_to_ftw(frequency) - pow_ = dds.turns_to_pow(phase) + ftw = self.dds.frequency_to_ftw(frequency) + pow_ = self.dds.turns_to_pow(phase) offs = self.dds_offset_to_mu(offset) self.set_dds_mu(profile, ftw, offs, pow_) diff --git a/artiq/examples/kasli_suservo/device_db.py b/artiq/examples/kasli_suservo/device_db.py index d33bfb280..fdb85dc47 100644 --- a/artiq/examples/kasli_suservo/device_db.py +++ b/artiq/examples/kasli_suservo/device_db.py @@ -191,10 +191,8 @@ device_db = { "arguments": { "channel": 24, "pgia_device": "spi_sampler0_pgia", - "cpld0_device": "urukul0_cpld", - "cpld1_device": "urukul1_cpld", - "dds0_device": "urukul0_dds", - "dds1_device": "urukul1_dds" + "cpld_devices": ["urukul0_cpld", "urukul1_cpld"], + "dds_devices": ["urukul0_dds", "urukul1_dds"], } }, diff --git a/artiq/frontend/artiq_ddb_template.py b/artiq/frontend/artiq_ddb_template.py index 52408a0d4..0a14a06be 100755 --- a/artiq/frontend/artiq_ddb_template.py +++ b/artiq/frontend/artiq_ddb_template.py @@ -364,8 +364,7 @@ class PeripheralManager: def process_suservo(self, rtio_offset, peripheral): suservo_name = self.get_name("suservo") sampler_name = self.get_name("sampler") - urukul0_name = self.get_name("urukul") - urukul1_name = self.get_name("urukul") + urukul_names = [self.get_name("urukul") for _ in range(2)] channel = count(0) for i in range(8): self.gen(""" @@ -386,16 +385,14 @@ class PeripheralManager: "arguments": {{ "channel": 0x{suservo_channel:06x}, "pgia_device": "spi_{sampler_name}_pgia", - "cpld0_device": "{urukul0_name}_cpld", - "cpld1_device": "{urukul1_name}_cpld", - "dds0_device": "{urukul0_name}_dds", - "dds1_device": "{urukul1_name}_dds" + "cpld_devices": {cpld_names_list}, + "dds_devices": {dds_names_list} }} }}""", suservo_name=suservo_name, sampler_name=sampler_name, - urukul0_name=urukul0_name, - urukul1_name=urukul1_name, + cpld_names_list=[urukul_name + "_cpld" for urukul_name in urukul_names], + dds_names_list=[urukul_name + "_dds" for urukul_name in urukul_names], suservo_channel=rtio_offset+next(channel)) self.gen(""" device_db["spi_{sampler_name}_pgia"] = {{ @@ -407,7 +404,7 @@ class PeripheralManager: sampler_name=sampler_name, sampler_channel=rtio_offset+next(channel)) pll_vco = peripheral.get("pll_vco") - for urukul_name in (urukul0_name, urukul1_name): + for urukul_name in urukul_names: self.gen(""" device_db["spi_{urukul_name}"] = {{ "type": "local", diff --git a/artiq/gateware/suservo/__init__.py b/artiq/gateware/suservo/__init__.py index e69de29bb..7a1df77ac 100644 --- a/artiq/gateware/suservo/__init__.py +++ b/artiq/gateware/suservo/__init__.py @@ -0,0 +1,10 @@ +"""Gateware implementation of the Sampler-Urukul (AD9910) DDS amplitude servo. + +General conventions: + + - ``t_...`` signals and constants refer to time spans measured in the gateware + module's default clock (typically a 125 MHz RTIO clock). + - ``start`` signals cause modules to proceed with the next servo iteration iff + they are currently idle (i.e. their value is irrelevant while the module is + busy, so they are not necessarily one-clock-period strobes). +""" diff --git a/artiq/gateware/suservo/iir.py b/artiq/gateware/suservo/iir.py index 0ebab3f13..0ec9bfa09 100644 --- a/artiq/gateware/suservo/iir.py +++ b/artiq/gateware/suservo/iir.py @@ -1,9 +1,7 @@ from collections import namedtuple import logging - from migen import * - logger = logging.getLogger(__name__) @@ -222,31 +220,30 @@ class IIR(Module): assert w.word <= w.coeff # same memory assert w.state + w.coeff + 3 <= w.accu - # m_coeff of active profiles should only be accessed during + # m_coeff of active profiles should only be accessed externally during # ~processing self.specials.m_coeff = Memory( width=2*w.coeff, # Cat(pow/ftw/offset, cfg/a/b) depth=4 << w.profile + w.channel) - # m_state[x] should only be read during ~(shifting | - # loading) - # m_state[y] of active profiles should only be read during + # m_state[x] should only be read externally during ~(shifting | loading) + # m_state[y] of active profiles should only be read externally during # ~processing self.specials.m_state = Memory( width=w.state, # y1,x0,x1 depth=(1 << w.profile + w.channel) + (2 << w.channel)) # ctrl should only be updated synchronously self.ctrl = [Record([ - ("profile", w.profile), - ("en_out", 1), - ("en_iir", 1), - ("clip", 1), - ("stb", 1)]) - for i in range(1 << w.channel)] + ("profile", w.profile), + ("en_out", 1), + ("en_iir", 1), + ("clip", 1), + ("stb", 1)]) + for i in range(1 << w.channel)] # only update during ~loading self.adc = [Signal((w.adc, True), reset_less=True) for i in range(1 << w.channel)] # Cat(ftw0, ftw1, pow, asf) - # only read during ~processing + # only read externally during ~processing self.dds = [Signal(4*w.word, reset_less=True) for i in range(1 << w.channel)] # perform one IIR iteration, start with loading, @@ -270,100 +267,116 @@ class IIR(Module): en_iirs = Array([ch.en_iir for ch in self.ctrl]) clips = Array([ch.clip for ch in self.ctrl]) - # state counter - state = Signal(w.channel + 2) - # pipeline group activity flags (SR) - stage = Signal(3) + # Main state machine sequencing the steps of each servo iteration. The + # module IDLEs until self.start is asserted, and then runs through LOAD, + # PROCESS and SHIFT in order (see description of corresponding flags + # above). The steps share the same memory ports, and are executed + # strictly sequentially. + # + # LOAD/SHIFT just read/write one address per cycle; the duration needed + # to iterate over all channels is determined by counting cycles. + # + # The PROCESSing step is split across a three-stage pipeline, where each + # stage has up to four clock cycles latency. We feed the first stage + # using the (MSBs of) t_current_step, and, after all channels have been + # covered, proceed once the pipeline has completely drained. self.submodules.fsm = fsm = FSM("IDLE") - state_clr = Signal() - stage_en = Signal() + t_current_step = Signal(w.channel + 2) + t_current_step_clr = Signal() + + # pipeline group activity flags (SR) + # 0: load from memory + # 1: compute + # 2: write to output registers (DDS profiles, clip flags) + stages_active = Signal(3) fsm.act("IDLE", self.done.eq(1), - state_clr.eq(1), + t_current_step_clr.eq(1), If(self.start, NextState("LOAD") ) ) fsm.act("LOAD", self.loading.eq(1), - If(state == (1 << w.channel) - 1, - state_clr.eq(1), - stage_en.eq(1), + If(t_current_step == (1 << w.channel) - 1, + t_current_step_clr.eq(1), + NextValue(stages_active[0], 1), NextState("PROCESS") ) ) fsm.act("PROCESS", self.processing.eq(1), # this is technically wasting three cycles - # (one for setting stage, and phase=2,3 with stage[2]) - If(stage == 0, - state_clr.eq(1), - NextState("SHIFT") + # (one for setting stages_active, and phase=2,3 with stages_active[2]) + If(stages_active == 0, + t_current_step_clr.eq(1), + NextState("SHIFT"), ) ) fsm.act("SHIFT", self.shifting.eq(1), - If(state == (2 << w.channel) - 1, + If(t_current_step == (2 << w.channel) - 1, NextState("IDLE") ) ) self.sync += [ - state.eq(state + 1), - If(state_clr, - state.eq(0), - ), - If(stage_en, - stage[0].eq(1) + If(t_current_step_clr, + t_current_step.eq(0) + ).Else( + t_current_step.eq(t_current_step + 1) ) ] - # pipeline group channel pointer + # global pipeline phase (lower two bits of t_current_step) + pipeline_phase = Signal(2, reset_less=True) + # pipeline group channel pointer (SR) # for each pipeline stage, this is the channel currently being # processed channel = [Signal(w.channel, reset_less=True) for i in range(3)] + self.comb += Cat(pipeline_phase, channel[0]).eq(t_current_step) + self.sync += [ + If(pipeline_phase == 3, + Cat(channel[1:]).eq(Cat(channel[:-1])), + stages_active[1:].eq(stages_active[:-1]), + If(channel[0] == (1 << w.channel) - 1, + stages_active[0].eq(0) + ) + ) + ] + # pipeline group profile pointer (SR) # for each pipeline stage, this is the profile currently being # processed profile = [Signal(w.profile, reset_less=True) for i in range(2)] - # pipeline phase (lower two bits of state) - phase = Signal(2, reset_less=True) - - self.comb += Cat(phase, channel[0]).eq(state) self.sync += [ - Case(phase, { - 0: [ - profile[0].eq(profiles[channel[0]]), - profile[1].eq(profile[0]) - ], - 3: [ - Cat(channel[1:]).eq(Cat(channel[:-1])), - stage[1:].eq(stage[:-1]), - If(channel[0] == (1 << w.channel) - 1, - stage[0].eq(0) - ) - ] - }) + If(pipeline_phase == 0, + profile[0].eq(profiles[channel[0]]), + profile[1].eq(profile[0]), + ) ] m_coeff = self.m_coeff.get_port() m_state = self.m_state.get_port(write_capable=True) # mode=READ_FIRST self.specials += m_state, m_coeff + # + # Hook up main IIR filter. + # + dsp = DSP(w) self.submodules += dsp offset_clr = Signal() - self.comb += [ - m_coeff.adr.eq(Cat(phase, profile[0], - Mux(phase==0, channel[1], channel[0]))), + m_coeff.adr.eq(Cat(pipeline_phase, profile[0], + Mux(pipeline_phase == 0, channel[1], channel[0]))), dsp.offset[-w.coeff - 1:].eq(Mux(offset_clr, 0, Cat(m_coeff.dat_r[:w.coeff], m_coeff.dat_r[w.coeff - 1]) )), dsp.coeff.eq(m_coeff.dat_r[w.coeff:]), dsp.state.eq(m_state.dat_r), - Case(phase, { + Case(pipeline_phase, { 0: dsp.accu_clr.eq(1), 2: [ offset_clr.eq(1), @@ -373,6 +386,11 @@ class IIR(Module): }) ] + + # + # Arbitrate state memory access between steps. + # + # selected adc and profile delay (combinatorial from dat_r) # both share the same coeff word (sel in the lower 8 bits) sel_profile = Signal(w.channel) @@ -389,13 +407,13 @@ class IIR(Module): sel_profile.eq(m_coeff.dat_r[w.coeff:]), dly_profile.eq(m_coeff.dat_r[w.coeff + 8:]), If(self.shifting, - m_state.adr.eq(state | (1 << w.profile + w.channel)), + m_state.adr.eq(t_current_step | (1 << w.profile + w.channel)), m_state.dat_w.eq(m_state.dat_r), - m_state.we.eq(state[0]) + m_state.we.eq(t_current_step[0]) ), If(self.loading, - m_state.adr.eq((state << 1) | (1 << w.profile + w.channel)), - m_state.dat_w[-w.adc - 1:-1].eq(Array(self.adc)[state]), + m_state.adr.eq((t_current_step << 1) | (1 << w.profile + w.channel)), + m_state.dat_w[-w.adc - 1:-1].eq(Array(self.adc)[t_current_step]), m_state.dat_w[-1].eq(m_state.dat_w[-2]), m_state.we.eq(1) ), @@ -405,16 +423,20 @@ class IIR(Module): Cat(profile[1], channel[2]), # read old y Cat(profile[0], channel[0]), - # x0 (recent) + # read x0 (recent) 0 | (sel_profile << 1) | (1 << w.profile + w.channel), - # x1 (old) + # read x1 (old) 1 | (sel << 1) | (1 << w.profile + w.channel), - ])[phase]), + ])[pipeline_phase]), m_state.dat_w.eq(dsp.output), - m_state.we.eq((phase == 0) & stage[2] & en[1]), + m_state.we.eq((pipeline_phase == 0) & stages_active[2] & en[1]), ) ] + # + # Compute auxiliary signals (delayed servo enable, clip indicators, etc.). + # + # internal channel delay counters dlys = Array([Signal(w.dly) for i in range(1 << w.channel)]) @@ -434,51 +456,65 @@ class IIR(Module): en_out = Signal(reset_less=True) # latched channel en_iir en_iir = Signal(reset_less=True) + + self.sync += [ + Case(pipeline_phase, { + 0: [ + dly.eq(dlys[channel[0]]), + en_out.eq(en_outs[channel[0]]), + en_iir.eq(en_iirs[channel[0]]), + If(stages_active[2] & en[1] & dsp.clip, + clips[channel[2]].eq(1) + ) + ], + 2: [ + en[0].eq(0), + en[1].eq(en[0]), + sel.eq(sel_profile), + If(stages_active[0] & en_out, + If(dly != dly_profile, + dlys[channel[0]].eq(dly + 1) + ).Elif(en_iir, + en[0].eq(1) + ) + ) + ], + }), + ] + + # + # Update DDS profile with FTW/POW/ASF + # Stage 0 loads the POW, stage 1 the FTW, and stage 2 writes + # the ASF computed by the IIR filter. + # + # muxing ddss = Array(self.dds) self.sync += [ - Case(phase, { - 0: [ - dly.eq(dlys[channel[0]]), - en_out.eq(en_outs[channel[0]]), - en_iir.eq(en_iirs[channel[0]]), - If(stage[1], - ddss[channel[1]][:w.word].eq(m_coeff.dat_r) - ), - If(stage[2] & en[1] & dsp.clip, - clips[channel[2]].eq(1) - ) - ], - 1: [ - If(stage[1], - ddss[channel[1]][w.word:2*w.word].eq( - m_coeff.dat_r), - ), - If(stage[2], - ddss[channel[2]][3*w.word:].eq( - m_state.dat_r[w.state - w.asf - 1:w.state - 1]) - ) - ], - 2: [ - en[0].eq(0), - en[1].eq(en[0]), - sel.eq(sel_profile), - If(stage[0], - ddss[channel[0]][2*w.word:3*w.word].eq( - m_coeff.dat_r), - If(en_out, - If(dly != dly_profile, - dlys[channel[0]].eq(dly + 1) - ).Elif(en_iir, - en[0].eq(1) - ) - ) - ) - ], - 3: [ - ], - }), + Case(pipeline_phase, { + 0: [ + If(stages_active[1], + ddss[channel[1]][:w.word].eq(m_coeff.dat_r), # ftw0 + ), + ], + 1: [ + If(stages_active[1], + ddss[channel[1]][w.word:2 * w.word].eq(m_coeff.dat_r), # ftw1 + ), + If(stages_active[2], + ddss[channel[2]][3*w.word:].eq( # asf + m_state.dat_r[w.state - w.asf - 1:w.state - 1]) + ) + ], + 2: [ + If(stages_active[0], + ddss[channel[0]][2*w.word:3*w.word].eq(m_coeff.dat_r), # pow + ), + ], + 3: [ + ], + }), ] def _coeff(self, channel, profile, coeff): diff --git a/artiq/gateware/suservo/servo.py b/artiq/gateware/suservo/servo.py index 08b31a3bc..1aec95f02 100644 --- a/artiq/gateware/suservo/servo.py +++ b/artiq/gateware/suservo/servo.py @@ -5,32 +5,76 @@ from .iir import IIR, IIRWidths from .dds_ser import DDS, DDSParams +def predict_timing(adc_p, iir_p, dds_p): + """ + The following is a sketch of the timing for 1 Sampler (8 ADCs) and N Urukuls + Shown here, the cycle duration is limited by the IIR loading+processing time. + + ADC|CONVH|CONV|READ|RTT|IDLE|CONVH|CONV|READ|RTT|IDLE|CONVH|CONV|READ|RTT|... + |4 |57 |16 |8 | .. |4 |57 |16 |8 | .. |4 |57 |16 |8 |... + ---+-------------------+------------------------+------------------------+--- + IIR| |LOAD|PROC |SHIFT|LOAD|PROC |SHIFT|... + | |8 |16*N+9 |16 |8 |16*N+9 |16 |... + ---+--------------------------------------+------------------------+--------- + DDS| |CMD|PROF|WAIT|IO_UP|IDLE|CMD|PR... + | |16 |128 |1 |1 | .. |16 | ... + + IIR loading starts once the ADC presents its data, the DDSes are updated + once the IIR processing is over. These are the only blocking processes. + IIR shifting happens in parallel to writing to the DDSes and ADC conversions + take place while the IIR filter is processing or the DDSes are being + written to, depending on the cycle duration (given by whichever module + takes the longest). + """ + t_adc = (adc_p.t_cnvh + adc_p.t_conv + adc_p.t_rtt + + adc_p.channels*adc_p.width//adc_p.lanes) + 1 + # load adc_p.channels values, process dds_p.channels + # (4 processing phases and 2 additional stages à 4 phases + # to complete the processing of the last channel) + t_iir = adc_p.channels + 4*dds_p.channels + 8 + 1 + t_dds = (dds_p.width*2 + 1)*dds_p.clk + 1 + t_cycle = max(t_adc, t_iir, t_dds) + return t_adc, t_iir, t_dds, t_cycle + class Servo(Module): def __init__(self, adc_pads, dds_pads, adc_p, iir_p, dds_p): + t_adc, t_iir, t_dds, t_cycle = predict_timing(adc_p, iir_p, dds_p) + assert t_iir + 2*adc_p.channels < t_cycle, "need shifting time" + self.submodules.adc = ADC(adc_pads, adc_p) self.submodules.iir = IIR(iir_p) self.submodules.dds = DDS(dds_pads, dds_p) # adc channels are reversed on Sampler - for i, j, k, l in zip(reversed(self.adc.data), self.iir.adc, - self.iir.dds, self.dds.profile): - self.comb += j.eq(i), l.eq(k) - - t_adc = (adc_p.t_cnvh + adc_p.t_conv + adc_p.t_rtt + - adc_p.channels*adc_p.width//adc_p.lanes) + 1 - t_iir = ((1 + 4 + 1) << iir_p.channel) + 1 - t_dds = (dds_p.width*2 + 1)*dds_p.clk + 1 - - t_cycle = max(t_adc, t_iir, t_dds) - assert t_iir + (2 << iir_p.channel) < t_cycle, "need shifting time" + for iir, adc in zip(self.iir.adc, reversed(self.adc.data)): + self.comb += iir.eq(adc) + for dds, iir in zip(self.dds.profile, self.iir.dds): + self.comb += dds.eq(iir) + # If high, a new cycle is started if the current cycle (if any) is + # finished. Consequently, if low, servo iterations cease after the + # current cycle is finished. Don't care while the first step (ADC) + # is active. self.start = Signal() + + # Counter for delay between end of ADC cycle and start of next one, + # depending on the duration of the other steps. t_restart = t_cycle - t_adc + 1 assert t_restart > 1 cnt = Signal(max=t_restart) cnt_done = Signal() active = Signal(3) + + # Indicates whether different steps (0: ADC, 1: IIR, 2: DDS) are + # currently active (exposed for simulation only), with each bit being + # reset once the successor step is launched. Depending on the + # timing details of the different steps, any number can be concurrently + # active (e.g. ADC read from iteration n, IIR computation from iteration + # n - 1, and DDS write from iteration n - 2). + + # Asserted once per cycle when the DDS write has been completed. self.done = Signal() + self.sync += [ If(self.dds.done, active[2].eq(0) diff --git a/artiq/gateware/targets/kasli.py b/artiq/gateware/targets/kasli.py index 311028fcb..cf8b5760f 100755 --- a/artiq/gateware/targets/kasli.py +++ b/artiq/gateware/targets/kasli.py @@ -228,9 +228,9 @@ class SUServo(StandaloneBase): ttl_serdes_7series.Output_8X, ttl_serdes_7series.Output_8X) # EEM3/2: Sampler, EEM5/4: Urukul, EEM7/6: Urukul - eem.SUServo.add_std( - self, eems_sampler=(3, 2), - eems_urukul0=(5, 4), eems_urukul1=(7, 6)) + eem.SUServo.add_std(self, + eems_sampler=(3, 2), + eems_urukul=[[5, 4], [7, 6]]) for i in (1, 2): sfp_ctl = self.platform.request("sfp_ctl", i)