forked from M-Labs/artiq
1
0
Fork 0

AD9910 driver feature extension and SUServo IIR readability (#1500)

* coredevice.ad9910: Add set_cfr2 function and extend arguments of set_cfr1 and set_sync

* SUServo: Wrap CPLD and DDS devices in a list

* SUServo: Refactor [nfc]

Co-authored-by: drmota <peter.drmota@physics.ox.ac.uk>
Co-authored-by: David Nadlinger <code@klickverbot.at>
This commit is contained in:
Peter Drmota 2021-11-15 05:09:16 +01:00 committed by GitHub
parent f0c50c80e6
commit 20e079a381
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 301 additions and 179 deletions

View File

@ -104,6 +104,9 @@ Breaking changes:
* ``quamash`` has been replaced with ``qasync``. * ``quamash`` has been replaced with ``qasync``.
* Protocols are updated to use device endian. * Protocols are updated to use device endian.
* Analyzer dump format includes a byte for device endianness. * Analyzer dump format includes a byte for device endianness.
* To support variable numbers of Urukul cards in the future, the
``artiq.coredevice.suservo.SUServo`` constructor now accepts two device name lists,
``cpld_devices`` and ``dds_devices``, rather than four individual arguments.
* Experiment classes with underscore-prefixed names are now ignored when ``artiq_client`` * Experiment classes with underscore-prefixed names are now ignored when ``artiq_client``
determines which experiment to submit (consistent with ``artiq_run``). determines which experiment to submit (consistent with ``artiq_run``).

View File

@ -374,18 +374,25 @@ class AD9910:
data[(n - preload) + i] = self.bus.read() data[(n - preload) + i] = self.bus.read()
@kernel @kernel
def set_cfr1(self, power_down: TInt32 = 0b0000, def set_cfr1(self,
power_down: TInt32 = 0b0000,
phase_autoclear: TInt32 = 0, phase_autoclear: TInt32 = 0,
drg_load_lrr: TInt32 = 0, drg_autoclear: TInt32 = 0, drg_load_lrr: TInt32 = 0,
internal_profile: TInt32 = 0, ram_destination: TInt32 = 0, drg_autoclear: TInt32 = 0,
ram_enable: TInt32 = 0, manual_osk_external: TInt32 = 0, phase_clear: TInt32 = 0,
osk_enable: TInt32 = 0, select_auto_osk: TInt32 = 0): internal_profile: TInt32 = 0,
ram_destination: TInt32 = 0,
ram_enable: TInt32 = 0,
manual_osk_external: TInt32 = 0,
osk_enable: TInt32 = 0,
select_auto_osk: TInt32 = 0):
"""Set CFR1. See the AD9910 datasheet for parameter meanings. """Set CFR1. See the AD9910 datasheet for parameter meanings.
This method does not pulse IO_UPDATE. This method does not pulse IO_UPDATE.
:param power_down: Power down bits. :param power_down: Power down bits.
:param phase_autoclear: Autoclear phase accumulator. :param phase_autoclear: Autoclear phase accumulator.
:param phase_clear: Asynchronous, static reset of the phase accumulator.
:param drg_load_lrr: Load digital ramp generator LRR. :param drg_load_lrr: Load digital ramp generator LRR.
:param drg_autoclear: Autoclear digital ramp generator. :param drg_autoclear: Autoclear digital ramp generator.
:param internal_profile: Internal profile control. :param internal_profile: Internal profile control.
@ -405,11 +412,41 @@ class AD9910:
(drg_load_lrr << 15) | (drg_load_lrr << 15) |
(drg_autoclear << 14) | (drg_autoclear << 14) |
(phase_autoclear << 13) | (phase_autoclear << 13) |
(phase_clear << 11) |
(osk_enable << 9) | (osk_enable << 9) |
(select_auto_osk << 8) | (select_auto_osk << 8) |
(power_down << 4) | (power_down << 4) |
2) # SDIO input only, MSB first 2) # SDIO input only, MSB first
@kernel
def set_cfr2(self,
             asf_profile_enable: TInt32 = 1,
             drg_enable: TInt32 = 0,
             effective_ftw: TInt32 = 1,
             sync_validation_disable: TInt32 = 0,
             matched_latency_enable: TInt32 = 0):
    """Set CFR2. See the AD9910 datasheet for parameter meanings.

    This method does not pulse IO_UPDATE.

    :param asf_profile_enable: Enable amplitude scale from single tone
        profiles.
    :param drg_enable: Digital ramp enable.
    :param effective_ftw: Read effective FTW.
    :param sync_validation_disable: Disable the SYNC_SMP_ERR pin indicating
        (active high) detection of a synchronization pulse sampling error.
    :param matched_latency_enable: Select how simultaneously applied
        amplitude, phase, and frequency changes to the DDS arrive at the
        output:

        * matched_latency_enable = 0: in the order listed
        * matched_latency_enable = 1: simultaneously.
    """
    # Each argument is shifted to its bit position in the 32-bit CFR2 word;
    # all bits not listed here are written as zero.
    self.write32(_AD9910_REG_CFR2,
                 (asf_profile_enable << 24) |
                 (drg_enable << 19) |
                 (effective_ftw << 16) |
                 (matched_latency_enable << 7) |
                 (sync_validation_disable << 5))
@kernel @kernel
def init(self, blind: TBool = False): def init(self, blind: TBool = False):
"""Initialize and configure the DDS. """Initialize and configure the DDS.
@ -442,7 +479,7 @@ class AD9910:
# enable amplitude scale from profiles # enable amplitude scale from profiles
# read effective FTW # read effective FTW
# sync timing validation disable (enabled later) # sync timing validation disable (enabled later)
self.write32(_AD9910_REG_CFR2, 0x01010020) self.set_cfr2(sync_validation_disable=1)
self.cpld.io_update.pulse(1 * us) self.cpld.io_update.pulse(1 * us)
cfr3 = (0x0807c000 | (self.pll_vco << 24) | cfr3 = (0x0807c000 | (self.pll_vco << 24) |
(self.pll_cp << 19) | (self.pll_en << 8) | (self.pll_cp << 19) | (self.pll_en << 8) |
@ -465,7 +502,7 @@ class AD9910:
if i >= 100 - 1: if i >= 100 - 1:
raise ValueError("PLL lock timeout") raise ValueError("PLL lock timeout")
delay(10 * us) # slack delay(10 * us) # slack
if self.sync_data.sync_delay_seed >= 0: if self.sync_data.sync_delay_seed >= 0 and not blind:
self.tune_sync_delay(self.sync_data.sync_delay_seed) self.tune_sync_delay(self.sync_data.sync_delay_seed)
delay(1 * ms) delay(1 * ms)
@ -875,20 +912,26 @@ class AD9910:
self.cpld.cfg_sw(self.chip_select - 4, state) self.cpld.cfg_sw(self.chip_select - 4, state)
@kernel @kernel
def set_sync(self, in_delay: TInt32, window: TInt32): def set_sync(self,
in_delay: TInt32,
window: TInt32,
en_sync_gen: TInt32 = 0):
"""Set the relevant parameters in the multi device synchronization """Set the relevant parameters in the multi device synchronization
register. See the AD9910 datasheet for details. The SYNC clock register. See the AD9910 datasheet for details. The SYNC clock
generator preset value is set to zero, and the SYNC_OUT generator is generator preset value is set to zero, and the SYNC_OUT generator is
disabled. disabled by default.
:param in_delay: SYNC_IN delay tap (0-31) in steps of ~75ps :param in_delay: SYNC_IN delay tap (0-31) in steps of ~75ps
:param window: Symmetric SYNC_IN validation window (0-15) in :param window: Symmetric SYNC_IN validation window (0-15) in
steps of ~75ps for both hold and setup margin. steps of ~75ps for both hold and setup margin.
:param en_sync_gen: Whether to enable the DDS-internal sync generator
(SYNC_OUT, cf. sync_sel == 1). Should be left off for the normal
use case, where the SYNC clock is supplied by the core device.
""" """
self.write32(_AD9910_REG_SYNC, self.write32(_AD9910_REG_SYNC,
(window << 28) | # SYNC S/H validation delay (window << 28) | # SYNC S/H validation delay
(1 << 27) | # SYNC receiver enable (1 << 27) | # SYNC receiver enable
(0 << 26) | # SYNC generator disable (en_sync_gen << 26) | # SYNC generator enable
(0 << 25) | # SYNC generator SYS rising edge (0 << 25) | # SYNC generator SYS rising edge
(0 << 18) | # SYNC preset (0 << 18) | # SYNC preset
(0 << 11) | # SYNC output delay (0 << 11) | # SYNC output delay
@ -904,9 +947,10 @@ class AD9910:
Also modifies CFR2. Also modifies CFR2.
""" """
self.write32(_AD9910_REG_CFR2, 0x01010020) # clear SMP_ERR self.set_cfr2(sync_validation_disable=1) # clear SMP_ERR
self.cpld.io_update.pulse(1 * us) self.cpld.io_update.pulse(1 * us)
self.write32(_AD9910_REG_CFR2, 0x01010000) # enable SMP_ERR delay(10 * us) # slack
self.set_cfr2(sync_validation_disable=0) # enable SMP_ERR
self.cpld.io_update.pulse(1 * us) self.cpld.io_update.pulse(1 * us)
@kernel @kernel
@ -984,7 +1028,7 @@ class AD9910:
# set up DRG # set up DRG
self.set_cfr1(drg_load_lrr=1, drg_autoclear=1) self.set_cfr1(drg_load_lrr=1, drg_autoclear=1)
# DRG -> FTW, DRG enable # DRG -> FTW, DRG enable
self.write32(_AD9910_REG_CFR2, 0x01090000) self.set_cfr2(drg_enable=1)
# no limits # no limits
self.write64(_AD9910_REG_RAMP_LIMIT, -1, 0) self.write64(_AD9910_REG_RAMP_LIMIT, -1, 0)
# DRCTL=0, dt=1 t_SYNC_CLK # DRCTL=0, dt=1 t_SYNC_CLK
@ -1005,7 +1049,7 @@ class AD9910:
ftw = self.read32(_AD9910_REG_FTW) # read out effective FTW ftw = self.read32(_AD9910_REG_FTW) # read out effective FTW
delay(100 * us) # slack delay(100 * us) # slack
# disable DRG # disable DRG
self.write32(_AD9910_REG_CFR2, 0x01010000) self.set_cfr2(drg_enable=0)
self.cpld.io_update.pulse_mu(8) self.cpld.io_update.pulse_mu(8)
return ftw & 1 return ftw & 1

View File

@ -57,32 +57,26 @@ class SUServo:
:param channel: RTIO channel number :param channel: RTIO channel number
:param pgia_device: Name of the Sampler PGIA gain setting SPI bus :param pgia_device: Name of the Sampler PGIA gain setting SPI bus
:param cpld0_device: Name of the first Urukul CPLD SPI bus :param cpld_devices: Names of the Urukul CPLD SPI buses
:param cpld1_device: Name of the second Urukul CPLD SPI bus :param dds_devices: Names of the AD9910 devices
:param dds0_device: Name of the AD9910 device for the DDS on the first
Urukul
:param dds1_device: Name of the AD9910 device for the DDS on the second
Urukul
:param gains: Initial value for PGIA gains shift register :param gains: Initial value for PGIA gains shift register
(default: 0x0000). Knowledge of this state is not transferred (default: 0x0000). Knowledge of this state is not transferred
between experiments. between experiments.
:param core_device: Core device name :param core_device: Core device name
""" """
kernel_invariants = {"channel", "core", "pgia", "cpld0", "cpld1", kernel_invariants = {"channel", "core", "pgia", "cplds", "ddses",
"dds0", "dds1", "ref_period_mu"} "ref_period_mu"}
def __init__(self, dmgr, channel, pgia_device, def __init__(self, dmgr, channel, pgia_device,
cpld0_device, cpld1_device, cpld_devices, dds_devices,
dds0_device, dds1_device,
gains=0x0000, core_device="core"): gains=0x0000, core_device="core"):
self.core = dmgr.get(core_device) self.core = dmgr.get(core_device)
self.pgia = dmgr.get(pgia_device) self.pgia = dmgr.get(pgia_device)
self.pgia.update_xfer_duration_mu(div=4, length=16) self.pgia.update_xfer_duration_mu(div=4, length=16)
self.dds0 = dmgr.get(dds0_device) assert len(dds_devices) == len(cpld_devices)
self.dds1 = dmgr.get(dds1_device) self.ddses = [dmgr.get(dds) for dds in dds_devices]
self.cpld0 = dmgr.get(cpld0_device) self.cplds = [dmgr.get(cpld) for cpld in cpld_devices]
self.cpld1 = dmgr.get(cpld1_device)
self.channel = channel self.channel = channel
self.gains = gains self.gains = gains
self.ref_period_mu = self.core.seconds_to_mu( self.ref_period_mu = self.core.seconds_to_mu(
@ -109,17 +103,15 @@ class SUServo:
sampler.SPI_CONFIG | spi.SPI_END, sampler.SPI_CONFIG | spi.SPI_END,
16, 4, sampler.SPI_CS_PGIA) 16, 4, sampler.SPI_CS_PGIA)
self.cpld0.init(blind=True) for i in range(len(self.cplds)):
cfg0 = self.cpld0.cfg_reg cpld = self.cplds[i]
self.cpld0.cfg_write(cfg0 | (0xf << urukul.CFG_MASK_NU)) dds = self.ddses[i]
self.dds0.init(blind=True)
self.cpld0.cfg_write(cfg0)
self.cpld1.init(blind=True) cpld.init(blind=True)
cfg1 = self.cpld1.cfg_reg prev_cpld_cfg = cpld.cfg_reg
self.cpld1.cfg_write(cfg1 | (0xf << urukul.CFG_MASK_NU)) cpld.cfg_write(prev_cpld_cfg | (0xf << urukul.CFG_MASK_NU))
self.dds1.init(blind=True) dds.init(blind=True)
self.cpld1.cfg_write(cfg1) cpld.cfg_write(prev_cpld_cfg)
@kernel @kernel
def write(self, addr, value): def write(self, addr, value):
@ -257,9 +249,11 @@ class Channel:
self.servo = dmgr.get(servo_device) self.servo = dmgr.get(servo_device)
self.core = self.servo.core self.core = self.servo.core
self.channel = channel self.channel = channel
# FIXME: this assumes the mem channel is right after the control # This assumes the mem channel is right after the control channels
# channels # Make sure this is always the case in eem.py
self.servo_channel = self.channel + 8 - self.servo.channel self.servo_channel = (self.channel + 4 * len(self.servo.cplds) -
self.servo.channel)
self.dds = self.servo.ddses[self.servo_channel // 4]
@kernel @kernel
def set(self, en_out, en_iir=0, profile=0): def set(self, en_out, en_iir=0, profile=0):
@ -311,12 +305,8 @@ class Channel:
see :meth:`dds_offset_to_mu` see :meth:`dds_offset_to_mu`
:param phase: DDS phase in turns :param phase: DDS phase in turns
""" """
if self.servo_channel < 4: ftw = self.dds.frequency_to_ftw(frequency)
dds = self.servo.dds0 pow_ = self.dds.turns_to_pow(phase)
else:
dds = self.servo.dds1
ftw = dds.frequency_to_ftw(frequency)
pow_ = dds.turns_to_pow(phase)
offs = self.dds_offset_to_mu(offset) offs = self.dds_offset_to_mu(offset)
self.set_dds_mu(profile, ftw, offs, pow_) self.set_dds_mu(profile, ftw, offs, pow_)

View File

@ -191,10 +191,8 @@ device_db = {
"arguments": { "arguments": {
"channel": 24, "channel": 24,
"pgia_device": "spi_sampler0_pgia", "pgia_device": "spi_sampler0_pgia",
"cpld0_device": "urukul0_cpld", "cpld_devices": ["urukul0_cpld", "urukul1_cpld"],
"cpld1_device": "urukul1_cpld", "dds_devices": ["urukul0_dds", "urukul1_dds"],
"dds0_device": "urukul0_dds",
"dds1_device": "urukul1_dds"
} }
}, },

View File

@ -364,8 +364,7 @@ class PeripheralManager:
def process_suservo(self, rtio_offset, peripheral): def process_suservo(self, rtio_offset, peripheral):
suservo_name = self.get_name("suservo") suservo_name = self.get_name("suservo")
sampler_name = self.get_name("sampler") sampler_name = self.get_name("sampler")
urukul0_name = self.get_name("urukul") urukul_names = [self.get_name("urukul") for _ in range(2)]
urukul1_name = self.get_name("urukul")
channel = count(0) channel = count(0)
for i in range(8): for i in range(8):
self.gen(""" self.gen("""
@ -386,16 +385,14 @@ class PeripheralManager:
"arguments": {{ "arguments": {{
"channel": 0x{suservo_channel:06x}, "channel": 0x{suservo_channel:06x},
"pgia_device": "spi_{sampler_name}_pgia", "pgia_device": "spi_{sampler_name}_pgia",
"cpld0_device": "{urukul0_name}_cpld", "cpld_devices": {cpld_names_list},
"cpld1_device": "{urukul1_name}_cpld", "dds_devices": {dds_names_list}
"dds0_device": "{urukul0_name}_dds",
"dds1_device": "{urukul1_name}_dds"
}} }}
}}""", }}""",
suservo_name=suservo_name, suservo_name=suservo_name,
sampler_name=sampler_name, sampler_name=sampler_name,
urukul0_name=urukul0_name, cpld_names_list=[urukul_name + "_cpld" for urukul_name in urukul_names],
urukul1_name=urukul1_name, dds_names_list=[urukul_name + "_dds" for urukul_name in urukul_names],
suservo_channel=rtio_offset+next(channel)) suservo_channel=rtio_offset+next(channel))
self.gen(""" self.gen("""
device_db["spi_{sampler_name}_pgia"] = {{ device_db["spi_{sampler_name}_pgia"] = {{
@ -407,7 +404,7 @@ class PeripheralManager:
sampler_name=sampler_name, sampler_name=sampler_name,
sampler_channel=rtio_offset+next(channel)) sampler_channel=rtio_offset+next(channel))
pll_vco = peripheral.get("pll_vco") pll_vco = peripheral.get("pll_vco")
for urukul_name in (urukul0_name, urukul1_name): for urukul_name in urukul_names:
self.gen(""" self.gen("""
device_db["spi_{urukul_name}"] = {{ device_db["spi_{urukul_name}"] = {{
"type": "local", "type": "local",

View File

@ -0,0 +1,10 @@
"""Gateware implementation of the Sampler-Urukul (AD9910) DDS amplitude servo.
General conventions:
- ``t_...`` signals and constants refer to time spans measured in the gateware
module's default clock (typically a 125 MHz RTIO clock).
- ``start`` signals cause modules to proceed with the next servo iteration iff
they are currently idle (i.e. their value is irrelevant while the module is
busy, so they are not necessarily one-clock-period strobes).
"""

View File

@ -1,9 +1,7 @@
from collections import namedtuple from collections import namedtuple
import logging import logging
from migen import * from migen import *
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -222,14 +220,13 @@ class IIR(Module):
assert w.word <= w.coeff # same memory assert w.word <= w.coeff # same memory
assert w.state + w.coeff + 3 <= w.accu assert w.state + w.coeff + 3 <= w.accu
# m_coeff of active profiles should only be accessed during # m_coeff of active profiles should only be accessed externally during
# ~processing # ~processing
self.specials.m_coeff = Memory( self.specials.m_coeff = Memory(
width=2*w.coeff, # Cat(pow/ftw/offset, cfg/a/b) width=2*w.coeff, # Cat(pow/ftw/offset, cfg/a/b)
depth=4 << w.profile + w.channel) depth=4 << w.profile + w.channel)
# m_state[x] should only be read during ~(shifting | # m_state[x] should only be read externally during ~(shifting | loading)
# loading) # m_state[y] of active profiles should only be read externally during
# m_state[y] of active profiles should only be read during
# ~processing # ~processing
self.specials.m_state = Memory( self.specials.m_state = Memory(
width=w.state, # y1,x0,x1 width=w.state, # y1,x0,x1
@ -246,7 +243,7 @@ class IIR(Module):
self.adc = [Signal((w.adc, True), reset_less=True) self.adc = [Signal((w.adc, True), reset_less=True)
for i in range(1 << w.channel)] for i in range(1 << w.channel)]
# Cat(ftw0, ftw1, pow, asf) # Cat(ftw0, ftw1, pow, asf)
# only read during ~processing # only read externally during ~processing
self.dds = [Signal(4*w.word, reset_less=True) self.dds = [Signal(4*w.word, reset_less=True)
for i in range(1 << w.channel)] for i in range(1 << w.channel)]
# perform one IIR iteration, start with loading, # perform one IIR iteration, start with loading,
@ -270,100 +267,116 @@ class IIR(Module):
en_iirs = Array([ch.en_iir for ch in self.ctrl]) en_iirs = Array([ch.en_iir for ch in self.ctrl])
clips = Array([ch.clip for ch in self.ctrl]) clips = Array([ch.clip for ch in self.ctrl])
# state counter # Main state machine sequencing the steps of each servo iteration. The
state = Signal(w.channel + 2) # module IDLEs until self.start is asserted, and then runs through LOAD,
# pipeline group activity flags (SR) # PROCESS and SHIFT in order (see description of corresponding flags
stage = Signal(3) # above). The steps share the same memory ports, and are executed
# strictly sequentially.
#
# LOAD/SHIFT just read/write one address per cycle; the duration needed
# to iterate over all channels is determined by counting cycles.
#
# The PROCESSing step is split across a three-stage pipeline, where each
# stage has up to four clock cycles latency. We feed the first stage
# using the (MSBs of) t_current_step, and, after all channels have been
# covered, proceed once the pipeline has completely drained.
self.submodules.fsm = fsm = FSM("IDLE") self.submodules.fsm = fsm = FSM("IDLE")
state_clr = Signal() t_current_step = Signal(w.channel + 2)
stage_en = Signal() t_current_step_clr = Signal()
# pipeline group activity flags (SR)
# 0: load from memory
# 1: compute
# 2: write to output registers (DDS profiles, clip flags)
stages_active = Signal(3)
fsm.act("IDLE", fsm.act("IDLE",
self.done.eq(1), self.done.eq(1),
state_clr.eq(1), t_current_step_clr.eq(1),
If(self.start, If(self.start,
NextState("LOAD") NextState("LOAD")
) )
) )
fsm.act("LOAD", fsm.act("LOAD",
self.loading.eq(1), self.loading.eq(1),
If(state == (1 << w.channel) - 1, If(t_current_step == (1 << w.channel) - 1,
state_clr.eq(1), t_current_step_clr.eq(1),
stage_en.eq(1), NextValue(stages_active[0], 1),
NextState("PROCESS") NextState("PROCESS")
) )
) )
fsm.act("PROCESS", fsm.act("PROCESS",
self.processing.eq(1), self.processing.eq(1),
# this is technically wasting three cycles # this is technically wasting three cycles
# (one for setting stage, and phase=2,3 with stage[2]) # (one for setting stages_active, and phase=2,3 with stages_active[2])
If(stage == 0, If(stages_active == 0,
state_clr.eq(1), t_current_step_clr.eq(1),
NextState("SHIFT") NextState("SHIFT"),
) )
) )
fsm.act("SHIFT", fsm.act("SHIFT",
self.shifting.eq(1), self.shifting.eq(1),
If(state == (2 << w.channel) - 1, If(t_current_step == (2 << w.channel) - 1,
NextState("IDLE") NextState("IDLE")
) )
) )
self.sync += [ self.sync += [
state.eq(state + 1), If(t_current_step_clr,
If(state_clr, t_current_step.eq(0)
state.eq(0), ).Else(
), t_current_step.eq(t_current_step + 1)
If(stage_en,
stage[0].eq(1)
) )
] ]
# pipeline group channel pointer # global pipeline phase (lower two bits of t_current_step)
pipeline_phase = Signal(2, reset_less=True)
# pipeline group channel pointer (SR)
# for each pipeline stage, this is the channel currently being # for each pipeline stage, this is the channel currently being
# processed # processed
channel = [Signal(w.channel, reset_less=True) for i in range(3)] channel = [Signal(w.channel, reset_less=True) for i in range(3)]
self.comb += Cat(pipeline_phase, channel[0]).eq(t_current_step)
self.sync += [
If(pipeline_phase == 3,
Cat(channel[1:]).eq(Cat(channel[:-1])),
stages_active[1:].eq(stages_active[:-1]),
If(channel[0] == (1 << w.channel) - 1,
stages_active[0].eq(0)
)
)
]
# pipeline group profile pointer (SR) # pipeline group profile pointer (SR)
# for each pipeline stage, this is the profile currently being # for each pipeline stage, this is the profile currently being
# processed # processed
profile = [Signal(w.profile, reset_less=True) for i in range(2)] profile = [Signal(w.profile, reset_less=True) for i in range(2)]
# pipeline phase (lower two bits of state)
phase = Signal(2, reset_less=True)
self.comb += Cat(phase, channel[0]).eq(state)
self.sync += [ self.sync += [
Case(phase, { If(pipeline_phase == 0,
0: [
profile[0].eq(profiles[channel[0]]), profile[0].eq(profiles[channel[0]]),
profile[1].eq(profile[0]) profile[1].eq(profile[0]),
],
3: [
Cat(channel[1:]).eq(Cat(channel[:-1])),
stage[1:].eq(stage[:-1]),
If(channel[0] == (1 << w.channel) - 1,
stage[0].eq(0)
) )
] ]
})
]
m_coeff = self.m_coeff.get_port() m_coeff = self.m_coeff.get_port()
m_state = self.m_state.get_port(write_capable=True) # mode=READ_FIRST m_state = self.m_state.get_port(write_capable=True) # mode=READ_FIRST
self.specials += m_state, m_coeff self.specials += m_state, m_coeff
#
# Hook up main IIR filter.
#
dsp = DSP(w) dsp = DSP(w)
self.submodules += dsp self.submodules += dsp
offset_clr = Signal() offset_clr = Signal()
self.comb += [ self.comb += [
m_coeff.adr.eq(Cat(phase, profile[0], m_coeff.adr.eq(Cat(pipeline_phase, profile[0],
Mux(phase==0, channel[1], channel[0]))), Mux(pipeline_phase == 0, channel[1], channel[0]))),
dsp.offset[-w.coeff - 1:].eq(Mux(offset_clr, 0, dsp.offset[-w.coeff - 1:].eq(Mux(offset_clr, 0,
Cat(m_coeff.dat_r[:w.coeff], m_coeff.dat_r[w.coeff - 1]) Cat(m_coeff.dat_r[:w.coeff], m_coeff.dat_r[w.coeff - 1])
)), )),
dsp.coeff.eq(m_coeff.dat_r[w.coeff:]), dsp.coeff.eq(m_coeff.dat_r[w.coeff:]),
dsp.state.eq(m_state.dat_r), dsp.state.eq(m_state.dat_r),
Case(phase, { Case(pipeline_phase, {
0: dsp.accu_clr.eq(1), 0: dsp.accu_clr.eq(1),
2: [ 2: [
offset_clr.eq(1), offset_clr.eq(1),
@ -373,6 +386,11 @@ class IIR(Module):
}) })
] ]
#
# Arbitrate state memory access between steps.
#
# selected adc and profile delay (combinatorial from dat_r) # selected adc and profile delay (combinatorial from dat_r)
# both share the same coeff word (sel in the lower 8 bits) # both share the same coeff word (sel in the lower 8 bits)
sel_profile = Signal(w.channel) sel_profile = Signal(w.channel)
@ -389,13 +407,13 @@ class IIR(Module):
sel_profile.eq(m_coeff.dat_r[w.coeff:]), sel_profile.eq(m_coeff.dat_r[w.coeff:]),
dly_profile.eq(m_coeff.dat_r[w.coeff + 8:]), dly_profile.eq(m_coeff.dat_r[w.coeff + 8:]),
If(self.shifting, If(self.shifting,
m_state.adr.eq(state | (1 << w.profile + w.channel)), m_state.adr.eq(t_current_step | (1 << w.profile + w.channel)),
m_state.dat_w.eq(m_state.dat_r), m_state.dat_w.eq(m_state.dat_r),
m_state.we.eq(state[0]) m_state.we.eq(t_current_step[0])
), ),
If(self.loading, If(self.loading,
m_state.adr.eq((state << 1) | (1 << w.profile + w.channel)), m_state.adr.eq((t_current_step << 1) | (1 << w.profile + w.channel)),
m_state.dat_w[-w.adc - 1:-1].eq(Array(self.adc)[state]), m_state.dat_w[-w.adc - 1:-1].eq(Array(self.adc)[t_current_step]),
m_state.dat_w[-1].eq(m_state.dat_w[-2]), m_state.dat_w[-1].eq(m_state.dat_w[-2]),
m_state.we.eq(1) m_state.we.eq(1)
), ),
@ -405,16 +423,20 @@ class IIR(Module):
Cat(profile[1], channel[2]), Cat(profile[1], channel[2]),
# read old y # read old y
Cat(profile[0], channel[0]), Cat(profile[0], channel[0]),
# x0 (recent) # read x0 (recent)
0 | (sel_profile << 1) | (1 << w.profile + w.channel), 0 | (sel_profile << 1) | (1 << w.profile + w.channel),
# x1 (old) # read x1 (old)
1 | (sel << 1) | (1 << w.profile + w.channel), 1 | (sel << 1) | (1 << w.profile + w.channel),
])[phase]), ])[pipeline_phase]),
m_state.dat_w.eq(dsp.output), m_state.dat_w.eq(dsp.output),
m_state.we.eq((phase == 0) & stage[2] & en[1]), m_state.we.eq((pipeline_phase == 0) & stages_active[2] & en[1]),
) )
] ]
#
# Compute auxiliary signals (delayed servo enable, clip indicators, etc.).
#
# internal channel delay counters # internal channel delay counters
dlys = Array([Signal(w.dly) dlys = Array([Signal(w.dly)
for i in range(1 << w.channel)]) for i in range(1 << w.channel)])
@ -434,48 +456,62 @@ class IIR(Module):
en_out = Signal(reset_less=True) en_out = Signal(reset_less=True)
# latched channel en_iir # latched channel en_iir
en_iir = Signal(reset_less=True) en_iir = Signal(reset_less=True)
# muxing
ddss = Array(self.dds)
self.sync += [ self.sync += [
Case(phase, { Case(pipeline_phase, {
0: [ 0: [
dly.eq(dlys[channel[0]]), dly.eq(dlys[channel[0]]),
en_out.eq(en_outs[channel[0]]), en_out.eq(en_outs[channel[0]]),
en_iir.eq(en_iirs[channel[0]]), en_iir.eq(en_iirs[channel[0]]),
If(stage[1], If(stages_active[2] & en[1] & dsp.clip,
ddss[channel[1]][:w.word].eq(m_coeff.dat_r)
),
If(stage[2] & en[1] & dsp.clip,
clips[channel[2]].eq(1) clips[channel[2]].eq(1)
) )
], ],
1: [
If(stage[1],
ddss[channel[1]][w.word:2*w.word].eq(
m_coeff.dat_r),
),
If(stage[2],
ddss[channel[2]][3*w.word:].eq(
m_state.dat_r[w.state - w.asf - 1:w.state - 1])
)
],
2: [ 2: [
en[0].eq(0), en[0].eq(0),
en[1].eq(en[0]), en[1].eq(en[0]),
sel.eq(sel_profile), sel.eq(sel_profile),
If(stage[0], If(stages_active[0] & en_out,
ddss[channel[0]][2*w.word:3*w.word].eq(
m_coeff.dat_r),
If(en_out,
If(dly != dly_profile, If(dly != dly_profile,
dlys[channel[0]].eq(dly + 1) dlys[channel[0]].eq(dly + 1)
).Elif(en_iir, ).Elif(en_iir,
en[0].eq(1) en[0].eq(1)
) )
) )
],
}),
]
#
# Update DDS profile with FTW/POW/ASF
# Stage 0 loads the POW, stage 1 the FTW, and stage 2 writes
# the ASF computed by the IIR filter.
#
# muxing
ddss = Array(self.dds)
self.sync += [
Case(pipeline_phase, {
0: [
If(stages_active[1],
ddss[channel[1]][:w.word].eq(m_coeff.dat_r), # ftw0
),
],
1: [
If(stages_active[1],
ddss[channel[1]][w.word:2 * w.word].eq(m_coeff.dat_r), # ftw1
),
If(stages_active[2],
ddss[channel[2]][3*w.word:].eq( # asf
m_state.dat_r[w.state - w.asf - 1:w.state - 1])
) )
], ],
2: [
If(stages_active[0],
ddss[channel[0]][2*w.word:3*w.word].eq(m_coeff.dat_r), # pow
),
],
3: [ 3: [
], ],
}), }),

View File

@ -5,32 +5,76 @@ from .iir import IIR, IIRWidths
from .dds_ser import DDS, DDSParams from .dds_ser import DDS, DDSParams
def predict_timing(adc_p, iir_p, dds_p):
    """Predict the duration of each servo step and of the full servo cycle.

    The following is a sketch of the timing for 1 Sampler (8 ADCs) and
    N Urukuls. Shown here, the cycle duration is limited by the IIR
    loading+processing time.

    ADC|CONVH|CONV|READ|RTT|IDLE|CONVH|CONV|READ|RTT|IDLE|CONVH|CONV|READ|RTT|...
       |4    |57  |16  |8  | .. |4    |57  |16  |8  | .. |4    |57  |16  |8  |...
    ---+-------------------+------------------------+------------------------+---
    IIR|                   |LOAD|PROC  |SHIFT|LOAD|PROC  |SHIFT|...
       |                   |8   |16*N+9|16   |8   |16*N+9|16   |...
    ---+--------------------------------------+------------------------+---------
    DDS|                                      |CMD|PROF|WAIT|IO_UP|IDLE|CMD|PR...
       |                                      |16 |128 |1   |1    | .. |16 | ...

    IIR loading starts once the ADC presents its data, the DDSes are updated
    once the IIR processing is over. These are the only blocking processes.
    IIR shifting happens in parallel to writing to the DDSes and ADC conversions
    take place while the IIR filter is processing or the DDSes are being
    written to, depending on the cycle duration (given by whichever module
    takes the longest).

    :param adc_p: ADC parameters; ``t_cnvh``, ``t_conv``, ``t_rtt``,
        ``channels``, ``width`` and ``lanes`` are read.
    :param iir_p: IIR parameters. Not used by the prediction; kept so the
        signature lists all three parameter sets.
    :param dds_p: DDS parameters; ``channels``, ``width`` and ``clk`` are
        read.
    :return: Tuple ``(t_adc, t_iir, t_dds, t_cycle)`` where ``t_cycle`` is
        the largest of the three step durations.
    """
    t_adc = (adc_p.t_cnvh + adc_p.t_conv + adc_p.t_rtt +
             adc_p.channels*adc_p.width//adc_p.lanes) + 1
    # load adc_p.channels values, process dds_p.channels
    # (4 processing phases and 2 additional stages of 4 phases each
    # to complete the processing of the last channel)
    t_iir = adc_p.channels + 4*dds_p.channels + 8 + 1
    t_dds = (dds_p.width*2 + 1)*dds_p.clk + 1
    # the slowest step determines the cycle duration
    t_cycle = max(t_adc, t_iir, t_dds)
    return t_adc, t_iir, t_dds, t_cycle
class Servo(Module): class Servo(Module):
def __init__(self, adc_pads, dds_pads, adc_p, iir_p, dds_p): def __init__(self, adc_pads, dds_pads, adc_p, iir_p, dds_p):
t_adc, t_iir, t_dds, t_cycle = predict_timing(adc_p, iir_p, dds_p)
assert t_iir + 2*adc_p.channels < t_cycle, "need shifting time"
self.submodules.adc = ADC(adc_pads, adc_p) self.submodules.adc = ADC(adc_pads, adc_p)
self.submodules.iir = IIR(iir_p) self.submodules.iir = IIR(iir_p)
self.submodules.dds = DDS(dds_pads, dds_p) self.submodules.dds = DDS(dds_pads, dds_p)
# adc channels are reversed on Sampler # adc channels are reversed on Sampler
for i, j, k, l in zip(reversed(self.adc.data), self.iir.adc, for iir, adc in zip(self.iir.adc, reversed(self.adc.data)):
self.iir.dds, self.dds.profile): self.comb += iir.eq(adc)
self.comb += j.eq(i), l.eq(k) for dds, iir in zip(self.dds.profile, self.iir.dds):
self.comb += dds.eq(iir)
t_adc = (adc_p.t_cnvh + adc_p.t_conv + adc_p.t_rtt +
adc_p.channels*adc_p.width//adc_p.lanes) + 1
t_iir = ((1 + 4 + 1) << iir_p.channel) + 1
t_dds = (dds_p.width*2 + 1)*dds_p.clk + 1
t_cycle = max(t_adc, t_iir, t_dds)
assert t_iir + (2 << iir_p.channel) < t_cycle, "need shifting time"
# If high, a new cycle is started if the current cycle (if any) is
# finished. Consequently, if low, servo iterations cease after the
# current cycle is finished. Don't care while the first step (ADC)
# is active.
self.start = Signal() self.start = Signal()
# Counter for delay between end of ADC cycle and start of next one,
# depending on the duration of the other steps.
t_restart = t_cycle - t_adc + 1 t_restart = t_cycle - t_adc + 1
assert t_restart > 1 assert t_restart > 1
cnt = Signal(max=t_restart) cnt = Signal(max=t_restart)
cnt_done = Signal() cnt_done = Signal()
active = Signal(3) active = Signal(3)
# Indicates whether different steps (0: ADC, 1: IIR, 2: DDS) are
# currently active (exposed for simulation only), with each bit being
# reset once the successor step is launched. Depending on the
# timing details of the different steps, any number can be concurrently
# active (e.g. ADC read from iteration n, IIR computation from iteration
# n - 1, and DDS write from iteration n - 2).
# Asserted once per cycle when the DDS write has been completed.
self.done = Signal() self.done = Signal()
self.sync += [ self.sync += [
If(self.dds.done, If(self.dds.done,
active[2].eq(0) active[2].eq(0)

View File

@ -228,9 +228,9 @@ class SUServo(StandaloneBase):
ttl_serdes_7series.Output_8X, ttl_serdes_7series.Output_8X) ttl_serdes_7series.Output_8X, ttl_serdes_7series.Output_8X)
# EEM3/2: Sampler, EEM5/4: Urukul, EEM7/6: Urukul # EEM3/2: Sampler, EEM5/4: Urukul, EEM7/6: Urukul
eem.SUServo.add_std( eem.SUServo.add_std(self,
self, eems_sampler=(3, 2), eems_sampler=(3, 2),
eems_urukul0=(5, 4), eems_urukul1=(7, 6)) eems_urukul=[[5, 4], [7, 6]])
for i in (1, 2): for i in (1, 2):
sfp_ctl = self.platform.request("sfp_ctl", i) sfp_ctl = self.platform.request("sfp_ctl", i)