From 0f15e0ef70d85ddd2cc78a45f417aabd0c1c5204 Mon Sep 17 00:00:00 2001
From: linuswck <linuswck@m-labs.hk>
Date: Wed, 11 Dec 2024 10:59:49 +0800
Subject: [PATCH] Add patches to fix timing errors in gateware

---
 fast-servo/autolock_pipeline.patch      | 66 +++++++++++++++++++++++++
 fast-servo/iir_pipeline.patch           | 13 +++++
 fast-servo/linien_module_pipeline.patch | 57 +++++++++++++++++++++
 fast-servo/pid_pipeline.patch           | 64 ++++++++++++++++++++++++
 flake.nix                               |  6 ++-
 5 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 fast-servo/autolock_pipeline.patch
 create mode 100644 fast-servo/iir_pipeline.patch
 create mode 100644 fast-servo/linien_module_pipeline.patch
 create mode 100644 fast-servo/pid_pipeline.patch

diff --git a/fast-servo/autolock_pipeline.patch b/fast-servo/autolock_pipeline.patch
new file mode 100644
index 0000000..2e8c60c
--- /dev/null
+++ b/fast-servo/autolock_pipeline.patch
@@ -0,0 +1,66 @@
+diff --git a/gateware/logic/autolock.py b/gateware/logic/autolock.py
+index a6dc764..1a8407f 100644
+--- a/gateware/logic/autolock.py
++++ b/gateware/logic/autolock.py
+@@ -148,14 +148,17 @@ class RobustAutolock(Module, AutoCSR):
+         final_waited_for = Signal(bits_for(N_points))
+ 
+         # this is the signal that's used for detecting peaks
+-        sum_diff = Signal((len(self.sum_diff_calculator.output), True))
+-        abs_sum_diff = Signal.like(sum_diff)
++        self.sum_diff = Signal((len(self.sum_diff_calculator.output), True))
++        abs_sum_diff = Signal.like(self.sum_diff)
+         self.comb += [
+             self.sum_diff_calculator.writing_data_now.eq(self.writing_data_now),
+             self.sum_diff_calculator.restart.eq(self.at_start),
+             self.sum_diff_calculator.input.eq(self.input),
+             self.sum_diff_calculator.delay_value.eq(self.time_scale.storage),
+-            sum_diff.eq(self.sum_diff_calculator.output),
++        ]
++
++        self.sync += [
++            self.sum_diff.eq(self.sum_diff_calculator.output),
+         ]
+ 
+         # has this signal at the moment the same sign as the peak we are looking for?
+@@ -168,16 +171,17 @@ class RobustAutolock(Module, AutoCSR):
+         all_instructions_triggered = Signal()
+ 
+         self.comb += [
+-            sign_equal.eq((sum_diff > 0) == (current_peak_height > 0)),
+-            If(sum_diff >= 0, abs_sum_diff.eq(sum_diff)).Else(
+-                abs_sum_diff.eq(-1 * sum_diff)
++            sign_equal.eq((self.sum_diff > 0) == (current_peak_height > 0)),
++            If(self.sum_diff >= 0, abs_sum_diff.eq(self.sum_diff)).Else(
++                abs_sum_diff.eq(-1 * self.sum_diff)
+             ),
+             If(
+                 current_peak_height >= 0,
+                 abs_current_peak_height.eq(current_peak_height),
+             ).Else(abs_current_peak_height.eq(-1 * current_peak_height)),
+             over_threshold.eq(abs_sum_diff >= abs_current_peak_height),
+-            waited_long_enough.eq(waited_for > current_wait_for),
++            # HACK: To compensate the lock position output for the pipeline delay
++            waited_long_enough.eq((waited_for >= current_wait_for - 1) & (waited_for != 2 ** bits_for(N_points) - 1) & (current_wait_for - 1 != 2 ** bits_for(N_points) - 1)),
+             all_instructions_triggered.eq(
+                 self.current_instruction_idx >= self.N_instructions.storage
+             ),
+@@ -190,7 +194,7 @@ class RobustAutolock(Module, AutoCSR):
+         self.sync += [
+             If(
+                 self.at_start,
+-                waited_for.eq(0),
++                waited_for.eq(-1),
+                 # fpga robust autolock algorithm registeres trigger events delayed.
+                 # Therefore, we give it a head start for `final_waited_for`
+                 final_waited_for.eq(ROBUST_AUTOLOCK_FPGA_DELAY),
+@@ -213,7 +217,8 @@ class RobustAutolock(Module, AutoCSR):
+                         self.current_instruction_idx.eq(
+                             self.current_instruction_idx + 1
+                         ),
+-                        waited_for.eq(0),
++                        # HACK: To compensate the lock position output for the pipeline delay
++                        waited_for.eq(-1),
+                     ).Else(waited_for.eq(waited_for + 1)),
+                 ),
+                 If(
diff --git a/fast-servo/iir_pipeline.patch b/fast-servo/iir_pipeline.patch
new file mode 100644
index 0000000..b64b209
--- /dev/null
+++ b/fast-servo/iir_pipeline.patch
@@ -0,0 +1,13 @@
+diff --git a/gateware/logic/iir.py b/gateware/logic/iir.py
+index 2380dd7..60bfeb7 100644
+--- a/gateware/logic/iir.py
++++ b/gateware/logic/iir.py
+@@ -89,7 +89,7 @@ class Iir(Filter):
+                 zr = Signal.like(z)
+                 self.sync += zr.eq(z)
+                 z = Signal.like(zr)
+-                self.comb += z.eq(zr + signal * c[coeff])
++                self.sync += z.eq(zr + signal * c[coeff])
+             self.comb += y_next.eq(z)
+             self.latency.value = Constant(order + 1)
+             self.interval.value = Constant(1)
diff --git a/fast-servo/linien_module_pipeline.patch b/fast-servo/linien_module_pipeline.patch
new file mode 100644
index 0000000..4206247
--- /dev/null
+++ b/fast-servo/linien_module_pipeline.patch
@@ -0,0 +1,57 @@
+diff --git a/gateware/linien_module.py b/gateware/linien_module.py
+index a958896..a64714c 100644
+--- a/gateware/linien_module.py
++++ b/gateware/linien_module.py
+@@ -233,23 +233,46 @@ class LinienModule(Module, AutoCSR):
+             self.fast_a.adc.eq(soc.analog.adc_a),
+             self.fast_b.adc.eq(soc.analog.adc_b),
+         ]
+-
++        
+         # now, we combine the output of the two paths, with a variable factor each.
+         mixed = Signal(
+             (2 + ((signal_width + 1) + self.logic.chain_a_factor.size), True)
+         )
++
++        chain_a_factor_mult_fast_a_out_i = Signal(
++            (2 + ((signal_width + 1) + self.logic.chain_a_factor.size), True)
++        )
++
++        chain_b_factor_mult_fast_b_out_i = Signal(
++            (2 + ((signal_width + 1) + self.logic.chain_a_factor.size), True)
++        )
++        combined_offset_signed_left_shifted = Signal(
++            (2 + ((signal_width + 1) + self.logic.chain_a_factor.size), True)
++        )
++        fast_a_out_i_left_shifted = Signal(
++            (2 + ((signal_width + 1) + self.logic.chain_a_factor.size), True)
++        )
++
++        self.sync += [
++            chain_a_factor_mult_fast_a_out_i.eq(self.logic.chain_a_factor.storage * self.fast_a.out_i),
++            chain_b_factor_mult_fast_b_out_i.eq(self.logic.chain_b_factor.storage * self.fast_b.out_i),
++            combined_offset_signed_left_shifted.eq(self.logic.combined_offset_signed << (chain_factor_bits + s)),
++            fast_a_out_i_left_shifted.eq(self.fast_a.out_i << chain_factor_bits),
++        ] 
++
++
+         self.comb += [
+             If(
+                 self.logic.dual_channel.storage,
+                 mixed.eq(
+-                    (self.logic.chain_a_factor.storage * self.fast_a.out_i)
+-                    + (self.logic.chain_b_factor.storage * self.fast_b.out_i)
+-                    + (self.logic.combined_offset_signed << (chain_factor_bits + s))
++                    chain_a_factor_mult_fast_a_out_i
++                    + chain_b_factor_mult_fast_b_out_i
++                    + combined_offset_signed_left_shifted
+                 ),
+             ).Else(
+                 mixed.eq(
+-                    (self.fast_a.out_i << chain_factor_bits)
+-                    + (self.logic.combined_offset_signed << (chain_factor_bits + s))
++                    fast_a_out_i_left_shifted
++                    + combined_offset_signed_left_shifted
+                 )
+             )
+         ]
diff --git a/fast-servo/pid_pipeline.patch b/fast-servo/pid_pipeline.patch
new file mode 100644
index 0000000..26d2986
--- /dev/null
+++ b/fast-servo/pid_pipeline.patch
@@ -0,0 +1,64 @@
+diff --git a/gateware/logic/pid.py b/gateware/logic/pid.py
+index 4320f94..64e1a74 100644
+--- a/gateware/logic/pid.py
++++ b/gateware/logic/pid.py
+@@ -56,10 +56,13 @@ class PID(Module, AutoCSR):
+         self.comb += [kp_signed.eq(self.kp.storage)]
+ 
+         kp_mult = Signal((self.width + self.coeff_width, True))
+-        self.comb += [kp_mult.eq(self.error * kp_signed)]
++        kp_mult_reg = Signal((self.width + self.coeff_width, True))
++        self.sync += kp_mult.eq(kp_mult_reg >> (self.coeff_width - 2))
++
++        self.comb += [kp_mult_reg.eq(self.error * kp_signed)]
+ 
+         self.output_p = Signal((self.width, True))
+-        self.comb += [self.output_p.eq(kp_mult >> (self.coeff_width - 2))]
++        self.comb += [self.output_p.eq(kp_mult)]
+ 
+         self.kp_mult = kp_mult
+ 
+@@ -71,8 +74,10 @@ class PID(Module, AutoCSR):
+         self.comb += [ki_signed.eq(self.ki.storage)]
+ 
+         self.ki_mult = Signal((1 + self.width + self.coeff_width, True))
++        self.ki_mult_reg = Signal((1 + self.width + self.coeff_width, True))
++        self.sync += self.ki_mult.eq(self.ki_mult_reg)
++        self.comb += self.ki_mult_reg.eq((self.error * ki_signed) >> 4)
+ 
+-        self.comb += [self.ki_mult.eq((self.error * ki_signed) >> 4)]
+ 
+         int_reg_width = self.width + self.coeff_width + 4
+         extra_width = int_reg_width - self.width
+@@ -110,15 +115,17 @@ class PID(Module, AutoCSR):
+         self.kd = CSRStorage(self.coeff_width)
+         kd_signed = Signal((self.coeff_width, True))
+         kd_mult = Signal((mult_width, True))
++        kd_mult_reg = Signal((mult_width, True))
++        self.sync += kd_mult.eq(kd_mult_reg)
+ 
+-        self.comb += [kd_signed.eq(self.kd.storage), kd_mult.eq(self.error * kd_signed)]
++        self.comb += [kd_signed.eq(self.kd.storage), kd_mult_reg.eq(self.error * kd_signed >> (self.coeff_width - self.d_shift))]
+ 
+         kd_reg = Signal((out_width, True))
+         kd_reg_r = Signal((out_width, True))
+ 
+         self.output_d = Signal((out_width, True))
+         self.sync += [
+-            kd_reg.eq(kd_mult >> (self.coeff_width - self.d_shift)),
++            kd_reg.eq(kd_mult),
+             kd_reg_r.eq(kd_reg),
+             self.output_d.eq(kd_reg - kd_reg_r),
+         ]
+@@ -143,4 +150,12 @@ class PID(Module, AutoCSR):
+ 
+         # sync is required here, otherwise we get artifacts when one of the
+         # signals changes sign
+-        self.sync += [self.pid_sum.eq(self.output_p + self.int_out + self.output_d)]
++        self.sync += [
++            If(
++                self.running,
++                self.pid_sum.eq(self.output_p + self.int_out + self.output_d),
++            )
++            .Else(self.pid_sum.eq(0))
++        ]
diff --git a/flake.nix b/flake.nix
index d23d814..f3d89b0 100644
--- a/flake.nix
+++ b/flake.nix
@@ -164,7 +164,11 @@
         '';
         patches = [
           fast-servo/linien-fast-servo-gateware.patch
-          fast-servo/linien-fast-servo-server.patch
+          fast-servo/linien-fast-servo-server.patch 
+          fast-servo/autolock_pipeline.patch
+          fast-servo/iir_pipeline.patch
+          fast-servo/linien_module_pipeline.patch
+          fast-servo/pid_pipeline.patch
         ];
         nativeBuildInputs = [
           (pkgs.python3.withPackages(ps: [