From 029bbc9ecccef33842486c664208b55aa8ea9979 Mon Sep 17 00:00:00 2001 From: Vasil Nikolov Date: Mon, 24 Apr 2023 00:46:06 +0300 Subject: [PATCH 1/7] add unit test for variance --- src/base/mod.rs | 2 ++ src/base/variance_test.rs | 7 +++++++ 2 files changed, 9 insertions(+) create mode 100644 src/base/variance_test.rs diff --git a/src/base/mod.rs b/src/base/mod.rs index 0f09cc33..1eabbfcf 100644 --- a/src/base/mod.rs +++ b/src/base/mod.rs @@ -64,3 +64,5 @@ pub use self::matrix_view::*; pub use self::storage::*; #[cfg(any(feature = "std", feature = "alloc"))] pub use self::vec_storage::*; + +mod variance_test; diff --git a/src/base/variance_test.rs b/src/base/variance_test.rs new file mode 100644 index 00000000..eec5f7e4 --- /dev/null +++ b/src/base/variance_test.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +use crate::DVector; +#[test] +fn test_variance_new() { + let v = DVector::repeat(10_000, 100000000.1234); + assert_eq!(v.variance(), 0.0) +} From 032002dce964391e016bf880c894993246aec6eb Mon Sep 17 00:00:00 2001 From: Vasil Nikolov Date: Mon, 24 Apr 2023 01:22:57 +0300 Subject: [PATCH 2/7] initial, unoptimized algoritm --- src/base/statistics.rs | 27 +++++++++++++++++++++------ src/base/variance_test.rs | 2 +- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/base/statistics.rs b/src/base/statistics.rs index 9f0e0ee6..bf2c40d8 100644 --- a/src/base/statistics.rs +++ b/src/base/statistics.rs @@ -335,12 +335,27 @@ impl> Matrix { if self.is_empty() { T::zero() } else { - let val = self.iter().cloned().fold((T::zero(), T::zero()), |a, b| { - (a.0 + b.clone() * b.clone(), a.1 + b) - }); - let denom = T::one() / crate::convert::<_, T>(self.len() as f64); - let vd = val.1 * denom.clone(); - val.0 * denom - vd.clone() * vd + // let val = self.iter().cloned().fold((T::zero(), T::zero()), |a, b| { + // (a.0 + b.clone() * b.clone(), a.1 + b) + // }); + // let denom = T::one() / crate::convert::<_, T>(self.len() as f64); + // let vd = val.1 * denom.clone(); + // val.0 * denom - vd.clone() * vd + // let mean: T = self.iter().map(|&entry| entry).sum::(); + // + // let x: Vec = (0..1000).map(|_| T::zero()).collect(); + // let s: T = x.iter().cloned().fold(T::zero(), |a, b| a + b); + + // cannot use sum since `T` is not `Sum` by trait bounds + let total_sum = self.iter().cloned().fold(T::zero(), |a, b| a + b); + let n_elements = crate::convert::<_, T>(self.len() as f64); + let mean = total_sum / n_elements.clone(); + + let variance = self.iter().cloned().fold(T::zero(), |acc, x| { + acc + (x.clone() - mean.clone()) * (x.clone() - mean.clone()) + }) / n_elements.clone(); + + variance } } diff --git a/src/base/variance_test.rs b/src/base/variance_test.rs index eec5f7e4..af5d22af 100644 --- a/src/base/variance_test.rs +++ b/src/base/variance_test.rs @@ -2,6 +2,6 @@ use crate::DVector; #[test] fn test_variance_new() { - let v = DVector::repeat(10_000, 100000000.1234); + let v = DVector::repeat(10_000, 100000000.0); assert_eq!(v.variance(), 0.0) } From fc56abe4816d6d00a9db4914c568698b4ed5754b Mon Sep 17 00:00:00 2001 From: vasil Date: Mon, 24 Apr 2023 23:22:32 +0300 Subject: [PATCH 3/7] add simple test, remove comment from old variance impl --- src/base/statistics.rs | 17 +++-------------- src/base/variance_test.rs | 7 +++++-- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/base/statistics.rs b/src/base/statistics.rs index bf2c40d8..ebefb49d 100644 --- a/src/base/statistics.rs +++ b/src/base/statistics.rs @@ -335,25 +335,14 @@ impl> Matrix { if self.is_empty() { T::zero() } else { - // let val = self.iter().cloned().fold((T::zero(), T::zero()), |a, b| { - // (a.0 + b.clone() * b.clone(), a.1 + b) - // }); - // let denom = T::one() / crate::convert::<_, T>(self.len() as f64); - // let vd = val.1 * denom.clone(); - // val.0 * denom - vd.clone() * vd - // let mean: T = self.iter().map(|&entry| entry).sum::(); - // - // let x: Vec = (0..1000).map(|_| T::zero()).collect(); - // let s: T = x.iter().cloned().fold(T::zero(), |a, b| a + b); - // cannot use sum since `T` is not `Sum` by trait bounds - let total_sum = self.iter().cloned().fold(T::zero(), |a, b| a + b); + let sum_of_elements = self.iter().cloned().fold(T::zero(), |a, b| a + b); let n_elements = crate::convert::<_, T>(self.len() as f64); - let mean = total_sum / n_elements.clone(); + let mean = sum_of_elements / n_elements.clone(); let variance = self.iter().cloned().fold(T::zero(), |acc, x| { acc + (x.clone() - mean.clone()) * (x.clone() - mean.clone()) - }) / n_elements.clone(); + }) / n_elements; variance } diff --git a/src/base/variance_test.rs b/src/base/variance_test.rs index af5d22af..4319e156 100644 --- a/src/base/variance_test.rs +++ b/src/base/variance_test.rs @@ -2,6 +2,9 @@ use crate::DVector; #[test] fn test_variance_new() { - let v = DVector::repeat(10_000, 100000000.0); - assert_eq!(v.variance(), 0.0) + let long_repeating_vector = DVector::repeat(10_000, 100000000.0); + assert_eq!(long_repeating_vector.variance(), 0.0); + + let short_vec = DVector::from_vec(vec![1., 2., 3.]); + assert_eq!(short_vec.variance(), 2.0 / 3.0) } From 75405b1e24ed71561b3fb9e210dfab3f4c3e0d39 Mon Sep 17 00:00:00 2001 From: vasil Date: Tue, 25 Apr 2023 01:25:36 +0300 Subject: [PATCH 4/7] fix bug, add test in tests folder --- src/base/mod.rs | 2 -- src/base/statistics.rs | 12 ++++-------- tests/core/mod.rs | 1 + src/base/variance_test.rs => tests/core/variance.rs | 6 +++--- 4 files changed, 8 insertions(+), 13 deletions(-) rename src/base/variance_test.rs => tests/core/variance.rs (72%) diff --git a/src/base/mod.rs b/src/base/mod.rs index 1eabbfcf..0f09cc33 100644 --- a/src/base/mod.rs +++ b/src/base/mod.rs @@ -64,5 +64,3 @@ pub use self::matrix_view::*; pub use self::storage::*; #[cfg(any(feature = "std", feature = "alloc"))] pub use self::vec_storage::*; - -mod variance_test; diff --git a/src/base/statistics.rs b/src/base/statistics.rs index ebefb49d..6007f8c7 100644 --- a/src/base/statistics.rs +++ b/src/base/statistics.rs @@ -335,16 +335,12 @@ impl> Matrix { if self.is_empty() { T::zero() } else { - // cannot use sum since `T` is not `Sum` by trait bounds - let sum_of_elements = self.iter().cloned().fold(T::zero(), |a, b| a + b); - let n_elements = crate::convert::<_, T>(self.len() as f64); - let mean = sum_of_elements / n_elements.clone(); + let n_elements: T = crate::convert(self.len() as f64); + let mean = self.mean(); - let variance = self.iter().cloned().fold(T::zero(), |acc, x| { + self.iter().cloned().fold(T::zero(), |acc, x| { acc + (x.clone() - mean.clone()) * (x.clone() - mean.clone()) - }) / n_elements; - - variance + }) / n_elements } } diff --git a/tests/core/mod.rs b/tests/core/mod.rs index 0f7ee85b..f0484e4d 100644 --- a/tests/core/mod.rs +++ b/tests/core/mod.rs @@ -11,6 +11,7 @@ mod reshape; #[cfg(feature = "rkyv-serialize-no-std")] mod rkyv; mod serde; +mod variance; #[cfg(feature = "compare")] mod matrixcompare; diff --git a/src/base/variance_test.rs b/tests/core/variance.rs similarity index 72% rename from src/base/variance_test.rs rename to tests/core/variance.rs index 4319e156..c643ea3f 100644 --- a/src/base/variance_test.rs +++ b/tests/core/variance.rs @@ -1,10 +1,10 @@ -#[cfg(test)] -use crate::DVector; +use nalgebra::DVector; #[test] fn test_variance_new() { let long_repeating_vector = DVector::repeat(10_000, 100000000.0); assert_eq!(long_repeating_vector.variance(), 0.0); let short_vec = DVector::from_vec(vec![1., 2., 3.]); - assert_eq!(short_vec.variance(), 2.0 / 3.0) + + assert_eq!(short_vec.variance(), 2.0 / 3.0); } From 6c241a3200d4c8879bbe6fc0c8a3f432cfb05c6f Mon Sep 17 00:00:00 2001 From: Vasil Nikolov Date: Fri, 28 Apr 2023 00:03:28 +0300 Subject: [PATCH 5/7] add features needed to run tests with only `cargo test` --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 1d36aeb1..f5c67d15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,6 +111,7 @@ serde_json = "1.0" rand_xorshift = "0.3" rand_isaac = "0.3" criterion = { version = "0.4", features = ["html_reports"] } +nalgebra = { path = ".", features = ["debug", "compare", "rand", "macros"]} # For matrix comparison macro matrixcompare = "0.3.0" From 2521fd9851726797df54965c202a17c9748fd555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Crozet?= Date: Sun, 30 Apr 2023 14:53:12 +0200 Subject: [PATCH 6/7] Add a couple of additional catastrophic cancellation variance checks --- tests/core/variance.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/core/variance.rs b/tests/core/variance.rs index c643ea3f..eb08ea0f 100644 --- a/tests/core/variance.rs +++ b/tests/core/variance.rs @@ -1,10 +1,18 @@ use nalgebra::DVector; + #[test] -fn test_variance_new() { +fn test_variance_catastrophic_cancellation() { let long_repeating_vector = DVector::repeat(10_000, 100000000.0); assert_eq!(long_repeating_vector.variance(), 0.0); let short_vec = DVector::from_vec(vec![1., 2., 3.]); - assert_eq!(short_vec.variance(), 2.0 / 3.0); + + let short_vec = + DVector::::from_vec(vec![1.0e8 + 4.0, 1.0e8 + 7.0, 1.0e8 + 13.0, 1.0e8 + 16.0]); + assert_eq!(short_vec.variance(), 22.5); + + let short_vec = + DVector::::from_vec(vec![1.0e9 + 4.0, 1.0e9 + 7.0, 1.0e9 + 13.0, 1.0e9 + 16.0]); + assert_eq!(short_vec.variance(), 22.5); } From 41cfbdbf6249c06508a7cb74e6d8b7ef602fc2d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Crozet?= Date: Sun, 30 Apr 2023 14:53:59 +0200 Subject: [PATCH 7/7] Update Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf0253aa..971c5173 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ documented here. This project adheres to [Semantic Versioning](https://semver.org/). +## Unreleased + +### Fixed +- Fixed severe catastrophic cancellation issue in variance calculation. ## [0.32.2] (07 March 2023)