From 7260f05b07d25dcc6796577c8b9289b610ce6b53 Mon Sep 17 00:00:00 2001 From: Andreas Longva Date: Wed, 18 Nov 2020 13:54:14 +0100 Subject: [PATCH] Improved CooMatrix proptest strategies --- nalgebra-sparse/Cargo.toml | 6 + nalgebra-sparse/src/coo.rs | 2 +- nalgebra-sparse/src/proptest.rs | 193 ++++++++++++++++--- nalgebra-sparse/tests/unit.rs | 3 + nalgebra-sparse/tests/unit_tests/mod.rs | 3 +- nalgebra-sparse/tests/unit_tests/proptest.rs | 134 +++++++++++++ 6 files changed, 312 insertions(+), 29 deletions(-) diff --git a/nalgebra-sparse/Cargo.toml b/nalgebra-sparse/Cargo.toml index bf0d3849..3a4f08c1 100644 --- a/nalgebra-sparse/Cargo.toml +++ b/nalgebra-sparse/Cargo.toml @@ -7,7 +7,13 @@ edition = "2018" [features] proptest-support = ["proptest", "nalgebra/proptest"] +# Enable to enable running some tests that take a lot of time to run +slow-tests = [] + [dependencies] nalgebra = { version="0.23", path = "../" } num-traits = { version = "0.2", default-features = false } proptest = { version = "0.10", optional = true } + +[dev-dependencies] +itertools = "0.9" diff --git a/nalgebra-sparse/src/coo.rs b/nalgebra-sparse/src/coo.rs index a22f65cd..9869d502 100644 --- a/nalgebra-sparse/src/coo.rs +++ b/nalgebra-sparse/src/coo.rs @@ -37,7 +37,7 @@ use num_traits::Zero; /// /// // TODO: Convert to CSR /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CooMatrix { nrows: usize, ncols: usize, diff --git a/nalgebra-sparse/src/proptest.rs b/nalgebra-sparse/src/proptest.rs index 59d832e7..fae49028 100644 --- a/nalgebra-sparse/src/proptest.rs +++ b/nalgebra-sparse/src/proptest.rs @@ -4,38 +4,177 @@ use crate::coo::CooMatrix; use proptest::prelude::*; -use proptest::collection::{SizeRange, vec}; +use proptest::collection::{vec, hash_map}; use nalgebra::Scalar; +use std::cmp::min; +use std::iter::repeat; +use proptest::sample::{Index}; -/// TODO -pub fn coo( - value_strategy: T, - rows: impl Strategy + 'static, - cols: impl Strategy + 'static, - max_nonzeros: usize) -> BoxedStrategy> +/// A strategy for generating `nnz` triplets. +/// +/// This strategy should generally only be used when `nnz` is close to `nrows * ncols`. +fn dense_triplet_strategy(value_strategy: T, + nrows: usize, + ncols: usize, + nnz: usize) + -> impl Strategy> where T: Strategy + Clone + 'static, T::Value: Scalar, { - (rows, cols, (0 ..= max_nonzeros)) - .prop_flat_map(move |(nrows, ncols, nnz)| { - // If the numbers of rows and columns are small in comparison with the - // max nnz, it will lead to small matrices essentially always turning out to be dense. - // To address this, we correct the nnz by computing the modulo with the - // maximum number of non-zeros (ignoring duplicates) we can have for - // the given dimensions. - // This way we can still generate very sparse matrices for small matrices. - let max_nnz = nrows * ncols; - let nnz = if max_nnz == 0 { 0 } else { nnz % max_nnz }; - let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 }; - let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 }; - let row_indices = vec![row_index_strategy.clone(); nnz]; - let col_indices = vec![col_index_strategy.clone(); nnz]; - let values_strategy = vec![value_strategy.clone(); nnz]; + assert!(nnz <= nrows * ncols); - (Just(nrows), Just(ncols), row_indices, col_indices, values_strategy) - }).prop_map(|(nrows, ncols, row_indices, col_indices, values)| { - CooMatrix::try_from_triplets(nrows, ncols, row_indices, col_indices, values) - .expect("We should always generate valid COO data.") - }).boxed() + // Construct a number of booleans of which exactly `nnz` are true. + let booleans: Vec<_> = repeat(true) + .take(nnz) + .chain(repeat(false)) + .take(nrows * ncols) + .collect(); + + Just(booleans) + // Shuffle the booleans so that they are randomly distributed + .prop_shuffle() + // Convert the booleans into a list of coordinate pairs + .prop_map(move |booleans| { + booleans + .into_iter() + .enumerate() + .filter_map(|(index, is_entry)| { + if is_entry { + // Convert linear index to row/col pair + let i = index / ncols; + let j = index % ncols; + Some((i, j)) + } else { + None + } + }) + .collect::>() + }) + // Assign values to each coordinate pair in order to generate a list of triplets + .prop_flat_map(move |coords| { + vec![value_strategy.clone(); coords.len()] + .prop_map(move |values| { + coords.clone().into_iter() + .zip(values) + .map(|((i, j), v)| { + (i, j, v) + }) + .collect::>() + }) + }) +} + +/// A strategy for generating `nnz` triplets. +/// +/// This strategy should generally only be used when `nnz << nrows * ncols`. If `nnz` is too +/// close to `nrows * ncols` it may fail due to excessive rejected samples. +fn sparse_triplet_strategy(value_strategy: T, + nrows: usize, + ncols: usize, + nnz: usize) + -> impl Strategy> + where + T: Strategy + Clone + 'static, + T::Value: Scalar, +{ + // Have to handle the zero case: proptest doesn't like empty ranges (i.e. 0 .. 0) + let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 }; + let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 }; + let coord_strategy = (row_index_strategy, col_index_strategy); + hash_map(coord_strategy, value_strategy.clone(), nnz) + .prop_map(|hash_map| { + let triplets: Vec<_> = hash_map + .into_iter() + .map(|((i, j), v)| (i, j, v)) + .collect(); + triplets + }) + // Although order in the hash map is unspecified, it's not necessarily *random* + // - or, in particular, it does not necessarily sample the whole space of possible outcomes - + // so we additionally shuffle the triplets + .prop_shuffle() +} + +/// TODO +pub fn coo_no_duplicates( + value_strategy: T, + rows: impl Strategy + 'static, + cols: impl Strategy + 'static, + max_nonzeros: usize) -> impl Strategy> +where + T: Strategy + Clone + 'static, + T::Value: Scalar, +{ + (rows, cols) + .prop_flat_map(move |(nrows, ncols)| { + let max_nonzeros = min(max_nonzeros, nrows * ncols); + let size_range = 0 ..= max_nonzeros; + let value_strategy = value_strategy.clone(); + + size_range.prop_flat_map(move |nnz| { + let value_strategy = value_strategy.clone(); + if nnz as f64 > 0.10 * (nrows as f64) * (ncols as f64) { + // If the number of nnz is sufficiently dense, then use the dense + // sample strategy + dense_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed() + } else { + // Otherwise, use a hash map strategy so that we can get a sparse sampling + // (so that complexity is rather on the order of max_nnz than nrows * ncols) + sparse_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed() + } + }) + .prop_map(move |triplets| { + let mut coo = CooMatrix::new(nrows, ncols); + for (i, j, v) in triplets { + coo.push(i, j, v); + } + coo + }) + }) +} + +/// TODO +/// +/// TODO: Write note on how this strategy only maintains the constraints on values +/// for each triplet, but does not consider the sum of triplets +pub fn coo_with_duplicates( + value_strategy: T, + rows: impl Strategy + 'static, + cols: impl Strategy + 'static, + max_nonzeros: usize, + max_duplicates: usize) + -> impl Strategy> +where + T: Strategy + Clone + 'static, + T::Value: Scalar, +{ + let coo_strategy = coo_no_duplicates(value_strategy.clone(), rows, cols, max_nonzeros); + let duplicate_strategy = vec((any::(), value_strategy.clone()), 0 ..= max_duplicates); + (coo_strategy, duplicate_strategy) + .prop_flat_map(|(coo, duplicates)| { + let mut triplets: Vec<(usize, usize, T::Value)> = coo.triplet_iter() + .map(|(i, j, v)| (i, j, v.clone())) + .collect(); + if !triplets.is_empty() { + let duplicates_iter: Vec<_> = duplicates + .into_iter() + .map(|(idx, val)| { + let (i, j, _) = idx.get(&triplets); + (*i, *j, val) + }) + .collect(); + triplets.extend(duplicates_iter); + } + // Make sure to shuffle so that the duplicates get mixed in with the non-duplicates + let shuffled = Just(triplets).prop_shuffle(); + (Just(coo.nrows()), Just(coo.ncols()), shuffled) + }) + .prop_map(move |(nrows, ncols, triplets)| { + let mut coo = CooMatrix::new(nrows, ncols); + for (i, j, v) in triplets { + coo.push(i, j, v); + } + coo + }) } \ No newline at end of file diff --git a/nalgebra-sparse/tests/unit.rs b/nalgebra-sparse/tests/unit.rs index ee44e73b..b7badffd 100644 --- a/nalgebra-sparse/tests/unit.rs +++ b/nalgebra-sparse/tests/unit.rs @@ -1,4 +1,7 @@ //! Unit tests +#[cfg(not(feature = "proptest-support"))] +compile_error!("Tests must be run with feature proptest-support"); + mod unit_tests; #[macro_use] diff --git a/nalgebra-sparse/tests/unit_tests/mod.rs b/nalgebra-sparse/tests/unit_tests/mod.rs index 733357c9..f5b6d935 100644 --- a/nalgebra-sparse/tests/unit_tests/mod.rs +++ b/nalgebra-sparse/tests/unit_tests/mod.rs @@ -2,4 +2,5 @@ mod coo; mod ops; mod pattern; mod csr; -mod csc; \ No newline at end of file +mod csc; +mod proptest; \ No newline at end of file diff --git a/nalgebra-sparse/tests/unit_tests/proptest.rs b/nalgebra-sparse/tests/unit_tests/proptest.rs index e69de29b..23afa741 100644 --- a/nalgebra-sparse/tests/unit_tests/proptest.rs +++ b/nalgebra-sparse/tests/unit_tests/proptest.rs @@ -0,0 +1,134 @@ +use nalgebra_sparse::proptest::{coo_with_duplicates, coo_no_duplicates}; +use nalgebra::DMatrix; + +use proptest::prelude::*; +use itertools::Itertools; + +use std::collections::HashSet; +use std::iter::repeat; + +#[cfg(feature = "slow-tests")] +use { + proptest::test_runner::TestRunner, + proptest::strategy::ValueTree +}; +use std::ops::RangeInclusive; + +#[cfg(feature = "slow-tests")] +fn generate_all_possible_matrices(value_range: RangeInclusive, + rows_range: RangeInclusive, + cols_range: RangeInclusive) + -> HashSet> +{ + // Enumerate all possible combinations + let mut all_combinations = HashSet::new(); + for nrows in rows_range { + for ncols in cols_range.clone() { + // For the given number of rows and columns + let n_values = nrows * ncols; + + if n_values == 0 { + // If we have zero rows or columns, the set of matrices with the given + // rows and columns is a single element: an empty matrix + all_combinations.insert(DMatrix::from_row_slice(nrows, ncols, &[])); + } else { + // Otherwise, we need to sample all possible matrices. + // To do this, we generate the values as the (multi) Cartesian product + // of the value sets. For example, for a 2x2 matrices, we consider + // all possible 4-element arrays that the matrices can take by + // considering all elements in the cartesian product + // V x V x V x V + // where V is the set of eligible values, e.g. V := -1 ..= 1 + let values_iter = repeat(value_range.clone()) + .take(n_values) + .multi_cartesian_product(); + for matrix_values in values_iter { + all_combinations.insert(DMatrix::from_row_slice(nrows, ncols, &matrix_values)); + } + } + } + } + all_combinations +} + +#[cfg(feature = "slow-tests")] +#[test] +fn coo_no_duplicates_samples_all_admissible_outputs() { + // Note: This test basically mirrors a similar test for `matrix` in the `nalgebra` repo. + + // Test that the proptest generation covers all possible outputs for a small space of inputs + // given enough samples. + + // We use a deterministic test runner to make the test "stable". + let mut runner = TestRunner::deterministic(); + + // This number needs to be high enough so that we with high probability sample + // all possible cases + let num_generated_matrices = 500000; + + let values = -1..=1; + let rows = 0..=2; + let cols = 0..=3; + let strategy = coo_no_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3); + + // Enumerate all possible combinations + let all_combinations = generate_all_possible_matrices(values, rows, cols); + + let mut visited_combinations = HashSet::new(); + for _ in 0..num_generated_matrices { + let tree = strategy + .new_tree(&mut runner) + .expect("Tree generation should not fail"); + let matrix = tree.current(); + visited_combinations.insert(DMatrix::from(&matrix)); + } + + assert_eq!(visited_combinations.len(), all_combinations.len()); + assert_eq!(visited_combinations, all_combinations, "Did not sample all possible values."); +} + +#[cfg(feature = "slow-tests")] +#[test] +fn coo_with_duplicates_samples_all_admissible_outputs() { + // This is almost the same as the test for coo_no_duplicates, except that we need + // a different "success" criterion, since coo_with_duplicates is able to generate + // matrices with values outside of the value constraints. See below for details. + + // We use a deterministic test runner to make the test "stable". + let mut runner = TestRunner::deterministic(); + + // This number needs to be high enough so that we with high probability sample + // all possible cases + let num_generated_matrices = 500000; + + let values = -1..=1; + let rows = 0..=2; + let cols = 0..=3; + let strategy = coo_with_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3, 2); + + // Enumerate all possible combinations that fit the constraints + // (note: this is only a subset of the matrices that can be generated by + // `coo_with_duplicates`) + let all_combinations = generate_all_possible_matrices(values, rows, cols); + + let mut visited_combinations = HashSet::new(); + for _ in 0..num_generated_matrices { + let tree = strategy + .new_tree(&mut runner) + .expect("Tree generation should not fail"); + let matrix = tree.current(); + visited_combinations.insert(DMatrix::from(&matrix)); + } + + // Here we cannot verify that the set of visited combinations is *equal* to + // all possible outcomes with the given constraints, however the + // strategy should be able to generate all matrices that fit the constraints. + // In other words, we need to determine that set of all admissible matrices + // is contained in the set of visited matrices + assert!(all_combinations.is_subset(&visited_combinations)); +} + +#[test] +fn coo_no_duplicates_generates_admissible_matrices() { + +} \ No newline at end of file