Improved CooMatrix proptest strategies
This commit is contained in:
parent
46442d6060
commit
7260f05b07
|
@ -7,7 +7,13 @@ edition = "2018"
|
|||
[features]
|
||||
proptest-support = ["proptest", "nalgebra/proptest"]
|
||||
|
||||
# Enable this feature to run some tests that take a lot of time to run
|
||||
slow-tests = []
|
||||
|
||||
[dependencies]
|
||||
nalgebra = { version="0.23", path = "../" }
|
||||
num-traits = { version = "0.2", default-features = false }
|
||||
proptest = { version = "0.10", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
itertools = "0.9"
|
||||
|
|
|
@ -37,7 +37,7 @@ use num_traits::Zero;
|
|||
///
|
||||
/// // TODO: Convert to CSR
|
||||
/// ```
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct CooMatrix<T> {
|
||||
nrows: usize,
|
||||
ncols: usize,
|
||||
|
|
|
@ -4,38 +4,177 @@
|
|||
|
||||
use crate::coo::CooMatrix;
|
||||
use proptest::prelude::*;
|
||||
use proptest::collection::{SizeRange, vec};
|
||||
use proptest::collection::{vec, hash_map};
|
||||
use nalgebra::Scalar;
|
||||
use std::cmp::min;
|
||||
use std::iter::repeat;
|
||||
use proptest::sample::{Index};
|
||||
|
||||
/// TODO
|
||||
pub fn coo<T>(
|
||||
value_strategy: T,
|
||||
rows: impl Strategy<Value=usize> + 'static,
|
||||
cols: impl Strategy<Value=usize> + 'static,
|
||||
max_nonzeros: usize) -> BoxedStrategy<CooMatrix<T::Value>>
|
||||
/// A strategy for generating `nnz` triplets.
|
||||
///
|
||||
/// This strategy should generally only be used when `nnz` is close to `nrows * ncols`.
|
||||
fn dense_triplet_strategy<T>(value_strategy: T,
|
||||
nrows: usize,
|
||||
ncols: usize,
|
||||
nnz: usize)
|
||||
-> impl Strategy<Value=Vec<(usize, usize, T::Value)>>
|
||||
where
|
||||
T: Strategy + Clone + 'static,
|
||||
T::Value: Scalar,
|
||||
{
|
||||
(rows, cols, (0 ..= max_nonzeros))
|
||||
.prop_flat_map(move |(nrows, ncols, nnz)| {
|
||||
// If the numbers of rows and columns are small in comparison with the
|
||||
// max nnz, it will lead to small matrices essentially always turning out to be dense.
|
||||
// To address this, we correct the nnz by computing the modulo with the
|
||||
// maximum number of non-zeros (ignoring duplicates) we can have for
|
||||
// the given dimensions.
|
||||
// This way we can still generate very sparse matrices for small matrices.
|
||||
let max_nnz = nrows * ncols;
|
||||
let nnz = if max_nnz == 0 { 0 } else { nnz % max_nnz };
|
||||
let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 };
|
||||
let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 };
|
||||
let row_indices = vec![row_index_strategy.clone(); nnz];
|
||||
let col_indices = vec![col_index_strategy.clone(); nnz];
|
||||
let values_strategy = vec![value_strategy.clone(); nnz];
|
||||
assert!(nnz <= nrows * ncols);
|
||||
|
||||
(Just(nrows), Just(ncols), row_indices, col_indices, values_strategy)
|
||||
}).prop_map(|(nrows, ncols, row_indices, col_indices, values)| {
|
||||
CooMatrix::try_from_triplets(nrows, ncols, row_indices, col_indices, values)
|
||||
.expect("We should always generate valid COO data.")
|
||||
}).boxed()
|
||||
// Construct a number of booleans of which exactly `nnz` are true.
|
||||
let booleans: Vec<_> = repeat(true)
|
||||
.take(nnz)
|
||||
.chain(repeat(false))
|
||||
.take(nrows * ncols)
|
||||
.collect();
|
||||
|
||||
Just(booleans)
|
||||
// Shuffle the booleans so that they are randomly distributed
|
||||
.prop_shuffle()
|
||||
// Convert the booleans into a list of coordinate pairs
|
||||
.prop_map(move |booleans| {
|
||||
booleans
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(index, is_entry)| {
|
||||
if is_entry {
|
||||
// Convert linear index to row/col pair
|
||||
let i = index / ncols;
|
||||
let j = index % ncols;
|
||||
Some((i, j))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
// Assign values to each coordinate pair in order to generate a list of triplets
|
||||
.prop_flat_map(move |coords| {
|
||||
vec![value_strategy.clone(); coords.len()]
|
||||
.prop_map(move |values| {
|
||||
coords.clone().into_iter()
|
||||
.zip(values)
|
||||
.map(|((i, j), v)| {
|
||||
(i, j, v)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// A strategy for generating `nnz` triplets.
|
||||
///
|
||||
/// This strategy should generally only be used when `nnz << nrows * ncols`. If `nnz` is too
|
||||
/// close to `nrows * ncols` it may fail due to excessive rejected samples.
|
||||
fn sparse_triplet_strategy<T>(value_strategy: T,
|
||||
nrows: usize,
|
||||
ncols: usize,
|
||||
nnz: usize)
|
||||
-> impl Strategy<Value=Vec<(usize, usize, T::Value)>>
|
||||
where
|
||||
T: Strategy + Clone + 'static,
|
||||
T::Value: Scalar,
|
||||
{
|
||||
// Have to handle the zero case: proptest doesn't like empty ranges (i.e. 0 .. 0)
|
||||
let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 };
|
||||
let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 };
|
||||
let coord_strategy = (row_index_strategy, col_index_strategy);
|
||||
hash_map(coord_strategy, value_strategy.clone(), nnz)
|
||||
.prop_map(|hash_map| {
|
||||
let triplets: Vec<_> = hash_map
|
||||
.into_iter()
|
||||
.map(|((i, j), v)| (i, j, v))
|
||||
.collect();
|
||||
triplets
|
||||
})
|
||||
// Although order in the hash map is unspecified, it's not necessarily *random*
|
||||
// - or, in particular, it does not necessarily sample the whole space of possible outcomes -
|
||||
// so we additionally shuffle the triplets
|
||||
.prop_shuffle()
|
||||
}
|
||||
|
||||
/// TODO
|
||||
pub fn coo_no_duplicates<T>(
|
||||
value_strategy: T,
|
||||
rows: impl Strategy<Value=usize> + 'static,
|
||||
cols: impl Strategy<Value=usize> + 'static,
|
||||
max_nonzeros: usize) -> impl Strategy<Value=CooMatrix<T::Value>>
|
||||
where
|
||||
T: Strategy + Clone + 'static,
|
||||
T::Value: Scalar,
|
||||
{
|
||||
(rows, cols)
|
||||
.prop_flat_map(move |(nrows, ncols)| {
|
||||
let max_nonzeros = min(max_nonzeros, nrows * ncols);
|
||||
let size_range = 0 ..= max_nonzeros;
|
||||
let value_strategy = value_strategy.clone();
|
||||
|
||||
size_range.prop_flat_map(move |nnz| {
|
||||
let value_strategy = value_strategy.clone();
|
||||
if nnz as f64 > 0.10 * (nrows as f64) * (ncols as f64) {
|
||||
// If the number of nnz is sufficiently dense, then use the dense
|
||||
// sample strategy
|
||||
dense_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed()
|
||||
} else {
|
||||
// Otherwise, use a hash map strategy so that we can get a sparse sampling
|
||||
// (so that complexity is rather on the order of max_nnz than nrows * ncols)
|
||||
sparse_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed()
|
||||
}
|
||||
})
|
||||
.prop_map(move |triplets| {
|
||||
let mut coo = CooMatrix::new(nrows, ncols);
|
||||
for (i, j, v) in triplets {
|
||||
coo.push(i, j, v);
|
||||
}
|
||||
coo
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// TODO
|
||||
///
|
||||
/// TODO: Write note on how this strategy only maintains the constraints on values
|
||||
/// for each triplet, but does not consider the sum of triplets
|
||||
pub fn coo_with_duplicates<T>(
|
||||
value_strategy: T,
|
||||
rows: impl Strategy<Value=usize> + 'static,
|
||||
cols: impl Strategy<Value=usize> + 'static,
|
||||
max_nonzeros: usize,
|
||||
max_duplicates: usize)
|
||||
-> impl Strategy<Value=CooMatrix<T::Value>>
|
||||
where
|
||||
T: Strategy + Clone + 'static,
|
||||
T::Value: Scalar,
|
||||
{
|
||||
let coo_strategy = coo_no_duplicates(value_strategy.clone(), rows, cols, max_nonzeros);
|
||||
let duplicate_strategy = vec((any::<Index>(), value_strategy.clone()), 0 ..= max_duplicates);
|
||||
(coo_strategy, duplicate_strategy)
|
||||
.prop_flat_map(|(coo, duplicates)| {
|
||||
let mut triplets: Vec<(usize, usize, T::Value)> = coo.triplet_iter()
|
||||
.map(|(i, j, v)| (i, j, v.clone()))
|
||||
.collect();
|
||||
if !triplets.is_empty() {
|
||||
let duplicates_iter: Vec<_> = duplicates
|
||||
.into_iter()
|
||||
.map(|(idx, val)| {
|
||||
let (i, j, _) = idx.get(&triplets);
|
||||
(*i, *j, val)
|
||||
})
|
||||
.collect();
|
||||
triplets.extend(duplicates_iter);
|
||||
}
|
||||
// Make sure to shuffle so that the duplicates get mixed in with the non-duplicates
|
||||
let shuffled = Just(triplets).prop_shuffle();
|
||||
(Just(coo.nrows()), Just(coo.ncols()), shuffled)
|
||||
})
|
||||
.prop_map(move |(nrows, ncols, triplets)| {
|
||||
let mut coo = CooMatrix::new(nrows, ncols);
|
||||
for (i, j, v) in triplets {
|
||||
coo.push(i, j, v);
|
||||
}
|
||||
coo
|
||||
})
|
||||
}
|
|
@ -1,4 +1,7 @@
|
|||
//! Unit tests
|
||||
#[cfg(not(feature = "proptest-support"))]
|
||||
compile_error!("Tests must be run with feature proptest-support");
|
||||
|
||||
mod unit_tests;
|
||||
|
||||
#[macro_use]
|
||||
|
|
|
@ -3,3 +3,4 @@ mod ops;
|
|||
mod pattern;
|
||||
mod csr;
|
||||
mod csc;
|
||||
mod proptest;
|
|
@ -0,0 +1,134 @@
|
|||
use nalgebra_sparse::proptest::{coo_with_duplicates, coo_no_duplicates};
|
||||
use nalgebra::DMatrix;
|
||||
|
||||
use proptest::prelude::*;
|
||||
use itertools::Itertools;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::iter::repeat;
|
||||
|
||||
#[cfg(feature = "slow-tests")]
|
||||
use {
|
||||
proptest::test_runner::TestRunner,
|
||||
proptest::strategy::ValueTree
|
||||
};
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
/// Enumerates every dense matrix whose dimensions lie in `rows_range` x `cols_range`
/// and whose entries all lie in `value_range`.
#[cfg(feature = "slow-tests")]
fn generate_all_possible_matrices(
    value_range: RangeInclusive<i32>,
    rows_range: RangeInclusive<usize>,
    cols_range: RangeInclusive<usize>,
) -> HashSet<DMatrix<i32>> {
    let mut matrices = HashSet::new();

    for nrows in rows_range {
        for ncols in cols_range.clone() {
            match nrows * ncols {
                // With zero rows or zero columns there is exactly one matrix of the given
                // shape: the empty one.
                0 => {
                    matrices.insert(DMatrix::from_row_slice(nrows, ncols, &[]));
                }
                // Otherwise every combination of entries has to be visited. The entries
                // are produced as the (multi) Cartesian product of the value set with
                // itself, e.g. for 2x2 matrices all 4-element arrays in
                //   V x V x V x V
                // where V is the set of eligible values, e.g. V := -1 ..= 1
                entry_count => {
                    matrices.extend(
                        repeat(value_range.clone())
                            .take(entry_count)
                            .multi_cartesian_product()
                            .map(|entries| DMatrix::from_row_slice(nrows, ncols, &entries)),
                    );
                }
            }
        }
    }

    matrices
}
|
||||
|
||||
#[cfg(feature = "slow-tests")]
#[test]
fn coo_no_duplicates_samples_all_admissible_outputs() {
    // Note: This test basically mirrors a similar test for `matrix` in the `nalgebra` repo.
    //
    // It checks that, given enough samples, the strategy produces every possible output
    // for a small space of inputs.

    // A deterministic runner keeps the test "stable" across runs.
    let mut runner = TestRunner::deterministic();

    // Must be large enough that all possible cases are sampled with high probability.
    let num_generated_matrices = 500_000;

    let values = -1..=1;
    let rows = 0..=2;
    let cols = 0..=3;
    let strategy = coo_no_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3);

    // The reference set: every matrix admitted by the constraints above.
    let all_combinations = generate_all_possible_matrices(values, rows, cols);

    // Sample repeatedly and record the dense form of every generated matrix.
    let visited_combinations: HashSet<DMatrix<i32>> = (0..num_generated_matrices)
        .map(|_| {
            let tree = strategy
                .new_tree(&mut runner)
                .expect("Tree generation should not fail");
            DMatrix::from(&tree.current())
        })
        .collect();

    assert_eq!(visited_combinations.len(), all_combinations.len());
    assert_eq!(visited_combinations, all_combinations, "Did not sample all possible values.");
}
|
||||
|
||||
#[cfg(feature = "slow-tests")]
#[test]
fn coo_with_duplicates_samples_all_admissible_outputs() {
    // Nearly identical to the test for coo_no_duplicates, but with a different "success"
    // criterion: coo_with_duplicates is able to generate matrices with values outside of
    // the value constraints. See the final assertion for details.

    // A deterministic runner keeps the test "stable" across runs.
    let mut runner = TestRunner::deterministic();

    // Must be large enough that all possible cases are sampled with high probability.
    let num_generated_matrices = 500_000;

    let values = -1..=1;
    let rows = 0..=2;
    let cols = 0..=3;
    let strategy = coo_with_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3, 2);

    // Every matrix that fits the constraints (note: this is only a subset of the matrices
    // that `coo_with_duplicates` can generate).
    let all_combinations = generate_all_possible_matrices(values, rows, cols);

    // Sample repeatedly and record the dense form of every generated matrix.
    let visited_combinations: HashSet<DMatrix<i32>> = (0..num_generated_matrices)
        .map(|_| {
            let tree = strategy
                .new_tree(&mut runner)
                .expect("Tree generation should not fail");
            DMatrix::from(&tree.current())
        })
        .collect();

    // Equality with the set of admissible matrices cannot be expected here. However, the
    // strategy should be able to generate all matrices that fit the constraints, so the
    // set of admissible matrices must be contained in the set of visited matrices.
    assert!(all_combinations.is_subset(&visited_combinations));
}
|
||||
|
||||
// TODO: Not implemented yet. The body below is empty, so this test currently passes
// without checking anything about the matrices generated by `coo_no_duplicates`.
#[test]
|
||||
fn coo_no_duplicates_generates_admissible_matrices() {
|
||||
|
||||
}
|
Loading…
Reference in New Issue