Improved CooMatrix proptest strategies
This commit is contained in:
parent
46442d6060
commit
7260f05b07
|
@ -7,7 +7,13 @@ edition = "2018"
|
||||||
[features]
|
[features]
|
||||||
proptest-support = ["proptest", "nalgebra/proptest"]
|
proptest-support = ["proptest", "nalgebra/proptest"]
|
||||||
|
|
||||||
|
# Enable this feature to run tests that take a long time to complete
|
||||||
|
slow-tests = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
nalgebra = { version="0.23", path = "../" }
|
nalgebra = { version="0.23", path = "../" }
|
||||||
num-traits = { version = "0.2", default-features = false }
|
num-traits = { version = "0.2", default-features = false }
|
||||||
proptest = { version = "0.10", optional = true }
|
proptest = { version = "0.10", optional = true }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
itertools = "0.9"
|
||||||
|
|
|
@ -37,7 +37,7 @@ use num_traits::Zero;
|
||||||
///
|
///
|
||||||
/// // TODO: Convert to CSR
|
/// // TODO: Convert to CSR
|
||||||
/// ```
|
/// ```
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct CooMatrix<T> {
|
pub struct CooMatrix<T> {
|
||||||
nrows: usize,
|
nrows: usize,
|
||||||
ncols: usize,
|
ncols: usize,
|
||||||
|
|
|
@ -4,38 +4,177 @@
|
||||||
|
|
||||||
use crate::coo::CooMatrix;
|
use crate::coo::CooMatrix;
|
||||||
use proptest::prelude::*;
|
use proptest::prelude::*;
|
||||||
use proptest::collection::{SizeRange, vec};
|
use proptest::collection::{vec, hash_map};
|
||||||
use nalgebra::Scalar;
|
use nalgebra::Scalar;
|
||||||
|
use std::cmp::min;
|
||||||
|
use std::iter::repeat;
|
||||||
|
use proptest::sample::{Index};
|
||||||
|
|
||||||
/// TODO
|
/// A strategy for generating `nnz` triplets.
|
||||||
pub fn coo<T>(
|
///
|
||||||
value_strategy: T,
|
/// This strategy should generally only be used when `nnz` is close to `nrows * ncols`.
|
||||||
rows: impl Strategy<Value=usize> + 'static,
|
fn dense_triplet_strategy<T>(value_strategy: T,
|
||||||
cols: impl Strategy<Value=usize> + 'static,
|
nrows: usize,
|
||||||
max_nonzeros: usize) -> BoxedStrategy<CooMatrix<T::Value>>
|
ncols: usize,
|
||||||
|
nnz: usize)
|
||||||
|
-> impl Strategy<Value=Vec<(usize, usize, T::Value)>>
|
||||||
where
|
where
|
||||||
T: Strategy + Clone + 'static,
|
T: Strategy + Clone + 'static,
|
||||||
T::Value: Scalar,
|
T::Value: Scalar,
|
||||||
{
|
{
|
||||||
(rows, cols, (0 ..= max_nonzeros))
|
assert!(nnz <= nrows * ncols);
|
||||||
.prop_flat_map(move |(nrows, ncols, nnz)| {
|
|
||||||
// If the numbers of rows and columns are small in comparison with the
|
// Construct a number of booleans of which exactly `nnz` are true.
|
||||||
// max nnz, it will lead to small matrices essentially always turning out to be dense.
|
let booleans: Vec<_> = repeat(true)
|
||||||
// To address this, we correct the nnz by computing the modulo with the
|
.take(nnz)
|
||||||
// maximum number of non-zeros (ignoring duplicates) we can have for
|
.chain(repeat(false))
|
||||||
// the given dimensions.
|
.take(nrows * ncols)
|
||||||
// This way we can still generate very sparse matrices for small matrices.
|
.collect();
|
||||||
let max_nnz = nrows * ncols;
|
|
||||||
let nnz = if max_nnz == 0 { 0 } else { nnz % max_nnz };
|
Just(booleans)
|
||||||
|
// Shuffle the booleans so that they are randomly distributed
|
||||||
|
.prop_shuffle()
|
||||||
|
// Convert the booleans into a list of coordinate pairs
|
||||||
|
.prop_map(move |booleans| {
|
||||||
|
booleans
|
||||||
|
.into_iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(|(index, is_entry)| {
|
||||||
|
if is_entry {
|
||||||
|
// Convert linear index to row/col pair
|
||||||
|
let i = index / ncols;
|
||||||
|
let j = index % ncols;
|
||||||
|
Some((i, j))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
// Assign values to each coordinate pair in order to generate a list of triplets
|
||||||
|
.prop_flat_map(move |coords| {
|
||||||
|
vec![value_strategy.clone(); coords.len()]
|
||||||
|
.prop_map(move |values| {
|
||||||
|
coords.clone().into_iter()
|
||||||
|
.zip(values)
|
||||||
|
.map(|((i, j), v)| {
|
||||||
|
(i, j, v)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A strategy for generating `nnz` triplets.
|
||||||
|
///
|
||||||
|
/// This strategy should generally only be used when `nnz << nrows * ncols`. If `nnz` is too
|
||||||
|
/// close to `nrows * ncols` it may fail due to excessive rejected samples.
|
||||||
|
fn sparse_triplet_strategy<T>(value_strategy: T,
|
||||||
|
nrows: usize,
|
||||||
|
ncols: usize,
|
||||||
|
nnz: usize)
|
||||||
|
-> impl Strategy<Value=Vec<(usize, usize, T::Value)>>
|
||||||
|
where
|
||||||
|
T: Strategy + Clone + 'static,
|
||||||
|
T::Value: Scalar,
|
||||||
|
{
|
||||||
|
// Have to handle the zero case: proptest doesn't like empty ranges (i.e. 0 .. 0)
|
||||||
let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 };
|
let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 };
|
||||||
let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 };
|
let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 };
|
||||||
let row_indices = vec![row_index_strategy.clone(); nnz];
|
let coord_strategy = (row_index_strategy, col_index_strategy);
|
||||||
let col_indices = vec![col_index_strategy.clone(); nnz];
|
hash_map(coord_strategy, value_strategy.clone(), nnz)
|
||||||
let values_strategy = vec![value_strategy.clone(); nnz];
|
.prop_map(|hash_map| {
|
||||||
|
let triplets: Vec<_> = hash_map
|
||||||
(Just(nrows), Just(ncols), row_indices, col_indices, values_strategy)
|
.into_iter()
|
||||||
}).prop_map(|(nrows, ncols, row_indices, col_indices, values)| {
|
.map(|((i, j), v)| (i, j, v))
|
||||||
CooMatrix::try_from_triplets(nrows, ncols, row_indices, col_indices, values)
|
.collect();
|
||||||
.expect("We should always generate valid COO data.")
|
triplets
|
||||||
}).boxed()
|
})
|
||||||
|
// Although order in the hash map is unspecified, it's not necessarily *random*
|
||||||
|
// - or, in particular, it does not necessarily sample the whole space of possible outcomes -
|
||||||
|
// so we additionally shuffle the triplets
|
||||||
|
.prop_shuffle()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// TODO
|
||||||
|
pub fn coo_no_duplicates<T>(
|
||||||
|
value_strategy: T,
|
||||||
|
rows: impl Strategy<Value=usize> + 'static,
|
||||||
|
cols: impl Strategy<Value=usize> + 'static,
|
||||||
|
max_nonzeros: usize) -> impl Strategy<Value=CooMatrix<T::Value>>
|
||||||
|
where
|
||||||
|
T: Strategy + Clone + 'static,
|
||||||
|
T::Value: Scalar,
|
||||||
|
{
|
||||||
|
(rows, cols)
|
||||||
|
.prop_flat_map(move |(nrows, ncols)| {
|
||||||
|
let max_nonzeros = min(max_nonzeros, nrows * ncols);
|
||||||
|
let size_range = 0 ..= max_nonzeros;
|
||||||
|
let value_strategy = value_strategy.clone();
|
||||||
|
|
||||||
|
size_range.prop_flat_map(move |nnz| {
|
||||||
|
let value_strategy = value_strategy.clone();
|
||||||
|
if nnz as f64 > 0.10 * (nrows as f64) * (ncols as f64) {
|
||||||
|
// If the number of nnz is sufficiently dense, then use the dense
|
||||||
|
// sample strategy
|
||||||
|
dense_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed()
|
||||||
|
} else {
|
||||||
|
// Otherwise, use a hash map strategy so that we can get a sparse sampling
|
||||||
|
// (so that complexity is rather on the order of max_nnz than nrows * ncols)
|
||||||
|
sparse_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.prop_map(move |triplets| {
|
||||||
|
let mut coo = CooMatrix::new(nrows, ncols);
|
||||||
|
for (i, j, v) in triplets {
|
||||||
|
coo.push(i, j, v);
|
||||||
|
}
|
||||||
|
coo
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// TODO
|
||||||
|
///
|
||||||
|
/// TODO: Write note on how this strategy only maintains the constraints on values
|
||||||
|
/// for each triplet, but does not consider the sum of triplets
|
||||||
|
pub fn coo_with_duplicates<T>(
|
||||||
|
value_strategy: T,
|
||||||
|
rows: impl Strategy<Value=usize> + 'static,
|
||||||
|
cols: impl Strategy<Value=usize> + 'static,
|
||||||
|
max_nonzeros: usize,
|
||||||
|
max_duplicates: usize)
|
||||||
|
-> impl Strategy<Value=CooMatrix<T::Value>>
|
||||||
|
where
|
||||||
|
T: Strategy + Clone + 'static,
|
||||||
|
T::Value: Scalar,
|
||||||
|
{
|
||||||
|
let coo_strategy = coo_no_duplicates(value_strategy.clone(), rows, cols, max_nonzeros);
|
||||||
|
let duplicate_strategy = vec((any::<Index>(), value_strategy.clone()), 0 ..= max_duplicates);
|
||||||
|
(coo_strategy, duplicate_strategy)
|
||||||
|
.prop_flat_map(|(coo, duplicates)| {
|
||||||
|
let mut triplets: Vec<(usize, usize, T::Value)> = coo.triplet_iter()
|
||||||
|
.map(|(i, j, v)| (i, j, v.clone()))
|
||||||
|
.collect();
|
||||||
|
if !triplets.is_empty() {
|
||||||
|
let duplicates_iter: Vec<_> = duplicates
|
||||||
|
.into_iter()
|
||||||
|
.map(|(idx, val)| {
|
||||||
|
let (i, j, _) = idx.get(&triplets);
|
||||||
|
(*i, *j, val)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
triplets.extend(duplicates_iter);
|
||||||
|
}
|
||||||
|
// Make sure to shuffle so that the duplicates get mixed in with the non-duplicates
|
||||||
|
let shuffled = Just(triplets).prop_shuffle();
|
||||||
|
(Just(coo.nrows()), Just(coo.ncols()), shuffled)
|
||||||
|
})
|
||||||
|
.prop_map(move |(nrows, ncols, triplets)| {
|
||||||
|
let mut coo = CooMatrix::new(nrows, ncols);
|
||||||
|
for (i, j, v) in triplets {
|
||||||
|
coo.push(i, j, v);
|
||||||
|
}
|
||||||
|
coo
|
||||||
|
})
|
||||||
}
|
}
|
|
@ -1,4 +1,7 @@
|
||||||
//! Unit tests
|
//! Unit tests
|
||||||
|
#[cfg(not(feature = "proptest-support"))]
|
||||||
|
compile_error!("Tests must be run with feature proptest-support");
|
||||||
|
|
||||||
mod unit_tests;
|
mod unit_tests;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
|
|
|
@ -3,3 +3,4 @@ mod ops;
|
||||||
mod pattern;
|
mod pattern;
|
||||||
mod csr;
|
mod csr;
|
||||||
mod csc;
|
mod csc;
|
||||||
|
mod proptest;
|
|
@ -0,0 +1,134 @@
|
||||||
|
use nalgebra_sparse::proptest::{coo_with_duplicates, coo_no_duplicates};
|
||||||
|
use nalgebra::DMatrix;
|
||||||
|
|
||||||
|
use proptest::prelude::*;
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::iter::repeat;
|
||||||
|
|
||||||
|
#[cfg(feature = "slow-tests")]
|
||||||
|
use {
|
||||||
|
proptest::test_runner::TestRunner,
|
||||||
|
proptest::strategy::ValueTree
|
||||||
|
};
|
||||||
|
use std::ops::RangeInclusive;
|
||||||
|
|
||||||
|
/// Enumerates every dense matrix whose dimensions lie in `rows_range` x `cols_range` and
/// whose entries all lie in `value_range`.
#[cfg(feature = "slow-tests")]
fn generate_all_possible_matrices(value_range: RangeInclusive<i32>,
                                  rows_range: RangeInclusive<usize>,
                                  cols_range: RangeInclusive<usize>)
    -> HashSet<DMatrix<i32>>
{
    let mut matrices = HashSet::new();
    for nrows in rows_range {
        for ncols in cols_range.clone() {
            let num_entries = nrows * ncols;

            // With zero rows or columns there is exactly one matrix of the given
            // dimensions: the empty matrix
            if num_entries == 0 {
                matrices.insert(DMatrix::from_row_slice(nrows, ncols, &[]));
                continue;
            }

            // Otherwise every entry independently takes every admissible value, i.e. the
            // entry arrays are the elements of the (multi) Cartesian product
            //   V x V x ... x V    (one factor per entry)
            // where V is the set of eligible values, e.g. V := -1 ..= 1
            let entry_arrays = repeat(value_range.clone())
                .take(num_entries)
                .multi_cartesian_product();
            matrices.extend(
                entry_arrays.map(|entries| DMatrix::from_row_slice(nrows, ncols, &entries)));
        }
    }
    matrices
}
|
||||||
|
|
||||||
|
#[cfg(feature = "slow-tests")]
#[test]
fn coo_no_duplicates_samples_all_admissible_outputs() {
    // Note: This test basically mirrors a similar test for `matrix` in the `nalgebra` repo.
    //
    // Checks that, given enough samples, the strategy produces every possible output for a
    // small space of inputs.

    // A deterministic runner keeps the outcome of the test stable between runs
    let mut runner = TestRunner::deterministic();

    // Must be large enough that all possible cases are sampled with high probability
    let num_generated_matrices = 500000;

    let values = -1..=1;
    let rows = 0..=2;
    let cols = 0..=3;
    let strategy = coo_no_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3);

    // The reference set: every matrix that satisfies the constraints
    let all_combinations = generate_all_possible_matrices(values, rows, cols);

    // The set of distinct matrices that the strategy actually produced
    let visited_combinations: HashSet<_> = (0..num_generated_matrices)
        .map(|_| {
            let matrix = strategy
                .new_tree(&mut runner)
                .expect("Tree generation should not fail")
                .current();
            DMatrix::from(&matrix)
        })
        .collect();

    assert_eq!(visited_combinations.len(), all_combinations.len());
    assert_eq!(visited_combinations, all_combinations, "Did not sample all possible values.");
}
|
||||||
|
|
||||||
|
#[cfg(feature = "slow-tests")]
#[test]
fn coo_with_duplicates_samples_all_admissible_outputs() {
    // Nearly identical to the corresponding test for `coo_no_duplicates`, but with a
    // different "success" criterion: `coo_with_duplicates` is able to generate matrices
    // with values outside of the value constraints. See the final assertion for details.

    // A deterministic runner keeps the outcome of the test stable between runs
    let mut runner = TestRunner::deterministic();

    // Must be large enough that all possible cases are sampled with high probability
    let num_generated_matrices = 500000;

    let values = -1..=1;
    let rows = 0..=2;
    let cols = 0..=3;
    let strategy = coo_with_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3, 2);

    // Every matrix that fits the constraints (note: this is only a subset of the matrices
    // that can be generated by `coo_with_duplicates`)
    let all_combinations = generate_all_possible_matrices(values, rows, cols);

    let mut visited_combinations = HashSet::new();
    let mut remaining_samples = num_generated_matrices;
    while remaining_samples > 0 {
        let matrix = strategy
            .new_tree(&mut runner)
            .expect("Tree generation should not fail")
            .current();
        visited_combinations.insert(DMatrix::from(&matrix));
        remaining_samples -= 1;
    }

    // The visited set cannot be expected to *equal* the set of admissible matrices.
    // The strategy must nevertheless be able to produce every matrix that fits the
    // constraints, so the admissible set has to be contained in the visited set.
    assert!(all_combinations.is_subset(&visited_combinations));
}
|
||||||
|
|
||||||
|
#[test]
fn coo_no_duplicates_generates_admissible_matrices() {
    // TODO: This test is an empty placeholder and currently passes trivially.
    // NOTE(review): judging by its name it is presumably meant to check that every matrix
    // produced by `coo_no_duplicates` respects the requested constraints - confirm and
    // implement.
}
|
Loading…
Reference in New Issue