Improved CooMatrix proptest strategies

This commit is contained in:
Andreas Longva 2020-11-18 13:54:14 +01:00
parent 46442d6060
commit 7260f05b07
6 changed files with 312 additions and 29 deletions

View File

@ -7,7 +7,13 @@ edition = "2018"
[features]
proptest-support = ["proptest", "nalgebra/proptest"]
# Enable to enable running some tests that take a lot of time to run
slow-tests = []
[dependencies]
nalgebra = { version="0.23", path = "../" }
num-traits = { version = "0.2", default-features = false }
proptest = { version = "0.10", optional = true }
[dev-dependencies]
itertools = "0.9"

View File

@ -37,7 +37,7 @@ use num_traits::Zero;
///
/// // TODO: Convert to CSR
/// ```
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CooMatrix<T> {
nrows: usize,
ncols: usize,

View File

@ -4,38 +4,177 @@
use crate::coo::CooMatrix;
use proptest::prelude::*;
use proptest::collection::{SizeRange, vec};
use proptest::collection::{vec, hash_map};
use nalgebra::Scalar;
use std::cmp::min;
use std::iter::repeat;
use proptest::sample::{Index};
/// TODO
pub fn coo<T>(
value_strategy: T,
rows: impl Strategy<Value=usize> + 'static,
cols: impl Strategy<Value=usize> + 'static,
max_nonzeros: usize) -> BoxedStrategy<CooMatrix<T::Value>>
/// A strategy for generating `nnz` triplets.
///
/// This strategy should generally only be used when `nnz` is close to `nrows * ncols`.
fn dense_triplet_strategy<T>(value_strategy: T,
nrows: usize,
ncols: usize,
nnz: usize)
-> impl Strategy<Value=Vec<(usize, usize, T::Value)>>
where
T: Strategy + Clone + 'static,
T::Value: Scalar,
{
(rows, cols, (0 ..= max_nonzeros))
.prop_flat_map(move |(nrows, ncols, nnz)| {
// If the numbers of rows and columns are small in comparison with the
// max nnz, it will lead to small matrices essentially always turning out to be dense.
// To address this, we correct the nnz by computing the modulo with the
// maximum number of non-zeros (ignoring duplicates) we can have for
// the given dimensions.
// This way we can still generate very sparse matrices for small matrices.
let max_nnz = nrows * ncols;
let nnz = if max_nnz == 0 { 0 } else { nnz % max_nnz };
let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 };
let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 };
let row_indices = vec![row_index_strategy.clone(); nnz];
let col_indices = vec![col_index_strategy.clone(); nnz];
let values_strategy = vec![value_strategy.clone(); nnz];
assert!(nnz <= nrows * ncols);
(Just(nrows), Just(ncols), row_indices, col_indices, values_strategy)
}).prop_map(|(nrows, ncols, row_indices, col_indices, values)| {
CooMatrix::try_from_triplets(nrows, ncols, row_indices, col_indices, values)
.expect("We should always generate valid COO data.")
}).boxed()
// Construct a number of booleans of which exactly `nnz` are true.
let booleans: Vec<_> = repeat(true)
.take(nnz)
.chain(repeat(false))
.take(nrows * ncols)
.collect();
Just(booleans)
// Shuffle the booleans so that they are randomly distributed
.prop_shuffle()
// Convert the booleans into a list of coordinate pairs
.prop_map(move |booleans| {
booleans
.into_iter()
.enumerate()
.filter_map(|(index, is_entry)| {
if is_entry {
// Convert linear index to row/col pair
let i = index / ncols;
let j = index % ncols;
Some((i, j))
} else {
None
}
})
.collect::<Vec<_>>()
})
// Assign values to each coordinate pair in order to generate a list of triplets
.prop_flat_map(move |coords| {
vec![value_strategy.clone(); coords.len()]
.prop_map(move |values| {
coords.clone().into_iter()
.zip(values)
.map(|((i, j), v)| {
(i, j, v)
})
.collect::<Vec<_>>()
})
})
}
/// A strategy for generating `nnz` triplets.
///
/// This strategy should generally only be used when `nnz << nrows * ncols`. If `nnz` is too
/// close to `nrows * ncols` it may fail due to excessive rejected samples.
fn sparse_triplet_strategy<T>(value_strategy: T,
nrows: usize,
ncols: usize,
nnz: usize)
-> impl Strategy<Value=Vec<(usize, usize, T::Value)>>
where
T: Strategy + Clone + 'static,
T::Value: Scalar,
{
// Have to handle the zero case: proptest doesn't like empty ranges (i.e. 0 .. 0)
let row_index_strategy = if nrows > 0 { 0 .. nrows } else { 0 .. 1 };
let col_index_strategy = if ncols > 0 { 0 .. ncols } else { 0 .. 1 };
let coord_strategy = (row_index_strategy, col_index_strategy);
hash_map(coord_strategy, value_strategy.clone(), nnz)
.prop_map(|hash_map| {
let triplets: Vec<_> = hash_map
.into_iter()
.map(|((i, j), v)| (i, j, v))
.collect();
triplets
})
// Although order in the hash map is unspecified, it's not necessarily *random*
// - or, in particular, it does not necessarily sample the whole space of possible outcomes -
// so we additionally shuffle the triplets
.prop_shuffle()
}
/// TODO
pub fn coo_no_duplicates<T>(
value_strategy: T,
rows: impl Strategy<Value=usize> + 'static,
cols: impl Strategy<Value=usize> + 'static,
max_nonzeros: usize) -> impl Strategy<Value=CooMatrix<T::Value>>
where
T: Strategy + Clone + 'static,
T::Value: Scalar,
{
(rows, cols)
.prop_flat_map(move |(nrows, ncols)| {
let max_nonzeros = min(max_nonzeros, nrows * ncols);
let size_range = 0 ..= max_nonzeros;
let value_strategy = value_strategy.clone();
size_range.prop_flat_map(move |nnz| {
let value_strategy = value_strategy.clone();
if nnz as f64 > 0.10 * (nrows as f64) * (ncols as f64) {
// If the number of nnz is sufficiently dense, then use the dense
// sample strategy
dense_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed()
} else {
// Otherwise, use a hash map strategy so that we can get a sparse sampling
// (so that complexity is rather on the order of max_nnz than nrows * ncols)
sparse_triplet_strategy(value_strategy, nrows, ncols, nnz).boxed()
}
})
.prop_map(move |triplets| {
let mut coo = CooMatrix::new(nrows, ncols);
for (i, j, v) in triplets {
coo.push(i, j, v);
}
coo
})
})
}
/// TODO
///
/// TODO: Write note on how this strategy only maintains the constraints on values
/// for each triplet, but does not consider the sum of triplets
pub fn coo_with_duplicates<T>(
value_strategy: T,
rows: impl Strategy<Value=usize> + 'static,
cols: impl Strategy<Value=usize> + 'static,
max_nonzeros: usize,
max_duplicates: usize)
-> impl Strategy<Value=CooMatrix<T::Value>>
where
T: Strategy + Clone + 'static,
T::Value: Scalar,
{
let coo_strategy = coo_no_duplicates(value_strategy.clone(), rows, cols, max_nonzeros);
let duplicate_strategy = vec((any::<Index>(), value_strategy.clone()), 0 ..= max_duplicates);
(coo_strategy, duplicate_strategy)
.prop_flat_map(|(coo, duplicates)| {
let mut triplets: Vec<(usize, usize, T::Value)> = coo.triplet_iter()
.map(|(i, j, v)| (i, j, v.clone()))
.collect();
if !triplets.is_empty() {
let duplicates_iter: Vec<_> = duplicates
.into_iter()
.map(|(idx, val)| {
let (i, j, _) = idx.get(&triplets);
(*i, *j, val)
})
.collect();
triplets.extend(duplicates_iter);
}
// Make sure to shuffle so that the duplicates get mixed in with the non-duplicates
let shuffled = Just(triplets).prop_shuffle();
(Just(coo.nrows()), Just(coo.ncols()), shuffled)
})
.prop_map(move |(nrows, ncols, triplets)| {
let mut coo = CooMatrix::new(nrows, ncols);
for (i, j, v) in triplets {
coo.push(i, j, v);
}
coo
})
}

View File

@ -1,4 +1,7 @@
//! Unit tests
#[cfg(not(feature = "proptest-support"))]
compile_error!("Tests must be run with feature proptest-support");
mod unit_tests;
#[macro_use]

View File

@ -2,4 +2,5 @@ mod coo;
mod ops;
mod pattern;
mod csr;
mod csc;
mod csc;
mod proptest;

View File

@ -0,0 +1,134 @@
use nalgebra_sparse::proptest::{coo_with_duplicates, coo_no_duplicates};
use nalgebra::DMatrix;
use proptest::prelude::*;
use itertools::Itertools;
use std::collections::HashSet;
use std::iter::repeat;
#[cfg(feature = "slow-tests")]
use {
proptest::test_runner::TestRunner,
proptest::strategy::ValueTree
};
use std::ops::RangeInclusive;
#[cfg(feature = "slow-tests")]
fn generate_all_possible_matrices(value_range: RangeInclusive<i32>,
rows_range: RangeInclusive<usize>,
cols_range: RangeInclusive<usize>)
-> HashSet<DMatrix<i32>>
{
// Enumerate all possible combinations
let mut all_combinations = HashSet::new();
for nrows in rows_range {
for ncols in cols_range.clone() {
// For the given number of rows and columns
let n_values = nrows * ncols;
if n_values == 0 {
// If we have zero rows or columns, the set of matrices with the given
// rows and columns is a single element: an empty matrix
all_combinations.insert(DMatrix::from_row_slice(nrows, ncols, &[]));
} else {
// Otherwise, we need to sample all possible matrices.
// To do this, we generate the values as the (multi) Cartesian product
// of the value sets. For example, for a 2x2 matrices, we consider
// all possible 4-element arrays that the matrices can take by
// considering all elements in the cartesian product
// V x V x V x V
// where V is the set of eligible values, e.g. V := -1 ..= 1
let values_iter = repeat(value_range.clone())
.take(n_values)
.multi_cartesian_product();
for matrix_values in values_iter {
all_combinations.insert(DMatrix::from_row_slice(nrows, ncols, &matrix_values));
}
}
}
}
all_combinations
}
#[cfg(feature = "slow-tests")]
#[test]
fn coo_no_duplicates_samples_all_admissible_outputs() {
// Note: This test basically mirrors a similar test for `matrix` in the `nalgebra` repo.
// Test that the proptest generation covers all possible outputs for a small space of inputs
// given enough samples.
// We use a deterministic test runner to make the test "stable".
let mut runner = TestRunner::deterministic();
// This number needs to be high enough so that we with high probability sample
// all possible cases
let num_generated_matrices = 500000;
let values = -1..=1;
let rows = 0..=2;
let cols = 0..=3;
let strategy = coo_no_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3);
// Enumerate all possible combinations
let all_combinations = generate_all_possible_matrices(values, rows, cols);
let mut visited_combinations = HashSet::new();
for _ in 0..num_generated_matrices {
let tree = strategy
.new_tree(&mut runner)
.expect("Tree generation should not fail");
let matrix = tree.current();
visited_combinations.insert(DMatrix::from(&matrix));
}
assert_eq!(visited_combinations.len(), all_combinations.len());
assert_eq!(visited_combinations, all_combinations, "Did not sample all possible values.");
}
#[cfg(feature = "slow-tests")]
#[test]
fn coo_with_duplicates_samples_all_admissible_outputs() {
// This is almost the same as the test for coo_no_duplicates, except that we need
// a different "success" criterion, since coo_with_duplicates is able to generate
// matrices with values outside of the value constraints. See below for details.
// We use a deterministic test runner to make the test "stable".
let mut runner = TestRunner::deterministic();
// This number needs to be high enough so that we with high probability sample
// all possible cases
let num_generated_matrices = 500000;
let values = -1..=1;
let rows = 0..=2;
let cols = 0..=3;
let strategy = coo_with_duplicates(values.clone(), rows.clone(), cols.clone(), 2 * 3, 2);
// Enumerate all possible combinations that fit the constraints
// (note: this is only a subset of the matrices that can be generated by
// `coo_with_duplicates`)
let all_combinations = generate_all_possible_matrices(values, rows, cols);
let mut visited_combinations = HashSet::new();
for _ in 0..num_generated_matrices {
let tree = strategy
.new_tree(&mut runner)
.expect("Tree generation should not fail");
let matrix = tree.current();
visited_combinations.insert(DMatrix::from(&matrix));
}
// Here we cannot verify that the set of visited combinations is *equal* to
// all possible outcomes with the given constraints, however the
// strategy should be able to generate all matrices that fit the constraints.
// In other words, we need to determine that set of all admissible matrices
// is contained in the set of visited matrices
assert!(all_combinations.is_subset(&visited_combinations));
}
#[test]
fn coo_no_duplicates_generates_admissible_matrices() {
}