Merge pull request #1000 from aarsenij/csr_csc_constructor_for_unsorte_valid_data

CSR: Provide constructor for unsorted but otherwise valid data
This commit is contained in:
Sébastien Crozet 2021-10-22 10:36:11 +02:00 committed by GitHub
commit 496969bf62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 239 additions and 26 deletions

View File

@ -10,6 +10,7 @@ use crate::{SparseEntry, SparseEntryMut, SparseFormatError, SparseFormatErrorKin
use nalgebra::Scalar;
use num_traits::One;
use std::iter::FromIterator;
use std::slice::{Iter, IterMut};
/// A CSR representation of a sparse matrix.
@ -170,6 +171,77 @@ impl<T> CsrMatrix<T> {
Self::try_from_pattern_and_values(pattern, values)
}
/// Try to construct a CSR matrix from raw CSR data with unsorted column indices.
///
/// It is assumed that each row contains unique column indices that are in
/// bounds with respect to the number of columns in the matrix. If this is not the case,
/// an error is returned to indicate the failure.
///
/// An error is returned if the data given does not conform to the CSR storage format
/// with the exception of having unsorted column indices and values.
/// See the documentation for [CsrMatrix](struct.CsrMatrix.html) for more information.
pub fn try_from_unsorted_csr_data(
num_rows: usize,
num_cols: usize,
row_offsets: Vec<usize>,
col_indices: Vec<usize>,
values: Vec<T>,
) -> Result<Self, SparseFormatError>
where
T: Scalar,
{
use SparsityPatternFormatError::*;
let count = col_indices.len();
let mut p: Vec<usize> = (0..count).collect();
if col_indices.len() != values.len() {
return Err(SparseFormatError::from_kind_and_msg(
SparseFormatErrorKind::InvalidStructure,
"Number of values and column indices must be the same",
));
}
if row_offsets.len() == 0 {
return Err(SparseFormatError::from_kind_and_msg(
SparseFormatErrorKind::InvalidStructure,
"Number of offsets should be greater than 0",
));
}
for (index, &offset) in row_offsets[0..row_offsets.len() - 1].iter().enumerate() {
let next_offset = row_offsets[index + 1];
if next_offset > count {
return Err(SparseFormatError::from_kind_and_msg(
SparseFormatErrorKind::InvalidStructure,
"No row offset should be greater than the number of column indices",
));
}
if offset > next_offset {
return Err(NonmonotonicOffsets).map_err(pattern_format_error_to_csr_error);
}
p[offset..next_offset].sort_by(|a, b| {
let x = &col_indices[*a];
let y = &col_indices[*b];
x.partial_cmp(y).unwrap()
});
}
// permute indices
let sorted_col_indices: Vec<usize> =
Vec::from_iter((p.iter().map(|i| &col_indices[*i])).cloned());
// permute values
let sorted_values: Vec<T> = Vec::from_iter((p.iter().map(|i| &values[*i])).cloned());
return Self::try_from_csr_data(
num_rows,
num_cols,
row_offsets,
sorted_col_indices,
sorted_values,
);
}
/// Try to construct a CSR matrix from a sparsity pattern and associated non-zero values.
///
/// Returns an error if the number of values does not match the number of minor indices

View File

@ -5,6 +5,8 @@ use nalgebra_sparse::{SparseEntry, SparseEntryMut, SparseFormatErrorKind};
use proptest::prelude::*;
use proptest::sample::subsequence;
use super::test_data_examples::InvalidCsrDataExamples;
use crate::assert_panics;
use crate::common::csr_strategy;
@ -171,11 +173,36 @@ fn csr_matrix_valid_data() {
}
}
#[test]
fn csr_matrix_valid_data_unsorted_column_indices() {
let csr = CsrMatrix::try_from_unsorted_csr_data(
4,
5,
vec![0, 3, 5, 8, 11],
vec![4, 1, 3, 3, 1, 2, 3, 0, 3, 4, 1],
vec![5, 1, 4, 7, 4, 2, 3, 1, 8, 9, 6],
)
.unwrap();
let expected_csr = CsrMatrix::try_from_csr_data(
4,
5,
vec![0, 3, 5, 8, 11],
vec![1, 3, 4, 1, 3, 0, 2, 3, 1, 3, 4],
vec![1, 4, 5, 4, 7, 1, 2, 3, 6, 8, 9],
)
.unwrap();
assert_eq!(csr, expected_csr);
}
#[test]
fn csr_matrix_try_from_invalid_csr_data() {
let invalid_data: InvalidCsrDataExamples = InvalidCsrDataExamples::new();
{
// Empty offset array (invalid length)
let matrix = CsrMatrix::try_from_csr_data(0, 0, Vec::new(), Vec::new(), Vec::<u32>::new());
let (offsets, indices, values) = invalid_data.empty_offset_array;
let matrix = CsrMatrix::try_from_csr_data(0, 0, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
@ -184,10 +211,8 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Offset array invalid length for arbitrary data
let offsets = vec![0, 3, 5];
let indices = vec![0, 1, 2, 3, 5];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) =
invalid_data.offset_array_invalid_length_for_arbitrary_data;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -197,9 +222,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Invalid first entry in offsets array
let offsets = vec![1, 2, 2, 5];
let indices = vec![0, 5, 1, 2, 3];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.invalid_first_entry_in_offsets_array;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -209,9 +232,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Invalid last entry in offsets array
let offsets = vec![0, 2, 2, 4];
let indices = vec![0, 5, 1, 2, 3];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.invalid_last_entry_in_offsets_array;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -221,9 +242,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Invalid length of offsets array
let offsets = vec![0, 2, 2];
let indices = vec![0, 5, 1, 2, 3];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.invalid_length_of_offsets_array;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -233,9 +252,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Nonmonotonic offsets
let offsets = vec![0, 3, 2, 5];
let indices = vec![0, 1, 2, 3, 4];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.nonmonotonic_offsets;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -245,9 +262,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Nonmonotonic minor indices
let offsets = vec![0, 2, 2, 5];
let indices = vec![0, 2, 3, 1, 4];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.nonmonotonic_minor_indices;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -257,9 +272,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Minor index out of bounds
let offsets = vec![0, 2, 2, 5];
let indices = vec![0, 6, 1, 2, 3];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.minor_index_out_of_bounds;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -269,9 +282,7 @@ fn csr_matrix_try_from_invalid_csr_data() {
{
// Duplicate entry
let offsets = vec![0, 2, 2, 5];
let indices = vec![0, 5, 2, 2, 3];
let values = vec![0, 1, 2, 3, 4];
let (offsets, indices, values) = invalid_data.duplicate_entry;
let matrix = CsrMatrix::try_from_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
@ -280,6 +291,91 @@ fn csr_matrix_try_from_invalid_csr_data() {
}
}
#[test]
fn csr_matrix_try_from_unsorted_invalid_csr_data() {
let invalid_data: InvalidCsrDataExamples = InvalidCsrDataExamples::new();
{
// Empty offset array (invalid length)
let (offsets, indices, values) = invalid_data.empty_offset_array;
let matrix = CsrMatrix::try_from_unsorted_csr_data(0, 0, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
);
}
{
// Offset array invalid length for arbitrary data
let (offsets, indices, values) =
invalid_data.offset_array_invalid_length_for_arbitrary_data;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
);
}
{
// Invalid first entry in offsets array
let (offsets, indices, values) = invalid_data.invalid_first_entry_in_offsets_array;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
);
}
{
// Invalid last entry in offsets array
let (offsets, indices, values) = invalid_data.invalid_last_entry_in_offsets_array;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
);
}
{
// Invalid length of offsets array
let (offsets, indices, values) = invalid_data.invalid_length_of_offsets_array;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
);
}
{
// Nonmonotonic offsets
let (offsets, indices, values) = invalid_data.nonmonotonic_offsets;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::InvalidStructure
);
}
{
// Minor index out of bounds
let (offsets, indices, values) = invalid_data.minor_index_out_of_bounds;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::IndexOutOfBounds
);
}
{
// Duplicate entry
let (offsets, indices, values) = invalid_data.duplicate_entry;
let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values);
assert_eq!(
matrix.unwrap_err().kind(),
&SparseFormatErrorKind::DuplicateEntry
);
}
}
#[test]
fn csr_disassemble_avoids_clone_when_owned() {
// Test that disassemble avoids cloning the sparsity pattern when it holds the sole reference

View File

@ -6,3 +6,4 @@ mod csr;
mod ops;
mod pattern;
mod proptest;
mod test_data_examples;

View File

@ -0,0 +1,44 @@
/// Examples of *invalid* raw CSR data `(offsets, indices, values)`.
pub struct InvalidCsrDataExamples {
pub empty_offset_array: (Vec<usize>, Vec<usize>, Vec<i32>),
pub offset_array_invalid_length_for_arbitrary_data: (Vec<usize>, Vec<usize>, Vec<i32>),
pub invalid_first_entry_in_offsets_array: (Vec<usize>, Vec<usize>, Vec<i32>),
pub invalid_last_entry_in_offsets_array: (Vec<usize>, Vec<usize>, Vec<i32>),
pub invalid_length_of_offsets_array: (Vec<usize>, Vec<usize>, Vec<i32>),
pub nonmonotonic_offsets: (Vec<usize>, Vec<usize>, Vec<i32>),
pub nonmonotonic_minor_indices: (Vec<usize>, Vec<usize>, Vec<i32>),
pub minor_index_out_of_bounds: (Vec<usize>, Vec<usize>, Vec<i32>),
pub duplicate_entry: (Vec<usize>, Vec<usize>, Vec<i32>),
}
impl InvalidCsrDataExamples {
pub fn new() -> Self {
let empty_offset_array = (Vec::<usize>::new(), Vec::<usize>::new(), Vec::<i32>::new());
let offset_array_invalid_length_for_arbitrary_data =
(vec![0, 3, 5], vec![0, 1, 2, 3, 5], vec![0, 1, 2, 3, 4]);
let invalid_first_entry_in_offsets_array =
(vec![1, 2, 2, 5], vec![0, 5, 1, 2, 3], vec![0, 1, 2, 3, 4]);
let invalid_last_entry_in_offsets_array =
(vec![0, 2, 2, 4], vec![0, 5, 1, 2, 3], vec![0, 1, 2, 3, 4]);
let invalid_length_of_offsets_array =
(vec![0, 2, 2], vec![0, 5, 1, 2, 3], vec![0, 1, 2, 3, 4]);
let nonmonotonic_offsets = (vec![0, 3, 2, 5], vec![0, 1, 2, 3, 4], vec![0, 1, 2, 3, 4]);
let nonmonotonic_minor_indices =
(vec![0, 2, 2, 5], vec![0, 2, 3, 1, 4], vec![0, 1, 2, 3, 4]);
let minor_index_out_of_bounds =
(vec![0, 2, 2, 5], vec![0, 6, 1, 2, 3], vec![0, 1, 2, 3, 4]);
let duplicate_entry = (vec![0, 2, 2, 5], vec![0, 5, 2, 2, 3], vec![0, 1, 2, 3, 4]);
return Self {
empty_offset_array,
offset_array_invalid_length_for_arbitrary_data,
invalid_first_entry_in_offsets_array,
invalid_last_entry_in_offsets_array,
invalid_length_of_offsets_array,
nonmonotonic_minor_indices,
nonmonotonic_offsets,
minor_index_out_of_bounds,
duplicate_entry,
};
}
}