From 757b99e8439c15fa9901c7ffb695da4511a2e60f Mon Sep 17 00:00:00 2001 From: Anton Arsenij <11193727+aarsenij@users.noreply.github.com> Date: Thu, 3 Mar 2022 10:14:16 +0100 Subject: [PATCH 1/4] CSC: Create constructor for unsorted but otherwise valid data (#1015) * CSC: Create constructor for unsorted but otherwise valid data * Test creating csc matrix from unsorted but valid data * Add function for validation and sorting * Move validation function to cs.rs * Restore pattern unit test * Add unit test for 'major offset out of bounds' case * Avoid permutation allocations on 'happy path' * Reuse allocated permutation * Fix comments for test-data examples * Remove unnecessary iter variable * Set up buffers for sorting up front * Use common apply_permutation function * Use common compute_sort_permutation function * Move unsafe down to unchecked call * Add panic cases to documentation * Remove unnecessary Zero bound * Move buffer set up away from loop * Lift T::Zero from cs.rs * Improve checking if values are provided * Simplify copying from slices & add test for wrong values length * Check duplicates after sorting * Fix formatting * Check values length at the beginning * Check length of values if values != None --- nalgebra-sparse/src/convert/serial.rs | 20 +-- nalgebra-sparse/src/cs.rs | 151 +++++++++++++++- nalgebra-sparse/src/csc.rs | 45 +++++ nalgebra-sparse/src/csr.rs | 74 +++----- nalgebra-sparse/src/lib.rs | 1 + nalgebra-sparse/src/pattern.rs | 29 ++++ nalgebra-sparse/src/utils.rs | 26 +++ nalgebra-sparse/tests/unit_tests/csc.rs | 164 +++++++++++++++--- nalgebra-sparse/tests/unit_tests/csr.rs | 58 +++++-- .../tests/unit_tests/test_data_examples.rs | 44 ++++- 10 files changed, 497 insertions(+), 115 deletions(-) create mode 100644 nalgebra-sparse/src/utils.rs diff --git a/nalgebra-sparse/src/convert/serial.rs b/nalgebra-sparse/src/convert/serial.rs index ecbe1dab..50fc50e4 100644 --- a/nalgebra-sparse/src/convert/serial.rs +++ b/nalgebra-sparse/src/convert/serial.rs @@ -14,6 +14,7 @@ use crate::coo::CooMatrix; use crate::cs; use crate::csc::CscMatrix; use crate::csr::CsrMatrix; +use crate::utils::{apply_permutation, compute_sort_permutation}; /// Converts a dense matrix to [`CooMatrix`]. pub fn convert_dense_coo(dense: &Matrix) -> CooMatrix @@ -376,29 +377,12 @@ fn sort_lane( assert_eq!(values.len(), workspace.len()); let permutation = workspace; - // Set permutation to identity - for (i, p) in permutation.iter_mut().enumerate() { - *p = i; - } - - // Compute permutation needed to bring minor indices into sorted order - // Note: Using sort_unstable here avoids internal allocations, which is crucial since - // each lane might have a small number of elements - permutation.sort_unstable_by_key(|idx| minor_idx[*idx]); + compute_sort_permutation(permutation, minor_idx); apply_permutation(minor_idx_result, minor_idx, permutation); apply_permutation(values_result, values, permutation); } -// TODO: Move this into `utils` or something? -fn apply_permutation(out_slice: &mut [T], in_slice: &[T], permutation: &[usize]) { - assert_eq!(out_slice.len(), in_slice.len()); - assert_eq!(out_slice.len(), permutation.len()); - for (out_element, old_pos) in out_slice.iter_mut().zip(permutation) { - *out_element = in_slice[*old_pos].clone(); - } -} - /// Given *sorted* indices and corresponding scalar values, combines duplicates with the given /// associative combiner and calls the provided produce methods with combined indices and values. fn combine_duplicates( diff --git a/nalgebra-sparse/src/cs.rs b/nalgebra-sparse/src/cs.rs index cffdd6c7..474eb2c0 100644 --- a/nalgebra-sparse/src/cs.rs +++ b/nalgebra-sparse/src/cs.rs @@ -6,7 +6,8 @@ use num_traits::One; use nalgebra::Scalar; use crate::pattern::SparsityPattern; -use crate::{SparseEntry, SparseEntryMut}; +use crate::utils::{apply_permutation, compute_sort_permutation}; +use crate::{SparseEntry, SparseEntryMut, SparseFormatError, SparseFormatErrorKind}; /// An abstract compressed matrix. /// @@ -543,3 +544,151 @@ pub fn convert_counts_to_offsets(counts: &mut [usize]) { offset += count; } } + +/// Validates cs data, optionally sorts minor indices and values +pub(crate) fn validate_and_optionally_sort_cs_data( + major_dim: usize, + minor_dim: usize, + major_offsets: &[usize], + minor_indices: &mut [usize], + values: Option<&mut [T]>, + sort: bool, +) -> Result<(), SparseFormatError> +where + T: Scalar, +{ + let mut values_option = values; + + if let Some(values) = values_option.as_mut() { + if minor_indices.len() != values.len() { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::InvalidStructure, + "Number of values and minor indices must be the same.", + )); + } + } else if sort { + unreachable!("Internal error: Sorting currently not supported if no values are present."); + } + if major_offsets.len() == 0 { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::InvalidStructure, + "Number of offsets should be greater than 0.", + )); + } + if major_offsets.len() != major_dim + 1 { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::InvalidStructure, + "Length of offset array is not equal to (major_dim + 1).", + )); + } + + // Check that the first and last offsets conform to the specification + { + let first_offset_ok = *major_offsets.first().unwrap() == 0; + let last_offset_ok = *major_offsets.last().unwrap() == minor_indices.len(); + if !first_offset_ok || !last_offset_ok { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::InvalidStructure, + "First or last offset is incompatible with format.", + )); + } + } + + // Set up required buffers up front + let mut minor_idx_buffer: Vec = Vec::new(); + let mut values_buffer: Vec = Vec::new(); + let mut minor_index_permutation: Vec = Vec::new(); + + // Test that each lane has strictly monotonically increasing minor indices, i.e. + // minor indices within a lane are sorted, unique. Sort minor indices within a lane if needed. + // In addition, each minor index must be in bounds with respect to the minor dimension. + { + for lane_idx in 0..major_dim { + let range_start = major_offsets[lane_idx]; + let range_end = major_offsets[lane_idx + 1]; + + // Test that major offsets are monotonically increasing + if range_start > range_end { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::InvalidStructure, + "Offsets are not monotonically increasing.", + )); + } + + let minor_idx_in_lane = minor_indices.get(range_start..range_end).ok_or( + SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::IndexOutOfBounds, + "A major offset is out of bounds.", + ), + )?; + + // We test for in-bounds, uniqueness and monotonicity at the same time + // to ensure that we only visit each minor index once + let mut prev = None; + let mut monotonic = true; + + for &minor_idx in minor_idx_in_lane { + if minor_idx >= minor_dim { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::IndexOutOfBounds, + "A minor index is out of bounds.", + )); + } + + if let Some(prev) = prev { + if prev >= minor_idx { + if !sort { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::InvalidStructure, + "Minor indices are not strictly monotonically increasing in each lane.", + )); + } + monotonic = false; + } + } + prev = Some(minor_idx); + } + + // sort if indices are not monotonic and sorting is expected + if !monotonic && sort { + let range_size = range_end - range_start; + minor_index_permutation.resize(range_size, 0); + compute_sort_permutation(&mut minor_index_permutation, &minor_idx_in_lane); + minor_idx_buffer.clear(); + minor_idx_buffer.extend_from_slice(&minor_idx_in_lane); + apply_permutation( + &mut minor_indices[range_start..range_end], + &minor_idx_buffer, + &minor_index_permutation, + ); + + // check duplicates + prev = None; + for &minor_idx in &minor_indices[range_start..range_end] { + if let Some(prev) = prev { + if prev == minor_idx { + return Err(SparseFormatError::from_kind_and_msg( + SparseFormatErrorKind::DuplicateEntry, + "Input data contains duplicate entries.", + )); + } + } + prev = Some(minor_idx); + } + + // sort values if they exist + if let Some(values) = values_option.as_mut() { + values_buffer.clear(); + values_buffer.extend_from_slice(&values[range_start..range_end]); + apply_permutation( + &mut values[range_start..range_end], + &values_buffer, + &minor_index_permutation, + ); + } + } + } + } + + Ok(()) +} diff --git a/nalgebra-sparse/src/csc.rs b/nalgebra-sparse/src/csc.rs index a461658e..d926dafb 100644 --- a/nalgebra-sparse/src/csc.rs +++ b/nalgebra-sparse/src/csc.rs @@ -6,6 +6,7 @@ #[cfg(feature = "serde-serialize")] mod csc_serde; +use crate::cs; use crate::cs::{CsLane, CsLaneIter, CsLaneIterMut, CsLaneMut, CsMatrix}; use crate::csr::CsrMatrix; use crate::pattern::{SparsityPattern, SparsityPatternFormatError, SparsityPatternIter}; @@ -173,6 +174,50 @@ impl CscMatrix { Self::try_from_pattern_and_values(pattern, values) } + /// Try to construct a CSC matrix from raw CSC data with unsorted row indices. + /// + /// It is assumed that each column contains unique row indices that are in + /// bounds with respect to the number of rows in the matrix. If this is not the case, + /// an error is returned to indicate the failure. + /// + /// An error is returned if the data given does not conform to the CSC storage format + /// with the exception of having unsorted row indices and values. + /// See the documentation for [CscMatrix](struct.CscMatrix.html) for more information. + pub fn try_from_unsorted_csc_data( + num_rows: usize, + num_cols: usize, + col_offsets: Vec, + mut row_indices: Vec, + mut values: Vec, + ) -> Result + where + T: Scalar, + { + let result = cs::validate_and_optionally_sort_cs_data( + num_cols, + num_rows, + &col_offsets, + &mut row_indices, + Some(&mut values), + true, + ); + + match result { + Ok(()) => { + let pattern = unsafe { + SparsityPattern::from_offset_and_indices_unchecked( + num_cols, + num_rows, + col_offsets, + row_indices, + ) + }; + Self::try_from_pattern_and_values(pattern, values) + } + Err(err) => Err(err), + } + } + /// Try to construct a CSC matrix from a sparsity pattern and associated non-zero values. /// /// Returns an error if the number of values does not match the number of minor indices diff --git a/nalgebra-sparse/src/csr.rs b/nalgebra-sparse/src/csr.rs index bd35bf6b..90be35f1 100644 --- a/nalgebra-sparse/src/csr.rs +++ b/nalgebra-sparse/src/csr.rs @@ -6,6 +6,7 @@ #[cfg(feature = "serde-serialize")] mod csr_serde; +use crate::cs; use crate::cs::{CsLane, CsLaneIter, CsLaneIterMut, CsLaneMut, CsMatrix}; use crate::csc::CscMatrix; use crate::pattern::{SparsityPattern, SparsityPatternFormatError, SparsityPatternIter}; @@ -13,7 +14,7 @@ use crate::{SparseEntry, SparseEntryMut, SparseFormatError, SparseFormatErrorKin use nalgebra::Scalar; use num_traits::One; -use std::iter::FromIterator; + use std::slice::{Iter, IterMut}; /// A CSR representation of a sparse matrix. @@ -187,62 +188,35 @@ impl CsrMatrix { num_rows: usize, num_cols: usize, row_offsets: Vec, - col_indices: Vec, - values: Vec, + mut col_indices: Vec, + mut values: Vec, ) -> Result where T: Scalar, { - use SparsityPatternFormatError::*; - let count = col_indices.len(); - let mut p: Vec = (0..count).collect(); - - if col_indices.len() != values.len() { - return Err(SparseFormatError::from_kind_and_msg( - SparseFormatErrorKind::InvalidStructure, - "Number of values and column indices must be the same", - )); - } - - if row_offsets.len() == 0 { - return Err(SparseFormatError::from_kind_and_msg( - SparseFormatErrorKind::InvalidStructure, - "Number of offsets should be greater than 0", - )); - } - - for (index, &offset) in row_offsets[0..row_offsets.len() - 1].iter().enumerate() { - let next_offset = row_offsets[index + 1]; - if next_offset > count { - return Err(SparseFormatError::from_kind_and_msg( - SparseFormatErrorKind::InvalidStructure, - "No row offset should be greater than the number of column indices", - )); - } - if offset > next_offset { - return Err(NonmonotonicOffsets).map_err(pattern_format_error_to_csr_error); - } - p[offset..next_offset].sort_by(|a, b| { - let x = &col_indices[*a]; - let y = &col_indices[*b]; - x.partial_cmp(y).unwrap() - }); - } - - // permute indices - let sorted_col_indices: Vec = - Vec::from_iter((p.iter().map(|i| &col_indices[*i])).cloned()); - - // permute values - let sorted_values: Vec = Vec::from_iter((p.iter().map(|i| &values[*i])).cloned()); - - return Self::try_from_csr_data( + let result = cs::validate_and_optionally_sort_cs_data( num_rows, num_cols, - row_offsets, - sorted_col_indices, - sorted_values, + &row_offsets, + &mut col_indices, + Some(&mut values), + true, ); + + match result { + Ok(()) => { + let pattern = unsafe { + SparsityPattern::from_offset_and_indices_unchecked( + num_rows, + num_cols, + row_offsets, + col_indices, + ) + }; + Self::try_from_pattern_and_values(pattern, values) + } + Err(err) => Err(err), + } } /// Try to construct a CSR matrix from a sparsity pattern and associated non-zero values. diff --git a/nalgebra-sparse/src/lib.rs b/nalgebra-sparse/src/lib.rs index edbf83bd..8567261a 100644 --- a/nalgebra-sparse/src/lib.rs +++ b/nalgebra-sparse/src/lib.rs @@ -160,6 +160,7 @@ pub mod ops; pub mod pattern; pub(crate) mod cs; +pub(crate) mod utils; #[cfg(feature = "proptest-support")] pub mod proptest; diff --git a/nalgebra-sparse/src/pattern.rs b/nalgebra-sparse/src/pattern.rs index 50fae072..c51945b7 100644 --- a/nalgebra-sparse/src/pattern.rs +++ b/nalgebra-sparse/src/pattern.rs @@ -188,6 +188,35 @@ impl SparsityPattern { }) } + /// Try to construct a sparsity pattern from the given dimensions, major offsets + /// and minor indices. + /// + /// # Panics + /// + /// Panics if the number of major offsets is not exactly one greater than the major dimension + /// or if major offsets do not start with 0 and end with the number of minor indices. + pub unsafe fn from_offset_and_indices_unchecked( + major_dim: usize, + minor_dim: usize, + major_offsets: Vec, + minor_indices: Vec, + ) -> Self { + assert_eq!(major_offsets.len(), major_dim + 1); + + // Check that the first and last offsets conform to the specification + { + let first_offset_ok = *major_offsets.first().unwrap() == 0; + let last_offset_ok = *major_offsets.last().unwrap() == minor_indices.len(); + assert!(first_offset_ok && last_offset_ok); + } + + Self { + major_offsets, + minor_indices, + minor_dim, + } + } + /// An iterator over the explicitly stored "non-zero" entries (i, j). /// /// The iteration happens in a lane-major fashion, meaning that the lane index i diff --git a/nalgebra-sparse/src/utils.rs b/nalgebra-sparse/src/utils.rs new file mode 100644 index 00000000..73d4e967 --- /dev/null +++ b/nalgebra-sparse/src/utils.rs @@ -0,0 +1,26 @@ +//! Helper functions for sparse matrix computations + +/// permutes entries of in_slice according to permutation slice and puts them to out_slice +#[inline] +pub fn apply_permutation(out_slice: &mut [T], in_slice: &[T], permutation: &[usize]) { + assert_eq!(out_slice.len(), in_slice.len()); + assert_eq!(out_slice.len(), permutation.len()); + for (out_element, old_pos) in out_slice.iter_mut().zip(permutation) { + *out_element = in_slice[*old_pos].clone(); + } +} + +/// computes permutation by using provided indices as keys +#[inline] +pub fn compute_sort_permutation(permutation: &mut [usize], indices: &[usize]) { + assert_eq!(permutation.len(), indices.len()); + // Set permutation to identity + for (i, p) in permutation.iter_mut().enumerate() { + *p = i; + } + + // Compute permutation needed to bring minor indices into sorted order + // Note: Using sort_unstable here avoids internal allocations, which is crucial since + // each lane might have a small number of elements + permutation.sort_unstable_by_key(|idx| indices[*idx]); +} diff --git a/nalgebra-sparse/tests/unit_tests/csc.rs b/nalgebra-sparse/tests/unit_tests/csc.rs index 7fb0de54..1554b8a6 100644 --- a/nalgebra-sparse/tests/unit_tests/csc.rs +++ b/nalgebra-sparse/tests/unit_tests/csc.rs @@ -5,6 +5,8 @@ use nalgebra_sparse::{SparseEntry, SparseEntryMut, SparseFormatErrorKind}; use proptest::prelude::*; use proptest::sample::subsequence; +use super::test_data_examples::{InvalidCsDataExamples, ValidCsDataExamples}; + use crate::assert_panics; use crate::common::csc_strategy; @@ -171,11 +173,26 @@ fn csc_matrix_valid_data() { } } +#[test] +fn csc_matrix_valid_data_unsorted_column_indices() { + let valid_data: ValidCsDataExamples = ValidCsDataExamples::new(); + + let (offsets, indices, values) = valid_data.valid_unsorted_cs_data; + let csc = CscMatrix::try_from_unsorted_csc_data(5, 4, offsets, indices, values).unwrap(); + + let (offsets2, indices2, values2) = valid_data.valid_cs_data; + let expected_csc = CscMatrix::try_from_csc_data(5, 4, offsets2, indices2, values2).unwrap(); + + assert_eq!(csc, expected_csc); +} + #[test] fn csc_matrix_try_from_invalid_csc_data() { + let invalid_data: InvalidCsDataExamples = InvalidCsDataExamples::new(); { // Empty offset array (invalid length) - let matrix = CscMatrix::try_from_csc_data(0, 0, Vec::new(), Vec::new(), Vec::::new()); + let (offsets, indices, values) = invalid_data.empty_offset_array; + let matrix = CscMatrix::try_from_csc_data(0, 0, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), &SparseFormatErrorKind::InvalidStructure @@ -184,10 +201,8 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Offset array invalid length for arbitrary data - let offsets = vec![0, 3, 5]; - let indices = vec![0, 1, 2, 3, 5]; - let values = vec![0, 1, 2, 3, 4]; - + let (offsets, indices, values) = + invalid_data.offset_array_invalid_length_for_arbitrary_data; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -197,9 +212,7 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Invalid first entry in offsets array - let offsets = vec![1, 2, 2, 5]; - let indices = vec![0, 5, 1, 2, 3]; - let values = vec![0, 1, 2, 3, 4]; + let (offsets, indices, values) = invalid_data.invalid_first_entry_in_offsets_array; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -209,9 +222,7 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Invalid last entry in offsets array - let offsets = vec![0, 2, 2, 4]; - let indices = vec![0, 5, 1, 2, 3]; - let values = vec![0, 1, 2, 3, 4]; + let (offsets, indices, values) = invalid_data.invalid_last_entry_in_offsets_array; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -221,9 +232,7 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Invalid length of offsets array - let offsets = vec![0, 2, 2]; - let indices = vec![0, 5, 1, 2, 3]; - let values = vec![0, 1, 2, 3, 4]; + let (offsets, indices, values) = invalid_data.invalid_length_of_offsets_array; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -233,9 +242,7 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Nonmonotonic offsets - let offsets = vec![0, 3, 2, 5]; - let indices = vec![0, 1, 2, 3, 4]; - let values = vec![0, 1, 2, 3, 4]; + let (offsets, indices, values) = invalid_data.nonmonotonic_offsets; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -257,9 +264,7 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Minor index out of bounds - let offsets = vec![0, 2, 2, 5]; - let indices = vec![0, 6, 1, 2, 3]; - let values = vec![0, 1, 2, 3, 4]; + let (offsets, indices, values) = invalid_data.minor_index_out_of_bounds; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -269,9 +274,7 @@ fn csc_matrix_try_from_invalid_csc_data() { { // Duplicate entry - let offsets = vec![0, 2, 2, 5]; - let indices = vec![0, 5, 2, 2, 3]; - let values = vec![0, 1, 2, 3, 4]; + let (offsets, indices, values) = invalid_data.duplicate_entry; let matrix = CscMatrix::try_from_csc_data(6, 3, offsets, indices, values); assert_eq!( matrix.unwrap_err().kind(), @@ -280,6 +283,121 @@ fn csc_matrix_try_from_invalid_csc_data() { } } +#[test] +fn csc_matrix_try_from_unsorted_invalid_csc_data() { + let invalid_data: InvalidCsDataExamples = InvalidCsDataExamples::new(); + { + // Empty offset array (invalid length) + let (offsets, indices, values) = invalid_data.empty_offset_array; + let matrix = CscMatrix::try_from_unsorted_csc_data(0, 0, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } + + { + // Offset array invalid length for arbitrary data + let (offsets, indices, values) = + invalid_data.offset_array_invalid_length_for_arbitrary_data; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } + + { + // Invalid first entry in offsets array + let (offsets, indices, values) = invalid_data.invalid_first_entry_in_offsets_array; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } + + { + // Invalid last entry in offsets array + let (offsets, indices, values) = invalid_data.invalid_last_entry_in_offsets_array; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } + + { + // Invalid length of offsets array + let (offsets, indices, values) = invalid_data.invalid_length_of_offsets_array; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } + + { + // Nonmonotonic offsets + let (offsets, indices, values) = invalid_data.nonmonotonic_offsets; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } + + { + // Major offset out of bounds + let (offsets, indices, values) = invalid_data.major_offset_out_of_bounds; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::IndexOutOfBounds + ); + } + + { + // Minor index out of bounds + let (offsets, indices, values) = invalid_data.minor_index_out_of_bounds; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::IndexOutOfBounds + ); + } + + { + // Duplicate entry + let (offsets, indices, values) = invalid_data.duplicate_entry; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::DuplicateEntry + ); + } + + { + // Duplicate entry in unsorted lane + let (offsets, indices, values) = invalid_data.duplicate_entry_unsorted; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::DuplicateEntry + ); + } + + { + // Wrong values length + let (offsets, indices, values) = invalid_data.wrong_values_length; + let matrix = CscMatrix::try_from_unsorted_csc_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } +} + #[test] fn csc_disassemble_avoids_clone_when_owned() { // Test that disassemble avoids cloning the sparsity pattern when it holds the sole reference diff --git a/nalgebra-sparse/tests/unit_tests/csr.rs b/nalgebra-sparse/tests/unit_tests/csr.rs index 3ca2f0dc..a00470d5 100644 --- a/nalgebra-sparse/tests/unit_tests/csr.rs +++ b/nalgebra-sparse/tests/unit_tests/csr.rs @@ -5,7 +5,7 @@ use nalgebra_sparse::{SparseEntry, SparseEntryMut, SparseFormatErrorKind}; use proptest::prelude::*; use proptest::sample::subsequence; -use super::test_data_examples::InvalidCsrDataExamples; +use super::test_data_examples::{InvalidCsDataExamples, ValidCsDataExamples}; use crate::assert_panics; use crate::common::csr_strategy; @@ -175,30 +175,20 @@ fn csr_matrix_valid_data() { #[test] fn csr_matrix_valid_data_unsorted_column_indices() { - let csr = CsrMatrix::try_from_unsorted_csr_data( - 4, - 5, - vec![0, 3, 5, 8, 11], - vec![4, 1, 3, 3, 1, 2, 3, 0, 3, 4, 1], - vec![5, 1, 4, 7, 4, 2, 3, 1, 8, 9, 6], - ) - .unwrap(); + let valid_data: ValidCsDataExamples = ValidCsDataExamples::new(); - let expected_csr = CsrMatrix::try_from_csr_data( - 4, - 5, - vec![0, 3, 5, 8, 11], - vec![1, 3, 4, 1, 3, 0, 2, 3, 1, 3, 4], - vec![1, 4, 5, 4, 7, 1, 2, 3, 6, 8, 9], - ) - .unwrap(); + let (offsets, indices, values) = valid_data.valid_unsorted_cs_data; + let csr = CsrMatrix::try_from_unsorted_csr_data(4, 5, offsets, indices, values).unwrap(); + + let (offsets2, indices2, values2) = valid_data.valid_cs_data; + let expected_csr = CsrMatrix::try_from_csr_data(4, 5, offsets2, indices2, values2).unwrap(); assert_eq!(csr, expected_csr); } #[test] fn csr_matrix_try_from_invalid_csr_data() { - let invalid_data: InvalidCsrDataExamples = InvalidCsrDataExamples::new(); + let invalid_data: InvalidCsDataExamples = InvalidCsDataExamples::new(); { // Empty offset array (invalid length) let (offsets, indices, values) = invalid_data.empty_offset_array; @@ -293,7 +283,7 @@ fn csr_matrix_try_from_invalid_csr_data() { #[test] fn csr_matrix_try_from_unsorted_invalid_csr_data() { - let invalid_data: InvalidCsrDataExamples = InvalidCsrDataExamples::new(); + let invalid_data: InvalidCsDataExamples = InvalidCsDataExamples::new(); { // Empty offset array (invalid length) let (offsets, indices, values) = invalid_data.empty_offset_array; @@ -355,6 +345,16 @@ fn csr_matrix_try_from_unsorted_invalid_csr_data() { ); } + { + // Major offset out of bounds + let (offsets, indices, values) = invalid_data.major_offset_out_of_bounds; + let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::IndexOutOfBounds + ); + } + { // Minor index out of bounds let (offsets, indices, values) = invalid_data.minor_index_out_of_bounds; @@ -374,6 +374,26 @@ fn csr_matrix_try_from_unsorted_invalid_csr_data() { &SparseFormatErrorKind::DuplicateEntry ); } + + { + // Duplicate entry in unsorted lane + let (offsets, indices, values) = invalid_data.duplicate_entry_unsorted; + let matrix = CsrMatrix::try_from_unsorted_csr_data(3, 6, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::DuplicateEntry + ); + } + + { + // Wrong values length + let (offsets, indices, values) = invalid_data.wrong_values_length; + let matrix = CsrMatrix::try_from_unsorted_csr_data(6, 3, offsets, indices, values); + assert_eq!( + matrix.unwrap_err().kind(), + &SparseFormatErrorKind::InvalidStructure + ); + } } #[test] diff --git a/nalgebra-sparse/tests/unit_tests/test_data_examples.rs b/nalgebra-sparse/tests/unit_tests/test_data_examples.rs index 20721087..a80b5064 100644 --- a/nalgebra-sparse/tests/unit_tests/test_data_examples.rs +++ b/nalgebra-sparse/tests/unit_tests/test_data_examples.rs @@ -1,5 +1,31 @@ -/// Examples of *invalid* raw CSR data `(offsets, indices, values)`. -pub struct InvalidCsrDataExamples { +/// Examples of *valid* raw CS data `(offsets, indices, values)`. +pub struct ValidCsDataExamples { + pub valid_cs_data: (Vec, Vec, Vec), + pub valid_unsorted_cs_data: (Vec, Vec, Vec), +} + +impl ValidCsDataExamples { + pub fn new() -> Self { + let valid_cs_data = ( + vec![0, 3, 5, 8, 11], + vec![1, 3, 4, 1, 3, 0, 2, 3, 1, 3, 4], + vec![1, 4, 5, 4, 7, 1, 2, 3, 6, 8, 9], + ); + let valid_unsorted_cs_data = ( + vec![0, 3, 5, 8, 11], + vec![4, 1, 3, 3, 1, 2, 3, 0, 3, 4, 1], + vec![5, 1, 4, 7, 4, 2, 3, 1, 8, 9, 6], + ); + + return Self { + valid_cs_data, + valid_unsorted_cs_data, + }; + } +} + +/// Examples of *invalid* raw CS data `(offsets, indices, values)`. +pub struct InvalidCsDataExamples { pub empty_offset_array: (Vec, Vec, Vec), pub offset_array_invalid_length_for_arbitrary_data: (Vec, Vec, Vec), pub invalid_first_entry_in_offsets_array: (Vec, Vec, Vec), @@ -7,11 +33,14 @@ pub struct InvalidCsrDataExamples { pub invalid_length_of_offsets_array: (Vec, Vec, Vec), pub nonmonotonic_offsets: (Vec, Vec, Vec), pub nonmonotonic_minor_indices: (Vec, Vec, Vec), + pub major_offset_out_of_bounds: (Vec, Vec, Vec), pub minor_index_out_of_bounds: (Vec, Vec, Vec), pub duplicate_entry: (Vec, Vec, Vec), + pub duplicate_entry_unsorted: (Vec, Vec, Vec), + pub wrong_values_length: (Vec, Vec, Vec), } -impl InvalidCsrDataExamples { +impl InvalidCsDataExamples { pub fn new() -> Self { let empty_offset_array = (Vec::::new(), Vec::::new(), Vec::::new()); let offset_array_invalid_length_for_arbitrary_data = @@ -25,9 +54,13 @@ impl InvalidCsrDataExamples { let nonmonotonic_offsets = (vec![0, 3, 2, 5], vec![0, 1, 2, 3, 4], vec![0, 1, 2, 3, 4]); let nonmonotonic_minor_indices = (vec![0, 2, 2, 5], vec![0, 2, 3, 1, 4], vec![0, 1, 2, 3, 4]); + let major_offset_out_of_bounds = + (vec![0, 7, 2, 5], vec![0, 2, 3, 1, 4], vec![0, 1, 2, 3, 4]); let minor_index_out_of_bounds = (vec![0, 2, 2, 5], vec![0, 6, 1, 2, 3], vec![0, 1, 2, 3, 4]); - let duplicate_entry = (vec![0, 2, 2, 5], vec![0, 5, 2, 2, 3], vec![0, 1, 2, 3, 4]); + let duplicate_entry = (vec![0, 1, 2, 5], vec![1, 3, 2, 3, 3], vec![0, 1, 2, 3, 4]); + let duplicate_entry_unsorted = (vec![0, 1, 4, 5], vec![1, 3, 2, 3, 3], vec![0, 1, 2, 3, 4]); + let wrong_values_length = (vec![0, 1, 2, 5], vec![1, 3, 2, 3, 0], vec![5, 4]); return Self { empty_offset_array, @@ -37,8 +70,11 @@ impl InvalidCsrDataExamples { invalid_length_of_offsets_array, nonmonotonic_minor_indices, nonmonotonic_offsets, + major_offset_out_of_bounds, minor_index_out_of_bounds, duplicate_entry, + duplicate_entry_unsorted, + wrong_values_length, }; } } From 325618ba22660b5ee22f308b4f06ac5f0196a9ae Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Wed, 9 Mar 2022 02:13:12 -0500 Subject: [PATCH 2/4] Fix SVD instability bug --- src/linalg/svd.rs | 22 +++++++--------------- tests/linalg/svd.rs | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/linalg/svd.rs b/src/linalg/svd.rs index 3f945a65..42a6abb3 100644 --- a/src/linalg/svd.rs +++ b/src/linalg/svd.rs @@ -2,7 +2,6 @@ use serde::{Deserialize, Serialize}; use std::any::TypeId; -use approx::AbsDiffEq; use num::{One, Zero}; use crate::allocator::Allocator; @@ -94,14 +93,7 @@ where /// The singular values are not guaranteed to be sorted in any particular order. /// If a descending order is required, consider using `new` instead. pub fn new_unordered(matrix: OMatrix, compute_u: bool, compute_v: bool) -> Self { - Self::try_new_unordered( - matrix, - compute_u, - compute_v, - T::RealField::default_epsilon(), - 0, - ) - .unwrap() + Self::try_new_unordered(matrix, compute_u, compute_v, crate::convert(1e-15), 0).unwrap() } /// Attempts to compute the Singular Value Decomposition of `matrix` using implicit shift. @@ -888,13 +880,13 @@ fn compute_2x2_uptrig_svd( v_t = Some(csv.clone()); } - if compute_u { - let cu = (m11.scale(csv.c()) + m12 * csv.s()) / v1.clone(); - let su = (m22 * csv.s()) / v1.clone(); - let (csu, sgn_u) = GivensRotation::new(cu, su); + let cu = (m11.scale(csv.c()) + m12 * csv.s()) / v1.clone(); + let su = (m22 * csv.s()) / v1.clone(); + let (csu, sgn_u) = GivensRotation::new(cu, su); + v1 *= sgn_u.clone(); + v2 *= sgn_u; - v1 *= sgn_u.clone(); - v2 *= sgn_u; + if compute_u { u = Some(csu); } } diff --git a/tests/linalg/svd.rs b/tests/linalg/svd.rs index deb3d38d..7eabe3d0 100644 --- a/tests/linalg/svd.rs +++ b/tests/linalg/svd.rs @@ -460,3 +460,27 @@ fn svd_sorted() { epsilon = 1.0e-5 ); } + +#[test] +// Exercises bug reported in issue #983 of nalgebra +fn svd_consistent() { + let m = nalgebra::dmatrix![ + 10.74785316637712f64, -5.994983325167452, -6.064492921857296; + -4.149751381521569, 20.654504205822462, -4.470436210703133; + -22.772715014220207, -1.4554372570788008, 18.108113992170573 + ] + .transpose(); + let svd1 = m.clone().svd(true, true); + let svd2 = m.clone().svd(false, true); + let svd3 = m.clone().svd(true, false); + let svd4 = m.svd(false, false); + + assert_relative_eq!(svd1.singular_values, svd2.singular_values, epsilon = 1e-5); + assert_relative_eq!(svd1.singular_values, svd3.singular_values, epsilon = 1e-5); + assert_relative_eq!(svd1.singular_values, svd4.singular_values, epsilon = 1e-5); + assert_relative_eq!( + svd1.singular_values, + nalgebra::dvector![3.16188022e+01, 2.23811978e+01, 0.], + epsilon = 1e-5 + ); +} From 1acd48f6f1b684317e7cd893d973ba67ab4f7290 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Wed, 9 Mar 2022 21:04:43 -0500 Subject: [PATCH 3/4] Address review comments --- src/linalg/svd.rs | 10 +++++++++- tests/linalg/svd.rs | 12 ++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/linalg/svd.rs b/src/linalg/svd.rs index 42a6abb3..06bae4a3 100644 --- a/src/linalg/svd.rs +++ b/src/linalg/svd.rs @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize}; use std::any::TypeId; +use approx::AbsDiffEq; use num::{One, Zero}; use crate::allocator::Allocator; @@ -93,7 +94,14 @@ where /// The singular values are not guaranteed to be sorted in any particular order. /// If a descending order is required, consider using `new` instead. pub fn new_unordered(matrix: OMatrix, compute_u: bool, compute_v: bool) -> Self { - Self::try_new_unordered(matrix, compute_u, compute_v, crate::convert(1e-15), 0).unwrap() + Self::try_new_unordered( + matrix, + compute_u, + compute_v, + T::RealField::default_epsilon() * crate::convert(5.0), + 0, + ) + .unwrap() } /// Attempts to compute the Singular Value Decomposition of `matrix` using implicit shift. diff --git a/tests/linalg/svd.rs b/tests/linalg/svd.rs index 7eabe3d0..8e83df81 100644 --- a/tests/linalg/svd.rs +++ b/tests/linalg/svd.rs @@ -462,8 +462,8 @@ fn svd_sorted() { } #[test] -// Exercises bug reported in issue #983 of nalgebra -fn svd_consistent() { +// Exercises bug reported in issue #983 of nalgebra (https://github.com/dimforge/nalgebra/issues/983) +fn svd_regression_issue_983() { let m = nalgebra::dmatrix![ 10.74785316637712f64, -5.994983325167452, -6.064492921857296; -4.149751381521569, 20.654504205822462, -4.470436210703133; @@ -475,12 +475,12 @@ fn svd_consistent() { let svd3 = m.clone().svd(true, false); let svd4 = m.svd(false, false); - assert_relative_eq!(svd1.singular_values, svd2.singular_values, epsilon = 1e-5); - assert_relative_eq!(svd1.singular_values, svd3.singular_values, epsilon = 1e-5); - assert_relative_eq!(svd1.singular_values, svd4.singular_values, epsilon = 1e-5); + assert_relative_eq!(svd1.singular_values, svd2.singular_values, epsilon = 1e-9); + assert_relative_eq!(svd1.singular_values, svd3.singular_values, epsilon = 1e-9); + assert_relative_eq!(svd1.singular_values, svd4.singular_values, epsilon = 1e-9); assert_relative_eq!( svd1.singular_values, nalgebra::dvector![3.16188022e+01, 2.23811978e+01, 0.], - epsilon = 1e-5 + epsilon = 1e-6 ); } From a27d121a7a1783c822ff47c46aee0475bda15852 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Wed, 9 Mar 2022 21:10:45 -0500 Subject: [PATCH 4/4] Add regression test for #1072 --- tests/linalg/svd.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/linalg/svd.rs b/tests/linalg/svd.rs index 8e83df81..900901ad 100644 --- a/tests/linalg/svd.rs +++ b/tests/linalg/svd.rs @@ -484,3 +484,18 @@ fn svd_regression_issue_983() { epsilon = 1e-6 ); } + +#[test] +// Exercises bug reported in issue #1072 of nalgebra (https://github.com/dimforge/nalgebra/issues/1072) +fn svd_regression_issue_1072() { + let x = nalgebra::dmatrix![-6.206610118536945f64, -3.67612186839874; -1.2755730783423473, 6.047238193479124]; + let mut x_svd = x.svd(true, true); + x_svd.singular_values = nalgebra::dvector![1.0, 0.0]; + let y = x_svd.recompose().unwrap(); + let y_svd = y.svd(true, true); + assert_relative_eq!( + y_svd.singular_values, + nalgebra::dvector![1.0, 0.0], + epsilon = 1e-9 + ); +}