From 1dbccfeb7c3a452a78cd973627f793f7caee3f81 Mon Sep 17 00:00:00 2001 From: Andreas Longva Date: Mon, 13 Jul 2020 18:44:40 +0200 Subject: [PATCH] Initial COO implementation --- Cargo.toml | 2 +- nalgebra-sparse/Cargo.toml | 9 ++ nalgebra-sparse/src/coo.rs | 202 ++++++++++++++++++++++++ nalgebra-sparse/src/lib.rs | 56 +++++++ nalgebra-sparse/src/ops.rs | 70 ++++++++ nalgebra-sparse/tests/common/mod.rs | 20 +++ nalgebra-sparse/tests/unit.rs | 5 + nalgebra-sparse/tests/unit_tests/coo.rs | 190 ++++++++++++++++++++++ nalgebra-sparse/tests/unit_tests/mod.rs | 2 + nalgebra-sparse/tests/unit_tests/ops.rs | 28 ++++ 10 files changed, 583 insertions(+), 1 deletion(-) create mode 100644 nalgebra-sparse/Cargo.toml create mode 100644 nalgebra-sparse/src/coo.rs create mode 100644 nalgebra-sparse/src/lib.rs create mode 100644 nalgebra-sparse/src/ops.rs create mode 100644 nalgebra-sparse/tests/common/mod.rs create mode 100644 nalgebra-sparse/tests/unit.rs create mode 100644 nalgebra-sparse/tests/unit_tests/coo.rs create mode 100644 nalgebra-sparse/tests/unit_tests/mod.rs create mode 100644 nalgebra-sparse/tests/unit_tests/ops.rs diff --git a/Cargo.toml b/Cargo.toml index 2662fe2c..9d689dac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,7 +80,7 @@ proptest = { version = "=0.10.1" } itertools = "0.9" [workspace] -members = [ "nalgebra-lapack", "nalgebra-glm" ] +members = [ "nalgebra-lapack", "nalgebra-glm", "nalgebra-sparse" ] [[bench]] name = "nalgebra_bench" diff --git a/nalgebra-sparse/Cargo.toml b/nalgebra-sparse/Cargo.toml new file mode 100644 index 00000000..04b5dfc9 --- /dev/null +++ b/nalgebra-sparse/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "nalgebra-sparse" +version = "0.1.0" +authors = [ "Andreas Longva", "Sébastien Crozet " ] +edition = "2018" + +[dependencies] +nalgebra = { version="0.21", path = "../" } +num-traits = { version = "0.2", default-features = false } \ No newline at end of file diff --git a/nalgebra-sparse/src/coo.rs b/nalgebra-sparse/src/coo.rs new file mode 100644 index 00000000..a1b28233 --- /dev/null +++ b/nalgebra-sparse/src/coo.rs @@ -0,0 +1,202 @@ +use crate::SparseFormatError; +use nalgebra::{ClosedAdd, DMatrix, Scalar}; +use num_traits::Zero; + +/// A COO representation of a sparse matrix. +/// +/// A COO matrix stores entries in coordinate-form, that is triplets `(i, j, v)`, where `i` and `j` +/// correspond to row and column indices of the entry, and `v` to the value of the entry. +/// With the rare exception of matrix-vector multiplication of certain extremely sparse matrices, +/// it is of limited use for standard matrix operations. Its main purpose is to facilitate +/// easy construction of other, more efficient matrix formats (such as CSR/COO), and the +/// conversion between different formats. +/// +/// Representation +/// -------------- +/// +/// For given dimensions `nrows` and `ncols`, the matrix is represented by three same-length +/// arrays `row_indices`, `col_indices` and `values` that constitute the coordinate triplets +/// of the matrix. The indices must be in bounds, but *duplicate entries are explicitly allowed*. +/// Upon conversion to other formats, the duplicate entries may be summed together. See the +/// documentation for the respective conversion functions. +/// +/// Example +/// ------- +/// +/// ```rust +/// # use nalgebra_sparse::CooMatrix; +/// // Create a zero matrix +/// let mut coo = CooMatrix::new(4, 4); +/// // Or initialize it with a set of triplets +/// coo = CooMatrix::try_from_triplets(4, 4, vec![1, 2], vec![0, 1], vec![3.0, 4.0]).unwrap(); +/// +/// // Push a single triplet +/// coo.push(2, 0, 1.0); +/// +/// // TODO: Convert to CSR +/// ``` +#[derive(Debug, Clone)] +pub struct CooMatrix { + nrows: usize, + ncols: usize, + row_indices: Vec, + col_indices: Vec, + values: Vec, +} + +impl CooMatrix +where + T: Scalar, +{ + /// Construct a zero COO matrix of the given dimensions. + /// + /// Specifically, the collection of triplets - corresponding to explicitly stored entries - + /// is empty, so that the matrix (implicitly) represented by the COO matrix consists of all + /// zero entries. + pub fn new(nrows: usize, ncols: usize) -> Self { + Self { + nrows, + ncols, + row_indices: Vec::new(), + col_indices: Vec::new(), + values: Vec::new(), + } + } + + /// Try to construct a COO matrix from the given dimensions and a collection of + /// (i, j, v) triplets. + /// + /// Returns an error if either row or column indices contain indices out of bounds, + /// or if the data arrays do not all have the same length. Note that the COO format + /// inherently supports duplicate entries. + pub fn try_from_triplets( + nrows: usize, + ncols: usize, + row_indices: Vec, + col_indices: Vec, + values: Vec, + ) -> Result { + if row_indices.len() != col_indices.len() { + return Err(SparseFormatError::InvalidStructure( + Box::from("Number of row and col indices must be the same.") + )); + } else if col_indices.len() != values.len() { + return Err(SparseFormatError::InvalidStructure( + Box::from("Number of col indices and values must be the same.") + )); + } + + let row_indices_in_bounds = row_indices.iter().all(|i| *i < nrows); + let col_indices_in_bounds = col_indices.iter().all(|j| *j < ncols); + + if !row_indices_in_bounds { + Err(SparseFormatError::IndexOutOfBounds(Box::from( + "Row index out of bounds.", + ))) + } else if !col_indices_in_bounds { + Err(SparseFormatError::IndexOutOfBounds(Box::from( + "Col index out of bounds.", + ))) + } else { + Ok(Self { + nrows, + ncols, + row_indices, + col_indices, + values, + }) + } + } + + /// An iterator over triplets (i, j, v). + // TODO: Consider giving the iterator a concrete type instead of impl trait...? + pub fn triplet_iter(&self) -> impl Iterator { + self.row_indices + .iter() + .zip(&self.col_indices) + .zip(&self.values) + .map(|((i, j), v)| (*i, *j, v)) + } + + /// Push a single triplet to the matrix. + /// + /// This adds the value `v` to the `i`th row and `j`th column in the matrix. + /// + /// Panics + /// ------ + /// + /// Panics if `i` or `j` is out of bounds. + #[inline(always)] + pub fn push(&mut self, i: usize, j: usize, v: T) { + assert!(i < self.nrows); + assert!(j < self.ncols); + self.row_indices.push(i); + self.col_indices.push(j); + self.values.push(v); + } + + /// The number of rows in the matrix. + #[inline(always)] + pub fn nrows(&self) -> usize { + self.nrows + } + + /// The number of columns in the matrix. + #[inline(always)] + pub fn ncols(&self) -> usize { + self.ncols + } + + /// The row indices of the explicitly stored entries. + pub fn row_indices(&self) -> &[usize] { + &self.row_indices + } + + /// The column indices of the explicitly stored entries. + pub fn col_indices(&self) -> &[usize] { + &self.col_indices + } + + /// The values of the explicitly stored entries. + pub fn values(&self) -> &[T] { + &self.values + } + + /// Disassembles the matrix into individual triplet arrays. + /// + /// Examples + /// -------- + /// + /// ``` + /// # use nalgebra_sparse::CooMatrix; + /// let row_indices = vec![0, 1]; + /// let col_indices = vec![1, 2]; + /// let values = vec![1.0, 2.0]; + /// let coo = CooMatrix::try_from_triplets(2, 3, row_indices, col_indices, values) + /// .unwrap(); + /// + /// let (row_idx, col_idx, val) = coo.disassemble(); + /// assert_eq!(row_idx, vec![0, 1]); + /// assert_eq!(col_idx, vec![1, 2]); + /// assert_eq!(val, vec![1.0, 2.0]); + /// ``` + pub fn disassemble(self) -> (Vec, Vec, Vec) { + (self.row_indices, self.col_indices, self.values) + } + + /// Construct the dense representation of the COO matrix. + /// + /// Duplicate entries are summed together. + pub fn to_dense(&self) -> DMatrix + where + T: ClosedAdd + Zero, + { + let mut result = DMatrix::zeros(self.nrows, self.ncols); + + for (i, j, v) in self.triplet_iter() { + result[(i, j)] += v.clone(); + } + + result + } +} diff --git a/nalgebra-sparse/src/lib.rs b/nalgebra-sparse/src/lib.rs new file mode 100644 index 00000000..9967e94a --- /dev/null +++ b/nalgebra-sparse/src/lib.rs @@ -0,0 +1,56 @@ +mod coo; +mod csr; +mod pattern; + +pub mod ops; + +pub use coo::CooMatrix; +pub use csr::CsrMatrix; +pub use pattern::{SparsityPattern}; + +/// Iterator types for matrices. +/// +/// Most users will not need to interface with these types directly. Instead, refer to the +/// iterator methods for the respective matrix formats. +pub mod iter { + // Iterators are best implemented in the same modules as the matrices they iterate over, + // since they are so closely tied to their respective implementations. However, + // in the crate's public API we move them into a separate `iter` module in order to avoid + // cluttering the docs with iterator types that most users will never need to explicitly + // know about. + pub use crate::pattern::SparsityPatternIter; + pub use crate::csr::{CsrTripletIter, CsrTripletIterMut}; +} + +use std::error::Error; +use std::fmt; + +#[derive(Debug)] +#[non_exhaustive] +pub enum SparseFormatError { + /// Indicates that the index data associated with the format contains at least one index + /// out of bounds. + IndexOutOfBounds(Box), + + /// Indicates that the provided data contains at least one duplicate entry, and the + /// current format does not support duplicate entries. + DuplicateEntry(Box), + + /// Indicates that the provided data for the format does not conform to the high-level + /// structure of the format. + /// + /// For example, the arrays defining the format data might have incompatible sizes. + InvalidStructure(Box), +} + +impl fmt::Display for SparseFormatError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::IndexOutOfBounds(err) => err.fmt(f), + Self::DuplicateEntry(err) => err.fmt(f), + Self::InvalidStructure(err) => err.fmt(f) + } + } +} + +impl Error for SparseFormatError {} diff --git a/nalgebra-sparse/src/ops.rs b/nalgebra-sparse/src/ops.rs new file mode 100644 index 00000000..b7b2c47b --- /dev/null +++ b/nalgebra-sparse/src/ops.rs @@ -0,0 +1,70 @@ +//! Matrix operations involving sparse matrices. + +use crate::CooMatrix; +use nalgebra::base::storage::{Storage, StorageMut}; +use nalgebra::{ClosedAdd, ClosedMul, Dim, Scalar, Vector}; +use num_traits::{One, Zero}; + +/// Sparse matrix-vector multiplication `y = beta * y + alpha * A * x`. +/// +/// Computes a matrix-vector product with the COO matrix "A" and the vector `x`, storing the +/// result in `y`. +/// +/// If `beta == 0`, the elements in `y` are never read. +/// +/// Panics +/// ------ +/// +/// Panics if `y`, `a` and `x` do not have compatible dimensions. +pub fn spmv_coo( + beta: T, + y: &mut Vector, + alpha: T, + a: &CooMatrix, + x: &Vector, +) where + T: Scalar + ClosedAdd + ClosedMul + Zero + One, + YDim: Dim, + XDim: Dim, + Y: StorageMut, + X: Storage, +{ + assert_eq!( + y.len(), + a.nrows(), + "y and a must be dimensionally compatible" + ); + assert_eq!( + a.ncols(), + x.len(), + "a and x must be dimensionally compatible" + ); + + if beta == T::zero() { + // If `y` is constructed through `new_uninitialized()`, we must make sure to not read + // any of the elements in order to avoid UB, so we special case beta == 0 + // in order to ensure that we only write, not read, the elements in y. + for y_i in y.iter_mut() { + *y_i = T::zero(); + } + } else if beta != T::one() { + // Since the COO triplets have no particular structure, we cannot combine initialization + // of y with the triplet loop below, and instead have to do it in a pre-pass. + for y_i in y.iter_mut() { + *y_i *= beta.inlined_clone(); + } + } + + for (i, j, v) in a.triplet_iter() { + // TODO: We could skip bounds checks with unsafe here, since COO ensures that all indices + // are in bounds and we assert on dimensions up-front. + // The compiler will not be able to elide the checks, since we're doing + // random/unpredictable access to elements in `x` and `y`. + let (alpha, v, x_j) = ( + alpha.inlined_clone(), + v.inlined_clone(), + x[j].inlined_clone(), + ); + y[i] += alpha * v * x_j; + } +} diff --git a/nalgebra-sparse/tests/common/mod.rs b/nalgebra-sparse/tests/common/mod.rs new file mode 100644 index 00000000..bb77a10a --- /dev/null +++ b/nalgebra-sparse/tests/common/mod.rs @@ -0,0 +1,20 @@ +#[macro_export] +macro_rules! assert_panics { + ($e:expr) => {{ + use std::panic::{catch_unwind}; + use std::stringify; + let expr_string = stringify!($e); + + // Note: We cannot manipulate the panic hook here, because it is global and the test + // suite is run in parallel, which leads to race conditions in the sense + // that some regular tests that panic might not output anything anymore. + // Unfortunately this means that output is still printed to stdout if + // we run cargo test -- --nocapture. But Cargo does not forward this if the test + // binary is not run with nocapture, so it is somewhat acceptable nonetheless. + + let result = catch_unwind(|| $e); + if result.is_ok() { + panic!("assert_panics!({}) failed: the expression did not panic.", expr_string); + } + }}; +} \ No newline at end of file diff --git a/nalgebra-sparse/tests/unit.rs b/nalgebra-sparse/tests/unit.rs new file mode 100644 index 00000000..ee44e73b --- /dev/null +++ b/nalgebra-sparse/tests/unit.rs @@ -0,0 +1,5 @@ +//! Unit tests +mod unit_tests; + +#[macro_use] +pub mod common; \ No newline at end of file diff --git a/nalgebra-sparse/tests/unit_tests/coo.rs b/nalgebra-sparse/tests/unit_tests/coo.rs new file mode 100644 index 00000000..b7504d51 --- /dev/null +++ b/nalgebra-sparse/tests/unit_tests/coo.rs @@ -0,0 +1,190 @@ +use nalgebra_sparse::{CooMatrix, SparsePatternError}; +use nalgebra::DMatrix; +use crate::assert_panics; + +#[test] +fn coo_construction_for_valid_data() { + // Test that construction with try_from_triplets succeeds, that the state of the + // matrix afterwards is as expected, and that the dense representation matches expectations. + + { + // Zero matrix + let coo = CooMatrix::::try_from_triplets(3, 2, Vec::new(), Vec::new(), Vec::new()) + .unwrap(); + assert_eq!(coo.nrows(), 3); + assert_eq!(coo.ncols(), 2); + assert!(coo.triplet_iter().next().is_none()); + assert!(coo.row_indices().is_empty()); + assert!(coo.col_indices().is_empty()); + assert!(coo.values().is_empty()); + + assert_eq!(coo.to_dense(), DMatrix::repeat(3, 2, 0)); + } + + { + // Arbitrary matrix, no duplicates + let i = vec![0, 1, 0, 0, 2]; + let j = vec![0, 2, 1, 3, 3]; + let v = vec![2, 3, 7, 3, 1]; + let coo = CooMatrix::::try_from_triplets(3, 5, i.clone(), j.clone(), v.clone()) + .unwrap(); + assert_eq!(coo.nrows(), 3); + assert_eq!(coo.ncols(), 5); + + assert_eq!(i.as_slice(), coo.row_indices()); + assert_eq!(j.as_slice(), coo.col_indices()); + assert_eq!(v.as_slice(), coo.values()); + + let expected_triplets: Vec<_> = i + .iter() + .zip(&j) + .zip(&v) + .map(|((i, j), v)| (*i, *j, *v)) + .collect(); + let actual_triplets: Vec<_> = coo.triplet_iter().map(|(i, j, v)| (i, j, *v)).collect(); + assert_eq!(actual_triplets, expected_triplets); + + #[rustfmt::skip] + let expected_dense = DMatrix::from_row_slice(3, 5, &[ + 2, 7, 0, 3, 0, + 0, 0, 3, 0, 0, + 0, 0, 0, 1, 0 + ]); + assert_eq!(coo.to_dense(), expected_dense); + } + + { + // Arbitrary matrix, with duplicates + let i = vec![0, 1, 0, 0, 0, 0, 2, 1]; + let j = vec![0, 2, 0, 1, 0, 3, 3, 2]; + let v = vec![2, 3, 4, 7, 1, 3, 1, 5]; + let coo = CooMatrix::::try_from_triplets(3, 5, i.clone(), j.clone(), v.clone()) + .unwrap(); + assert_eq!(coo.nrows(), 3); + assert_eq!(coo.ncols(), 5); + + assert_eq!(i.as_slice(), coo.row_indices()); + assert_eq!(j.as_slice(), coo.col_indices()); + assert_eq!(v.as_slice(), coo.values()); + + let expected_triplets: Vec<_> = i + .iter() + .zip(&j) + .zip(&v) + .map(|((i, j), v)| (*i, *j, *v)) + .collect(); + let actual_triplets: Vec<_> = coo.triplet_iter().map(|(i, j, v)| (i, j, *v)).collect(); + assert_eq!(actual_triplets, expected_triplets); + + #[rustfmt::skip] + let expected_dense = DMatrix::from_row_slice(3, 5, &[ + 7, 7, 0, 3, 0, + 0, 0, 8, 0, 0, + 0, 0, 0, 1, 0 + ]); + assert_eq!(coo.to_dense(), expected_dense); + } +} + +#[test] +fn coo_try_from_triplets_reports_out_of_bounds_indices() { + { + // 0x0 matrix + let result = CooMatrix::::try_from_triplets(0, 0, vec![0], vec![0], vec![2]); + assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_)))); + } + + { + // 1x1 matrix, row out of bounds + let result = CooMatrix::::try_from_triplets(1, 1, vec![1], vec![0], vec![2]); + assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_)))); + } + + { + // 1x1 matrix, col out of bounds + let result = CooMatrix::::try_from_triplets(1, 1, vec![0], vec![1], vec![2]); + assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_)))); + } + + { + // 1x1 matrix, row and col out of bounds + let result = CooMatrix::::try_from_triplets(1, 1, vec![1], vec![1], vec![2]); + assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_)))); + } + + { + // Arbitrary matrix, row out of bounds + let i = vec![0, 1, 0, 3, 2]; + let j = vec![0, 2, 1, 3, 3]; + let v = vec![2, 3, 7, 3, 1]; + let result = CooMatrix::::try_from_triplets(3, 5, i, j, v); + assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_)))); + } + + { + // Arbitrary matrix, col out of bounds + let i = vec![0, 1, 0, 0, 2]; + let j = vec![0, 2, 1, 5, 3]; + let v = vec![2, 3, 7, 3, 1]; + let result = CooMatrix::::try_from_triplets(3, 5, i, j, v); + assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_)))); + } +} + +#[test] +fn coo_try_from_triplets_panics_on_mismatched_vectors() { + // Check that try_from_triplets panics when the triplet vectors have different lengths + macro_rules! assert_errs { + ($result:expr) => { + assert!(matches!($result, Err(SparseFormatError::InvalidStructure(_)))) + } + } + + assert_errs!(CooMatrix::::try_from_triplets(3, 5, vec![1, 2], vec![0], vec![0])); + assert_errs!(CooMatrix::::try_from_triplets(3, 5, vec![1], vec![0, 0], vec![0])); + assert_errs!(CooMatrix::::try_from_triplets(3, 5, vec![1], vec![0], vec![0, 1])); + assert_errs!(CooMatrix::::try_from_triplets(3, 5, vec![1, 2], vec![0, 1], vec![0])); + assert_errs!(CooMatrix::::try_from_triplets(3, 5, vec![1], vec![0, 1], vec![0, 1])); + assert_errs!(CooMatrix::::try_from_triplets(3, 5, vec![1, 1], vec![0], vec![0, 1])); +} + +#[test] +fn coo_push_valid_entries() { + let mut coo = CooMatrix::new(3, 3); + + coo.push(0, 0, 1); + assert_eq!(coo.triplet_iter().collect::>(), vec![(0, 0, &1)]); + + coo.push(0, 0, 2); + assert_eq!(coo.triplet_iter().collect::>(), vec![(0, 0, &1), (0, 0, &2)]); + + coo.push(2, 2, 3); + assert_eq!(coo.triplet_iter().collect::>(), vec![(0, 0, &1), (0, 0, &2), (2, 2, &3)]); +} + +#[test] +fn coo_push_out_of_bounds_entries() { + { + // 0x0 matrix + let coo = CooMatrix::new(0, 0); + assert_panics!(coo.clone().push(0, 0, 1)); + } + + { + // 0x1 matrix + assert_panics!(CooMatrix::new(0, 1).push(0, 0, 1)); + } + + { + // 1x0 matrix + assert_panics!(CooMatrix::new(1, 0).push(0, 0, 1)); + } + + { + // Arbitrary matrix dimensions + let coo = CooMatrix::new(3, 2); + assert_panics!(coo.clone().push(3, 0, 1)); + assert_panics!(coo.clone().push(2, 2, 1)); + assert_panics!(coo.clone().push(3, 2, 1)); + } +} \ No newline at end of file diff --git a/nalgebra-sparse/tests/unit_tests/mod.rs b/nalgebra-sparse/tests/unit_tests/mod.rs new file mode 100644 index 00000000..0c1631c4 --- /dev/null +++ b/nalgebra-sparse/tests/unit_tests/mod.rs @@ -0,0 +1,2 @@ +mod coo; +mod ops; \ No newline at end of file diff --git a/nalgebra-sparse/tests/unit_tests/ops.rs b/nalgebra-sparse/tests/unit_tests/ops.rs new file mode 100644 index 00000000..c4b86c38 --- /dev/null +++ b/nalgebra-sparse/tests/unit_tests/ops.rs @@ -0,0 +1,28 @@ +use nalgebra_sparse::CooMatrix; +use nalgebra_sparse::ops::spmv_coo; +use nalgebra::DVector; + +#[test] +fn spmv_coo_agrees_with_dense_gemv() { + let x = DVector::from_column_slice(&[2, 3, 4, 5]); + + let i = vec![0, 0, 1, 1, 2, 2]; + let j = vec![0, 3, 0, 1, 1, 3]; + let v = vec![3, 2, 1, 2, 3, 1]; + let a = CooMatrix::try_from_triplets(3, 4, i, j, v).unwrap(); + + let betas = [0, 1, 2]; + let alphas = [0, 1, 2]; + + for &beta in &betas { + for &alpha in &alphas { + let mut y = DVector::from_column_slice(&[2, 5, 3]); + let mut y_dense = y.clone(); + spmv_coo(beta, &mut y, alpha, &a, &x); + + y_dense.gemv(alpha, &a.to_dense(), &x, beta); + + assert_eq!(y, y_dense); + } + } +} \ No newline at end of file