Initial COO implementation

2020-07-13 18:44:40 +02:00 · 2020-07-13 18:44:40 +02:00 · 1dbccfeb7c
parent 646f62a293
commit 1dbccfeb7c
10 changed files with 583 additions and 1 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -80,7 +80,7 @@ proptest = { version = "=0.10.1" }
 itertools = "0.9"

 [workspace]
-members = [ "nalgebra-lapack", "nalgebra-glm" ]
+members = [ "nalgebra-lapack", "nalgebra-glm", "nalgebra-sparse" ]

 [[bench]]
 name = "nalgebra_bench"
--- a/nalgebra-sparse/Cargo.toml
+++ b/nalgebra-sparse/Cargo.toml
@ -0,0 +1,9 @@
+[package]
+name = "nalgebra-sparse"
+version = "0.1.0"
+authors = [ "Andreas Longva", "Sébastien Crozet <developer@crozet.re>" ]
+edition = "2018"
+
+[dependencies]
+nalgebra = { version="0.21", path = "../" }
+num-traits = { version = "0.2", default-features = false }
--- a/nalgebra-sparse/src/coo.rs
+++ b/nalgebra-sparse/src/coo.rs
@ -0,0 +1,202 @@
+use crate::SparseFormatError;
+use nalgebra::{ClosedAdd, DMatrix, Scalar};
+use num_traits::Zero;
+
+/// A COO representation of a sparse matrix.
+///
+/// A COO matrix stores entries in coordinate-form, that is triplets `(i, j, v)`, where `i` and `j`
+/// correspond to row and column indices of the entry, and `v` to the value of the entry.
+/// With the rare exception of matrix-vector multiplication of certain extremely sparse matrices,
+/// it is of limited use for standard matrix operations. Its main purpose is to facilitate
+/// easy construction of other, more efficient matrix formats (such as CSR/COO), and the
+/// conversion between different formats.
+///
+/// Representation
+/// --------------
+///
+/// For given dimensions `nrows` and `ncols`, the matrix is represented by three same-length
+/// arrays `row_indices`, `col_indices` and `values` that constitute the coordinate triplets
+/// of the matrix. The indices must be in bounds, but *duplicate entries are explicitly allowed*.
+/// Upon conversion to other formats, the duplicate entries may be summed together. See the
+/// documentation for the respective conversion functions.
+///
+/// Example
+/// -------
+///
+/// ```rust
+/// # use nalgebra_sparse::CooMatrix;
+/// // Create a zero matrix
+/// let mut coo = CooMatrix::new(4, 4);
+/// // Or initialize it with a set of triplets
+/// coo = CooMatrix::try_from_triplets(4, 4, vec![1, 2], vec![0, 1], vec![3.0, 4.0]).unwrap();
+///
+/// // Push a single triplet
+/// coo.push(2, 0, 1.0);
+///
+/// // TODO: Convert to CSR
+/// ```
+#[derive(Debug, Clone)]
+pub struct CooMatrix<T> {
+    nrows: usize,
+    ncols: usize,
+    row_indices: Vec<usize>,
+    col_indices: Vec<usize>,
+    values: Vec<T>,
+}
+
+impl<T> CooMatrix<T>
+where
+    T: Scalar,
+{
+    /// Construct a zero COO matrix of the given dimensions.
+    ///
+    /// Specifically, the collection of triplets - corresponding to explicitly stored entries -
+    /// is empty, so that the matrix (implicitly) represented by the COO matrix consists of all
+    /// zero entries.
+    pub fn new(nrows: usize, ncols: usize) -> Self {
+        Self {
+            nrows,
+            ncols,
+            row_indices: Vec::new(),
+            col_indices: Vec::new(),
+            values: Vec::new(),
+        }
+    }
+
+    /// Try to construct a COO matrix from the given dimensions and a collection of
+    /// (i, j, v) triplets.
+    ///
+    /// Returns an error if either row or column indices contain indices out of bounds,
+    /// or if the data arrays do not all have the same length. Note that the COO format
+    /// inherently supports duplicate entries.
+    pub fn try_from_triplets(
+        nrows: usize,
+        ncols: usize,
+        row_indices: Vec<usize>,
+        col_indices: Vec<usize>,
+        values: Vec<T>,
+    ) -> Result<Self, SparseFormatError> {
+        if row_indices.len() != col_indices.len() {
+            return Err(SparseFormatError::InvalidStructure(
+                Box::from("Number of row and col indices must be the same.")
+            ));
+        } else if col_indices.len() != values.len() {
+            return Err(SparseFormatError::InvalidStructure(
+                Box::from("Number of col indices and values must be the same.")
+            ));
+        }
+
+        let row_indices_in_bounds = row_indices.iter().all(|i| *i < nrows);
+        let col_indices_in_bounds = col_indices.iter().all(|j| *j < ncols);
+
+        if !row_indices_in_bounds {
+            Err(SparseFormatError::IndexOutOfBounds(Box::from(
+                "Row index out of bounds.",
+            )))
+        } else if !col_indices_in_bounds {
+            Err(SparseFormatError::IndexOutOfBounds(Box::from(
+                "Col index out of bounds.",
+            )))
+        } else {
+            Ok(Self {
+                nrows,
+                ncols,
+                row_indices,
+                col_indices,
+                values,
+            })
+        }
+    }
+
+    /// An iterator over triplets (i, j, v).
+    // TODO: Consider giving the iterator a concrete type instead of impl trait...?
+    pub fn triplet_iter(&self) -> impl Iterator<Item = (usize, usize, &T)> {
+        self.row_indices
+            .iter()
+            .zip(&self.col_indices)
+            .zip(&self.values)
+            .map(|((i, j), v)| (*i, *j, v))
+    }
+
+    /// Push a single triplet to the matrix.
+    ///
+    /// This adds the value `v` to the `i`th row and `j`th column in the matrix.
+    ///
+    /// Panics
+    /// ------
+    ///
+    /// Panics if `i` or `j` is out of bounds.
+    #[inline(always)]
+    pub fn push(&mut self, i: usize, j: usize, v: T) {
+        assert!(i < self.nrows);
+        assert!(j < self.ncols);
+        self.row_indices.push(i);
+        self.col_indices.push(j);
+        self.values.push(v);
+    }
+
+    /// The number of rows in the matrix.
+    #[inline(always)]
+    pub fn nrows(&self) -> usize {
+        self.nrows
+    }
+
+    /// The number of columns in the matrix.
+    #[inline(always)]
+    pub fn ncols(&self) -> usize {
+        self.ncols
+    }
+
+    /// The row indices of the explicitly stored entries.
+    pub fn row_indices(&self) -> &[usize] {
+        &self.row_indices
+    }
+
+    /// The column indices of the explicitly stored entries.
+    pub fn col_indices(&self) -> &[usize] {
+        &self.col_indices
+    }
+
+    /// The values of the explicitly stored entries.
+    pub fn values(&self) -> &[T] {
+        &self.values
+    }
+
+    /// Disassembles the matrix into individual triplet arrays.
+    ///
+    /// Examples
+    /// --------
+    ///
+    /// ```
+    /// # use nalgebra_sparse::CooMatrix;
+    /// let row_indices = vec![0, 1];
+    /// let col_indices = vec![1, 2];
+    /// let values = vec![1.0, 2.0];
+    /// let coo = CooMatrix::try_from_triplets(2, 3, row_indices, col_indices, values)
+    ///     .unwrap();
+    ///
+    /// let (row_idx, col_idx, val) = coo.disassemble();
+    /// assert_eq!(row_idx, vec![0, 1]);
+    /// assert_eq!(col_idx, vec![1, 2]);
+    /// assert_eq!(val, vec![1.0, 2.0]);
+    /// ```
+    pub fn disassemble(self) -> (Vec<usize>, Vec<usize>, Vec<T>) {
+        (self.row_indices, self.col_indices, self.values)
+    }
+
+    /// Construct the dense representation of the COO matrix.
+    ///
+    /// Duplicate entries are summed together.
+    pub fn to_dense(&self) -> DMatrix<T>
+    where
+        T: ClosedAdd + Zero,
+    {
+        let mut result = DMatrix::zeros(self.nrows, self.ncols);
+
+        for (i, j, v) in self.triplet_iter() {
+            result[(i, j)] += v.clone();
+        }
+
+        result
+    }
+}
--- a/nalgebra-sparse/src/lib.rs
+++ b/nalgebra-sparse/src/lib.rs
@ -0,0 +1,56 @@
+mod coo;
+mod csr;
+mod pattern;
+
+pub mod ops;
+
+pub use coo::CooMatrix;
+pub use csr::CsrMatrix;
+pub use pattern::{SparsityPattern};
+
+/// Iterator types for matrices.
+///
+/// Most users will not need to interface with these types directly. Instead, refer to the
+/// iterator methods for the respective matrix formats.
+pub mod iter {
+    // Iterators are best implemented in the same modules as the matrices they iterate over,
+    // since they are so closely tied to their respective implementations. However,
+    // in the crate's public API we move them into a separate `iter` module in order to avoid
+    // cluttering the docs with iterator types that most users will never need to explicitly
+    // know about.
+    pub use crate::pattern::SparsityPatternIter;
+    pub use crate::csr::{CsrTripletIter, CsrTripletIterMut};
+}
+
+use std::error::Error;
+use std::fmt;
+
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum SparseFormatError {
+    /// Indicates that the index data associated with the format contains at least one index
+    /// out of bounds.
+    IndexOutOfBounds(Box<dyn Error>),
+
+    /// Indicates that the provided data contains at least one duplicate entry, and the
+    /// current format does not support duplicate entries.
+    DuplicateEntry(Box<dyn Error>),
+
+    /// Indicates that the provided data for the format does not conform to the high-level
+    /// structure of the format.
+    ///
+    /// For example, the arrays defining the format data might have incompatible sizes.
+    InvalidStructure(Box<dyn Error>),
+}
+
+impl fmt::Display for SparseFormatError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Self::IndexOutOfBounds(err) => err.fmt(f),
+            Self::DuplicateEntry(err) => err.fmt(f),
+            Self::InvalidStructure(err) => err.fmt(f)
+        }
+    }
+}
+
+impl Error for SparseFormatError {}
--- a/nalgebra-sparse/src/ops.rs
+++ b/nalgebra-sparse/src/ops.rs
@ -0,0 +1,70 @@
+//! Matrix operations involving sparse matrices.
+
+use crate::CooMatrix;
+use nalgebra::base::storage::{Storage, StorageMut};
+use nalgebra::{ClosedAdd, ClosedMul, Dim, Scalar, Vector};
+use num_traits::{One, Zero};
+
+/// Sparse matrix-vector multiplication `y = beta * y + alpha * A * x`.
+///
+/// Computes a matrix-vector product with the COO matrix "A" and the vector `x`, storing the
+/// result in `y`.
+///
+/// If `beta == 0`, the elements in `y` are never read.
+///
+/// Panics
+/// ------
+///
+/// Panics if `y`, `a` and `x` do not have compatible dimensions.
+pub fn spmv_coo<T, Y, X, YDim, XDim>(
+    beta: T,
+    y: &mut Vector<T, YDim, Y>,
+    alpha: T,
+    a: &CooMatrix<T>,
+    x: &Vector<T, XDim, X>,
+) where
+    T: Scalar + ClosedAdd + ClosedMul + Zero + One,
+    YDim: Dim,
+    XDim: Dim,
+    Y: StorageMut<T, YDim>,
+    X: Storage<T, XDim>,
+{
+    assert_eq!(
+        y.len(),
+        a.nrows(),
+        "y and a must be dimensionally compatible"
+    );
+    assert_eq!(
+        a.ncols(),
+        x.len(),
+        "a and x must be dimensionally compatible"
+    );
+
+    if beta == T::zero() {
+        // If `y` is constructed through `new_uninitialized()`, we must make sure to not read
+        // any of the elements in order to avoid UB, so we special case beta == 0
+        // in order to ensure that we only write, not read, the elements in y.
+        for y_i in y.iter_mut() {
+            *y_i = T::zero();
+        }
+    } else if beta != T::one() {
+        // Since the COO triplets have no particular structure, we cannot combine initialization
+        // of y with the triplet loop below, and instead have to do it in a pre-pass.
+        for y_i in y.iter_mut() {
+            *y_i *= beta.inlined_clone();
+        }
+    }
+
+    for (i, j, v) in a.triplet_iter() {
+        // TODO: We could skip bounds checks with unsafe here, since COO ensures that all indices
+        // are in bounds and we assert on dimensions up-front.
+        // The compiler will not be able to elide the checks, since we're doing
+        // random/unpredictable access to elements in `x` and `y`.
+        let (alpha, v, x_j) = (
+            alpha.inlined_clone(),
+            v.inlined_clone(),
+            x[j].inlined_clone(),
+        );
+        y[i] += alpha * v * x_j;
+    }
+}
--- a/nalgebra-sparse/tests/common/mod.rs
+++ b/nalgebra-sparse/tests/common/mod.rs
@ -0,0 +1,20 @@
+#[macro_export]
+macro_rules! assert_panics {
+    ($e:expr) => {{
+        use std::panic::{catch_unwind};
+        use std::stringify;
+        let expr_string = stringify!($e);
+
+        // Note: We cannot manipulate the panic hook here, because it is global and the test
+        // suite is run in parallel, which leads to race conditions in the sense
+        // that some regular tests that panic might not output anything anymore.
+        // Unfortunately this means that output is still printed to stdout if
+        // we run cargo test -- --nocapture. But Cargo does not forward this if the test
+        // binary is not run with nocapture, so it is somewhat acceptable nonetheless.
+
+        let result = catch_unwind(|| $e);
+        if result.is_ok() {
+            panic!("assert_panics!({}) failed: the expression did not panic.", expr_string);
+        }
+    }};
+}
--- a/nalgebra-sparse/tests/unit.rs
+++ b/nalgebra-sparse/tests/unit.rs
@ -0,0 +1,5 @@
+//! Unit tests
+mod unit_tests;
+
+#[macro_use]
+pub mod common;
--- a/nalgebra-sparse/tests/unit_tests/coo.rs
+++ b/nalgebra-sparse/tests/unit_tests/coo.rs
@ -0,0 +1,190 @@
+use nalgebra_sparse::{CooMatrix, SparsePatternError};
+use nalgebra::DMatrix;
+use crate::assert_panics;
+
+#[test]
+fn coo_construction_for_valid_data() {
+    // Test that construction with try_from_triplets succeeds, that the state of the
+    // matrix afterwards is as expected, and that the dense representation matches expectations.
+
+    {
+        // Zero matrix
+        let coo = CooMatrix::<i32>::try_from_triplets(3, 2, Vec::new(), Vec::new(), Vec::new())
+            .unwrap();
+        assert_eq!(coo.nrows(), 3);
+        assert_eq!(coo.ncols(), 2);
+        assert!(coo.triplet_iter().next().is_none());
+        assert!(coo.row_indices().is_empty());
+        assert!(coo.col_indices().is_empty());
+        assert!(coo.values().is_empty());
+
+        assert_eq!(coo.to_dense(), DMatrix::repeat(3, 2, 0));
+    }
+
+    {
+        // Arbitrary matrix, no duplicates
+        let i = vec![0, 1, 0, 0, 2];
+        let j = vec![0, 2, 1, 3, 3];
+        let v = vec![2, 3, 7, 3, 1];
+        let coo = CooMatrix::<i32>::try_from_triplets(3, 5, i.clone(), j.clone(), v.clone())
+            .unwrap();
+        assert_eq!(coo.nrows(), 3);
+        assert_eq!(coo.ncols(), 5);
+
+        assert_eq!(i.as_slice(), coo.row_indices());
+        assert_eq!(j.as_slice(), coo.col_indices());
+        assert_eq!(v.as_slice(), coo.values());
+
+        let expected_triplets: Vec<_> = i
+            .iter()
+            .zip(&j)
+            .zip(&v)
+            .map(|((i, j), v)| (*i, *j, *v))
+            .collect();
+        let actual_triplets: Vec<_> = coo.triplet_iter().map(|(i, j, v)| (i, j, *v)).collect();
+        assert_eq!(actual_triplets, expected_triplets);
+
+        #[rustfmt::skip]
+        let expected_dense = DMatrix::from_row_slice(3, 5, &[
+            2, 7, 0, 3, 0,
+            0, 0, 3, 0, 0,
+            0, 0, 0, 1, 0
+        ]);
+        assert_eq!(coo.to_dense(), expected_dense);
+    }
+
+    {
+        // Arbitrary matrix, with duplicates
+        let i = vec![0, 1, 0, 0, 0, 0, 2, 1];
+        let j = vec![0, 2, 0, 1, 0, 3, 3, 2];
+        let v = vec![2, 3, 4, 7, 1, 3, 1, 5];
+        let coo = CooMatrix::<i32>::try_from_triplets(3, 5, i.clone(), j.clone(), v.clone())
+            .unwrap();
+        assert_eq!(coo.nrows(), 3);
+        assert_eq!(coo.ncols(), 5);
+
+        assert_eq!(i.as_slice(), coo.row_indices());
+        assert_eq!(j.as_slice(), coo.col_indices());
+        assert_eq!(v.as_slice(), coo.values());
+
+        let expected_triplets: Vec<_> = i
+            .iter()
+            .zip(&j)
+            .zip(&v)
+            .map(|((i, j), v)| (*i, *j, *v))
+            .collect();
+        let actual_triplets: Vec<_> = coo.triplet_iter().map(|(i, j, v)| (i, j, *v)).collect();
+        assert_eq!(actual_triplets, expected_triplets);
+
+        #[rustfmt::skip]
+            let expected_dense = DMatrix::from_row_slice(3, 5, &[
+            7, 7, 0, 3, 0,
+            0, 0, 8, 0, 0,
+            0, 0, 0, 1, 0
+        ]);
+        assert_eq!(coo.to_dense(), expected_dense);
+    }
+}
+
+#[test]
+fn coo_try_from_triplets_reports_out_of_bounds_indices() {
+    {
+        // 0x0 matrix
+        let result = CooMatrix::<i32>::try_from_triplets(0, 0, vec![0], vec![0], vec![2]);
+        assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_))));
+    }
+
+    {
+        // 1x1 matrix, row out of bounds
+        let result = CooMatrix::<i32>::try_from_triplets(1, 1, vec![1], vec![0], vec![2]);
+        assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_))));
+    }
+
+    {
+        // 1x1 matrix, col out of bounds
+        let result = CooMatrix::<i32>::try_from_triplets(1, 1, vec![0], vec![1], vec![2]);
+        assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_))));
+    }
+
+    {
+        // 1x1 matrix, row and col out of bounds
+        let result = CooMatrix::<i32>::try_from_triplets(1, 1, vec![1], vec![1], vec![2]);
+        assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_))));
+    }
+
+    {
+        // Arbitrary matrix, row out of bounds
+        let i = vec![0, 1, 0, 3, 2];
+        let j = vec![0, 2, 1, 3, 3];
+        let v = vec![2, 3, 7, 3, 1];
+        let result = CooMatrix::<i32>::try_from_triplets(3, 5, i, j, v);
+        assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_))));
+    }
+
+    {
+        // Arbitrary matrix, col out of bounds
+        let i = vec![0, 1, 0, 0, 2];
+        let j = vec![0, 2, 1, 5, 3];
+        let v = vec![2, 3, 7, 3, 1];
+        let result = CooMatrix::<i32>::try_from_triplets(3, 5, i, j, v);
+        assert!(matches!(result, Err(SparsePatternError::IndexOutOfBounds(_))));
+    }
+}
+
+#[test]
+fn coo_try_from_triplets_panics_on_mismatched_vectors() {
+    // Check that try_from_triplets panics when the triplet vectors have different lengths
+    macro_rules! assert_errs {
+        ($result:expr) => {
+            assert!(matches!($result, Err(SparseFormatError::InvalidStructure(_))))
+        }
+    }
+
+    assert_errs!(CooMatrix::<i32>::try_from_triplets(3, 5, vec![1, 2], vec![0], vec![0]));
+    assert_errs!(CooMatrix::<i32>::try_from_triplets(3, 5, vec![1], vec![0, 0], vec![0]));
+    assert_errs!(CooMatrix::<i32>::try_from_triplets(3, 5, vec![1], vec![0], vec![0, 1]));
+    assert_errs!(CooMatrix::<i32>::try_from_triplets(3, 5, vec![1, 2], vec![0, 1], vec![0]));
+    assert_errs!(CooMatrix::<i32>::try_from_triplets(3, 5, vec![1], vec![0, 1], vec![0, 1]));
+    assert_errs!(CooMatrix::<i32>::try_from_triplets(3, 5, vec![1, 1], vec![0], vec![0, 1]));
+}
+
+#[test]
+fn coo_push_valid_entries() {
+    let mut coo = CooMatrix::new(3, 3);
+
+    coo.push(0, 0, 1);
+    assert_eq!(coo.triplet_iter().collect::<Vec<_>>(), vec![(0, 0, &1)]);
+
+    coo.push(0, 0, 2);
+    assert_eq!(coo.triplet_iter().collect::<Vec<_>>(), vec![(0, 0, &1), (0, 0, &2)]);
+
+    coo.push(2, 2, 3);
+    assert_eq!(coo.triplet_iter().collect::<Vec<_>>(), vec![(0, 0, &1), (0, 0, &2), (2, 2, &3)]);
+}
+
+#[test]
+fn coo_push_out_of_bounds_entries() {
+    {
+        // 0x0 matrix
+        let coo = CooMatrix::new(0, 0);
+        assert_panics!(coo.clone().push(0, 0, 1));
+    }
+
+    {
+        // 0x1 matrix
+        assert_panics!(CooMatrix::new(0, 1).push(0, 0, 1));
+    }
+
+    {
+        // 1x0 matrix
+        assert_panics!(CooMatrix::new(1, 0).push(0, 0, 1));
+    }
+
+    {
+        // Arbitrary matrix dimensions
+        let coo = CooMatrix::new(3, 2);
+        assert_panics!(coo.clone().push(3, 0, 1));
+        assert_panics!(coo.clone().push(2, 2, 1));
+        assert_panics!(coo.clone().push(3, 2, 1));
+    }
+}
--- a/nalgebra-sparse/tests/unit_tests/mod.rs
+++ b/nalgebra-sparse/tests/unit_tests/mod.rs
@ -0,0 +1,2 @@
+mod coo;
+mod ops;
--- a/nalgebra-sparse/tests/unit_tests/ops.rs
+++ b/nalgebra-sparse/tests/unit_tests/ops.rs
@ -0,0 +1,28 @@
+use nalgebra_sparse::CooMatrix;
+use nalgebra_sparse::ops::spmv_coo;
+use nalgebra::DVector;
+
+#[test]
+fn spmv_coo_agrees_with_dense_gemv() {
+    let x = DVector::from_column_slice(&[2, 3, 4, 5]);
+
+    let i = vec![0, 0, 1, 1, 2, 2];
+    let j = vec![0, 3, 0, 1, 1, 3];
+    let v = vec![3, 2, 1, 2, 3, 1];
+    let a = CooMatrix::try_from_triplets(3, 4, i, j, v).unwrap();
+
+    let betas = [0, 1, 2];
+    let alphas = [0, 1, 2];
+
+    for &beta in &betas {
+        for &alpha in &alphas {
+            let mut y = DVector::from_column_slice(&[2, 5, 3]);
+            let mut y_dense = y.clone();
+            spmv_coo(beta, &mut y, alpha, &a, &x);
+
+            y_dense.gemv(alpha, &a.to_dense(), &x, beta);
+
+            assert_eq!(y, y_dense);
+        }
+    }
+}