From ed07b78b972466e6a32ceb62557286062a6f69f3 Mon Sep 17 00:00:00 2001 From: sebcrozet Date: Tue, 6 Nov 2018 18:31:04 +0100 Subject: [PATCH] Add matrixmarket parser. --- Cargo.toml | 3 + src/io/matrix_market.pest | 16 +++ src/io/matrix_market.rs | 51 ++++++++ src/io/mod.rs | 3 + src/sparse/cs_matrix.rs | 194 +++++++++++++++++++++++++---- src/sparse/cs_matrix_conversion.rs | 60 ++++++++- src/sparse/cs_utils.rs | 18 +++ src/sparse/mod.rs | 1 + tests/sparse/cs_conversion.rs | 76 ++++++++++- tests/sparse/cs_matrix_market.rs | 55 ++++++++ tests/sparse/mod.rs | 2 + 11 files changed, 450 insertions(+), 29 deletions(-) create mode 100644 src/io/matrix_market.pest create mode 100644 src/io/matrix_market.rs create mode 100644 src/io/mod.rs create mode 100644 src/sparse/cs_utils.rs create mode 100644 tests/sparse/cs_matrix_market.rs diff --git a/Cargo.toml b/Cargo.toml index 88857c55..ed76cbb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ abomonation-serialize = [ "abomonation" ] sparse = [ ] debug = [ ] alloc = [ ] +io = [ "pest", "pest_derive" ] [dependencies] typenum = "1.10" @@ -41,6 +42,8 @@ serde_derive = { version = "1.0", optional = true } abomonation = { version = "0.5", optional = true } mint = { version = "0.5", optional = true } quickcheck = { version = "0.6", optional = true } +pest = { version = "2.0", optional = true } +pest_derive = { version = "2.0", optional = true } [dev-dependencies] serde_json = "1.0" diff --git a/src/io/matrix_market.pest b/src/io/matrix_market.pest new file mode 100644 index 00000000..eafe5b82 --- /dev/null +++ b/src/io/matrix_market.pest @@ -0,0 +1,16 @@ +WHITESPACE = _{ " " } + +Comments = _{ "%" ~ (!NEWLINE ~ ANY)* } +Header = { "%%" ~ (!NEWLINE ~ ANY)* } +Shape = { Dimension ~ Dimension ~ Dimension } +Document = { + SOI ~ + NEWLINE ~ + Header ~ + (NEWLINE ~ Comments)* ~ + (NEWLINE ~ Shape) ~ + (NEWLINE ~ Entry?)* +} +Dimension = @{ ASCII_DIGIT+ } +Value = @{ ("+" | "-")? ~ NUMBER+ ~ ("." ~ NUMBER+)? ~ ("e" ~ ("+" | "-")? ~ NUMBER+)? } +Entry = { Dimension ~ Dimension ~ Value } \ No newline at end of file diff --git a/src/io/matrix_market.rs b/src/io/matrix_market.rs new file mode 100644 index 00000000..12fb6c55 --- /dev/null +++ b/src/io/matrix_market.rs @@ -0,0 +1,51 @@ +use std::fs; +use std::path::Path; + +use pest::Parser; +use sparse::CsMatrix; +use Real; + +#[derive(Parser)] +#[grammar = "io/matrix_market.pest"] +struct MatrixMarketParser; + +// FIXME: return an Error instead of an Option. +pub fn cs_matrix_from_matrix_market>(path: P) -> Option> { + let file = fs::read_to_string(path).ok()?; + cs_matrix_from_matrix_market_str(&file) +} + +// FIXME: return an Error instead of an Option. +pub fn cs_matrix_from_matrix_market_str(data: &str) -> Option> { + let file = MatrixMarketParser::parse(Rule::Document, data) + .unwrap() + .next()?; + let mut shape = (0, 0, 0); + let mut rows: Vec = Vec::new(); + let mut cols: Vec = Vec::new(); + let mut data: Vec = Vec::new(); + + for line in file.into_inner() { + match line.as_rule() { + Rule::Header => {} + Rule::Shape => { + let mut inner = line.into_inner(); + shape.0 = inner.next()?.as_str().parse::().ok()?; + shape.1 = inner.next()?.as_str().parse::().ok()?; + shape.2 = inner.next()?.as_str().parse::().ok()?; + } + Rule::Entry => { + let mut inner = line.into_inner(); + // NOTE: indices are 1-based. + rows.push(inner.next()?.as_str().parse::().ok()? - 1); + cols.push(inner.next()?.as_str().parse::().ok()? - 1); + data.push(::convert(inner.next()?.as_str().parse::().ok()?)); + } + _ => return None, // FIXME: return an Err instead. + } + } + + Some(CsMatrix::from_triplet( + shape.0, shape.1, &rows, &cols, &data, + )) +} diff --git a/src/io/mod.rs b/src/io/mod.rs new file mode 100644 index 00000000..fd7dc536 --- /dev/null +++ b/src/io/mod.rs @@ -0,0 +1,3 @@ +pub use self::matrix_market::*; + +mod matrix_market; diff --git a/src/sparse/cs_matrix.rs b/src/sparse/cs_matrix.rs index 707ea93d..9bb03cda 100644 --- a/src/sparse/cs_matrix.rs +++ b/src/sparse/cs_matrix.rs @@ -7,8 +7,43 @@ use std::slice; use allocator::Allocator; use constraint::{AreMultipliable, DimEq, SameNumberOfRows, ShapeConstraint}; +use sparse::cs_utils; use storage::{Storage, StorageMut}; -use {DefaultAllocator, Dim, Matrix, MatrixMN, Real, Scalar, Vector, VectorN, U1}; +use { + DVector, DefaultAllocator, Dim, Dynamic, Matrix, MatrixMN, MatrixVec, Real, Scalar, Vector, + VectorN, U1, +}; + +pub struct ColumnEntries<'a, N> { + curr: usize, + i: &'a [usize], + v: &'a [N], +} + +impl<'a, N> ColumnEntries<'a, N> { + #[inline] + pub fn new(i: &'a [usize], v: &'a [N]) -> Self { + assert_eq!(i.len(), v.len()); + ColumnEntries { curr: 0, i, v } + } +} + +impl<'a, N: Copy> Iterator for ColumnEntries<'a, N> { + type Item = (usize, N); + + #[inline] + fn next(&mut self) -> Option<(usize, N)> { + if self.curr >= self.i.len() { + None + } else { + let res = Some((unsafe { *self.i.get_unchecked(self.curr) }, unsafe { + *self.v.get_unchecked(self.curr) + })); + self.curr += 1; + res + } + } +} // FIXME: this structure exists for now only because impl trait // cannot be used for trait method return types. @@ -17,12 +52,15 @@ pub trait CsStorageIter<'a, N, R, C = U1> { type ColumnRowIndices: Iterator; fn column_row_indices(&'a self, j: usize) -> Self::ColumnRowIndices; + #[inline(always)] fn column_entries(&'a self, j: usize) -> Self::ColumnEntries; } pub trait CsStorageIterMut<'a, N: 'a, R, C = U1> { + type ValuesMut: Iterator; type ColumnEntriesMut: Iterator; + fn values_mut(&'a mut self) -> Self::ValuesMut; fn column_entries_mut(&'a mut self, j: usize) -> Self::ColumnEntriesMut; } @@ -41,7 +79,7 @@ pub trait CsStorageMut: { } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct CsVecStorage where DefaultAllocator: Allocator, @@ -59,6 +97,12 @@ where pub fn values(&self) -> &[N] { &self.vals } + pub fn p(&self) -> &[usize] { + self.p.as_slice() + } + pub fn i(&self) -> &[usize] { + &self.i + } } impl CsVecStorage where DefaultAllocator: Allocator {} @@ -67,17 +111,13 @@ impl<'a, N: Scalar, R: Dim, C: Dim> CsStorageIter<'a, N, R, C> for CsVecStorage< where DefaultAllocator: Allocator, { - type ColumnEntries = - iter::Zip>, iter::Cloned>>; + type ColumnEntries = ColumnEntries<'a, N>; type ColumnRowIndices = iter::Cloned>; #[inline] fn column_entries(&'a self, j: usize) -> Self::ColumnEntries { let rng = self.column_range(j); - self.i[rng.clone()] - .iter() - .cloned() - .zip(self.vals[rng].iter().cloned()) + ColumnEntries::new(&self.i[rng.clone()], &self.vals[rng]) } #[inline] @@ -137,8 +177,14 @@ impl<'a, N: Scalar, R: Dim, C: Dim> CsStorageIterMut<'a, N, R, C> for CsVecStora where DefaultAllocator: Allocator, { + type ValuesMut = slice::IterMut<'a, N>; type ColumnEntriesMut = iter::Zip>, slice::IterMut<'a, N>>; + #[inline] + fn values_mut(&'a mut self) -> Self::ValuesMut { + self.vals.iter_mut() + } + #[inline] fn column_entries_mut(&'a mut self, j: usize) -> Self::ColumnEntriesMut { let rng = self.column_range(j); @@ -163,13 +209,18 @@ pub struct CsSliceStorage<'a, N: Scalar, R: Dim, C: DimAdd> { }*/ /// A compressed sparse column matrix. -#[derive(Clone, Debug)] -pub struct CsMatrix = CsVecStorage> { +#[derive(Clone, Debug, PartialEq)] +pub struct CsMatrix< + N: Scalar, + R: Dim = Dynamic, + C: Dim = Dynamic, + S: CsStorage = CsVecStorage, +> { pub data: S, _phantoms: PhantomData<(N, R, C)>, } -pub type CsVector> = CsMatrix; +pub type CsVector> = CsMatrix; impl CsMatrix where @@ -198,22 +249,66 @@ where _phantoms: PhantomData, } } + + pub fn from_parts_generic( + nrows: R, + ncols: C, + p: VectorN, + i: Vec, + vals: Vec, + ) -> Self + where + N: Zero + ClosedAdd, + DefaultAllocator: Allocator, + { + assert_eq!(ncols.value(), p.len(), "Invalid inptr size."); + assert_eq!(i.len(), vals.len(), "Invalid value size."); + + // Check p. + for ptr in &p { + assert!(*ptr < i.len(), "Invalid inptr value."); + } + + for ptr in p.as_slice().windows(2) { + assert!(ptr[0] <= ptr[1], "Invalid inptr ordering."); + } + + // Check i. + for i in &i { + assert!(*i < nrows.value(), "Invalid row ptr value.") + } + + let mut res = CsMatrix { + data: CsVecStorage { + shape: (nrows, ncols), + p, + i, + vals, + }, + _phantoms: PhantomData, + }; + + // Sort and remove duplicates. + res.sort(); + res.dedup(); + + res + } } -fn cumsum(a: &mut VectorN, b: &mut VectorN) -> usize -where - DefaultAllocator: Allocator, -{ - assert!(a.len() == b.len()); - let mut sum = 0; - - for i in 0..a.len() { - b[i] = sum; - sum += a[i]; - a[i] = b[i]; +impl CsMatrix { + pub fn from_parts( + nrows: usize, + ncols: usize, + p: Vec, + i: Vec, + vals: Vec, + ) -> Self { + let nrows = Dynamic::new(nrows); + let ncols = Dynamic::new(ncols); + let p = DVector::from_data(MatrixVec::new(ncols, U1, p)); + Self::from_parts_generic(nrows, ncols, p, i, vals) } - - sum } impl> CsMatrix { @@ -288,7 +383,7 @@ impl> CsMatrix { workspace[row_id] += 1; } - let _ = cumsum(&mut workspace, &mut res.data.p); + let _ = cs_utils::cumsum(&mut workspace, &mut res.data.p); // Fill the result. for j in 0..ncols.value() { @@ -305,6 +400,13 @@ impl> CsMatrix { } } +impl> CsMatrix { + #[inline] + pub fn values_mut(&mut self) -> impl Iterator { + self.data.values_mut() + } +} + impl CsMatrix where DefaultAllocator: Allocator, @@ -341,4 +443,46 @@ where } } } + + // Remove dupliate entries on a sorted CsMatrix. + pub(crate) fn dedup(&mut self) + where + N: Zero + ClosedAdd, + { + let mut curr_i = 0; + + for j in 0..self.ncols() { + let range = self.data.column_range(j); + self.data.p[j] = curr_i; + + if range.start != range.end { + let mut value = N::zero(); + let mut irow = self.data.i[range.start]; + + for idx in range { + let curr_irow = self.data.i[idx]; + + if curr_irow == irow { + value += self.data.vals[idx]; + } else { + self.data.i[curr_i] = irow; + self.data.vals[curr_i] = value; + value = self.data.vals[idx]; + irow = curr_irow; + curr_i += 1; + } + } + + // Handle the last entry. + self.data.i[curr_i] = irow; + self.data.vals[curr_i] = value; + curr_i += 1; + } + } + + self.data.i.truncate(curr_i); + self.data.i.shrink_to_fit(); + self.data.vals.truncate(curr_i); + self.data.vals.shrink_to_fit(); + } } diff --git a/src/sparse/cs_matrix_conversion.rs b/src/sparse/cs_matrix_conversion.rs index 90f5cde0..b764bf10 100644 --- a/src/sparse/cs_matrix_conversion.rs +++ b/src/sparse/cs_matrix_conversion.rs @@ -7,9 +7,67 @@ use std::slice; use allocator::Allocator; use constraint::{AreMultipliable, DimEq, SameNumberOfRows, ShapeConstraint}; +use sparse::cs_utils; use sparse::{CsMatrix, CsStorage, CsVector}; use storage::{Storage, StorageMut}; -use {DefaultAllocator, Dim, Matrix, MatrixMN, Real, Scalar, Vector, VectorN, U1}; +use {DefaultAllocator, Dim, Dynamic, Matrix, MatrixMN, Real, Scalar, Vector, VectorN, U1}; + +impl<'a, N: Scalar + Zero + ClosedAdd> CsMatrix { + // FIXME: implement for dimensions other than Dynamic too. + pub fn from_triplet( + nrows: usize, + ncols: usize, + irows: &[usize], + icols: &[usize], + vals: &[N], + ) -> Self { + Self::from_triplet_generic(Dynamic::new(nrows), Dynamic::new(ncols), irows, icols, vals) + } +} + +impl<'a, N: Scalar + Zero + ClosedAdd, R: Dim, C: Dim> CsMatrix +where + DefaultAllocator: Allocator + Allocator, +{ + pub fn from_triplet_generic( + nrows: R, + ncols: C, + irows: &[usize], + icols: &[usize], + vals: &[N], + ) -> Self { + assert!(vals.len() == irows.len()); + assert!(vals.len() == icols.len()); + + let mut res = CsMatrix::new_uninitialized_generic(nrows, ncols, vals.len()); + let mut workspace = res.data.p.clone(); + + // Column count. + for j in icols.iter().cloned() { + workspace[j] += 1; + } + + let _ = cs_utils::cumsum(&mut workspace, &mut res.data.p); + + // Fill i and vals. + for ((i, j), val) in irows + .iter() + .cloned() + .zip(icols.iter().cloned()) + .zip(vals.iter().cloned()) + { + let offset = workspace[j]; + res.data.i[offset] = i; + res.data.vals[offset] = val; + workspace[j] = offset + 1; + } + + // Sort the result. + res.sort(); + res.dedup(); + res + } +} impl<'a, N: Scalar + Zero, R: Dim, C: Dim, S> From> for MatrixMN where diff --git a/src/sparse/cs_utils.rs b/src/sparse/cs_utils.rs new file mode 100644 index 00000000..a79ee4d9 --- /dev/null +++ b/src/sparse/cs_utils.rs @@ -0,0 +1,18 @@ +use allocator::Allocator; +use {DefaultAllocator, Dim, VectorN}; + +pub fn cumsum(a: &mut VectorN, b: &mut VectorN) -> usize +where + DefaultAllocator: Allocator, +{ + assert!(a.len() == b.len()); + let mut sum = 0; + + for i in 0..a.len() { + b[i] = sum; + sum += a[i]; + a[i] = b[i]; + } + + sum +} diff --git a/src/sparse/mod.rs b/src/sparse/mod.rs index 411e133b..546507eb 100644 --- a/src/sparse/mod.rs +++ b/src/sparse/mod.rs @@ -8,3 +8,4 @@ mod cs_matrix_cholesky; mod cs_matrix_conversion; mod cs_matrix_ops; mod cs_matrix_solve; +pub mod cs_utils; diff --git a/tests/sparse/cs_conversion.rs b/tests/sparse/cs_conversion.rs index 8a337636..f08fe758 100644 --- a/tests/sparse/cs_conversion.rs +++ b/tests/sparse/cs_conversion.rs @@ -1,9 +1,8 @@ -#![cfg_attr(rustfmt, rustfmt_skip)] - -use na::{Matrix4x5, CsMatrix}; +use na::{CsMatrix, DMatrix, Matrix4x5}; #[test] fn cs_from_to_matrix() { + #[cfg_attr(rustfmt, rustfmt_skip)] let m = Matrix4x5::new( 5.0, 6.0, 0.0, 8.0, 15.0, 9.0, 10.0, 11.0, 12.0, 0.0, @@ -17,3 +16,74 @@ fn cs_from_to_matrix() { let m2: Matrix4x5<_> = cs.into(); assert_eq!(m2, m); } + +#[test] +fn cs_matrix_from_triplet() { + let mut irows = vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3]; + let mut icols = vec![0, 1, 3, 4, 0, 1, 2, 3, 2, 1, 2, 4]; + let mut vals = vec![ + 5.0, 6.0, 8.0, 15.0, 9.0, 10.0, 11.0, 12.0, 13.0, 1.0, 4.0, 14.0, + ]; + + #[cfg_attr(rustfmt, rustfmt_skip)] + let expected = DMatrix::from_row_slice(4, 5, &[ + 5.0, 6.0, 0.0, 8.0, 15.0, + 9.0, 10.0, 11.0, 12.0, 0.0, + 0.0, 0.0, 13.0, 0.0, 0.0, + 0.0, 1.0, 4.0, 0.0, 14.0, + ]); + let cs_expected = CsMatrix::from_parts( + 4, + 5, + vec![0, 2, 5, 8, 10], + vec![0, 1, 0, 1, 3, 1, 2, 3, 0, 1, 0, 3], + vec![ + 5.0, 9.0, 6.0, 10.0, 1.0, 11.0, 13.0, 4.0, 8.0, 12.0, 15.0, 14.0, + ], + ); + + let cs_mat = CsMatrix::from_triplet(4, 5, &irows, &icols, &vals); + println!("Mat from triplet: {:?}", cs_mat); + assert!(cs_mat.is_sorted()); + assert_eq!(cs_mat, cs_expected); + + let m: DMatrix<_> = cs_mat.into(); + assert_eq!(m, expected); + + /* + * Try again with some permutations. + */ + let permutations = [(2, 5), (0, 4), (8, 10), (1, 11)]; + + for (i, j) in &permutations { + irows.swap(*i, *j); + icols.swap(*i, *j); + vals.swap(*i, *j); + } + + let cs_mat = CsMatrix::from_triplet(4, 5, &irows, &icols, &vals); + println!("Mat from triplet: {:?}", cs_mat); + assert!(cs_mat.is_sorted()); + assert_eq!(cs_mat, cs_expected); + + let m: DMatrix<_> = cs_mat.into(); + assert_eq!(m, expected); + + /* + * Try again, duplicating all entries. + */ + let mut ir = irows.clone(); + let mut ic = icols.clone(); + let mut va = vals.clone(); + irows.append(&mut ir); + icols.append(&mut ic); + vals.append(&mut va); + + let cs_mat = CsMatrix::from_triplet(4, 5, &irows, &icols, &vals); + println!("Mat from triplet: {:?}", cs_mat); + assert!(cs_mat.is_sorted()); + assert_eq!(cs_mat, cs_expected * 2.0); + + let m: DMatrix<_> = cs_mat.into(); + assert_eq!(m, expected * 2.0); +} diff --git a/tests/sparse/cs_matrix_market.rs b/tests/sparse/cs_matrix_market.rs new file mode 100644 index 00000000..12414b37 --- /dev/null +++ b/tests/sparse/cs_matrix_market.rs @@ -0,0 +1,55 @@ +#![cfg_attr(rustfmt, rustfmt_skip)] + + +use na::io; +use na::DMatrix; + +#[test] +fn cs_matrix_market() { + let file_str = r#" + %%MatrixMarket matrix coordinate real general +%================================================================================= +% +% This ASCII file represents a sparse MxN matrix with L +% nonzeros in the following Matrix Market format: +% +% +----------------------------------------------+ +% |%%MatrixMarket matrix coordinate real general | <--- header line +% |% | <--+ +% |% comments | |-- 0 or more comment lines +% |% | <--+ +% | M N L | <--- rows, columns, entries +% | I1 J1 A(I1, J1) | <--+ +% | I2 J2 A(I2, J2) | | +% | I3 J3 A(I3, J3) | |-- L lines +% | . . . | | +% | IL JL A(IL, JL) | <--+ +% +----------------------------------------------+ +% +% Indices are 1-based, i.e. A(1,1) is the first element. +% +%================================================================================= + 5 5 8 + 1 1 1.000e+00 + 2 2 1.050e+01 + 3 3 1.500e-02 + 1 4 6.000e+00 + 4 2 2.505e+02 + 4 4 -2.800e+02 + 4 5 3.332e+01 + 5 5 1.200e+01 +"#; + + let cs_mat = io::cs_matrix_from_matrix_market_str(file_str).unwrap(); + println!("CS mat: {:?}", cs_mat); + let mat: DMatrix<_> = cs_mat.into(); + let expected = DMatrix::from_row_slice(5, 5, &[ + 1.0, 0.0, 0.0, 6.0, 0.0, + 0.0, 10.5, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.015, 0.0, 0.0, + 0.0, 250.5, 0.0, -280.0, 33.32, + 0.0, 0.0, 0.0, 0.0, 12.0, + ]); + + assert_eq!(mat, expected); +} diff --git a/tests/sparse/mod.rs b/tests/sparse/mod.rs index 0e772c99..df8e7e37 100644 --- a/tests/sparse/mod.rs +++ b/tests/sparse/mod.rs @@ -2,5 +2,7 @@ mod cs_cholesky; mod cs_construction; mod cs_conversion; mod cs_matrix; +#[cfg(feature = "io")] +mod cs_matrix_market; mod cs_ops; mod cs_solve;