Add matrixmarket parser.

This commit is contained in:
sebcrozet 2018-11-06 18:31:04 +01:00
parent 383a18f083
commit ed07b78b97
11 changed files with 450 additions and 29 deletions

View File

@ -26,6 +26,7 @@ abomonation-serialize = [ "abomonation" ]
sparse = [ ]
debug = [ ]
alloc = [ ]
io = [ "pest", "pest_derive" ]
[dependencies]
typenum = "1.10"
@ -41,6 +42,8 @@ serde_derive = { version = "1.0", optional = true }
abomonation = { version = "0.5", optional = true }
mint = { version = "0.5", optional = true }
quickcheck = { version = "0.6", optional = true }
pest = { version = "2.0", optional = true }
pest_derive = { version = "2.0", optional = true }
[dev-dependencies]
serde_json = "1.0"

16
src/io/matrix_market.pest Normal file
View File

@ -0,0 +1,16 @@
WHITESPACE = _{ " " }
Comments = _{ "%" ~ (!NEWLINE ~ ANY)* }
Header = { "%%" ~ (!NEWLINE ~ ANY)* }
Shape = { Dimension ~ Dimension ~ Dimension }
Document = {
SOI ~
NEWLINE ~
Header ~
(NEWLINE ~ Comments)* ~
(NEWLINE ~ Shape) ~
(NEWLINE ~ Entry?)*
}
Dimension = @{ ASCII_DIGIT+ }
Value = @{ ("+" | "-")? ~ NUMBER+ ~ ("." ~ NUMBER+)? ~ ("e" ~ ("+" | "-")? ~ NUMBER+)? }
Entry = { Dimension ~ Dimension ~ Value }

51
src/io/matrix_market.rs Normal file
View File

@ -0,0 +1,51 @@
use std::fs;
use std::path::Path;
use pest::Parser;
use sparse::CsMatrix;
use Real;
#[derive(Parser)]
#[grammar = "io/matrix_market.pest"]
struct MatrixMarketParser;
// FIXME: return an Error instead of an Option.
pub fn cs_matrix_from_matrix_market<N: Real, P: AsRef<Path>>(path: P) -> Option<CsMatrix<N>> {
let file = fs::read_to_string(path).ok()?;
cs_matrix_from_matrix_market_str(&file)
}
// FIXME: return an Error instead of an Option.
pub fn cs_matrix_from_matrix_market_str<N: Real>(data: &str) -> Option<CsMatrix<N>> {
let file = MatrixMarketParser::parse(Rule::Document, data)
.unwrap()
.next()?;
let mut shape = (0, 0, 0);
let mut rows: Vec<usize> = Vec::new();
let mut cols: Vec<usize> = Vec::new();
let mut data: Vec<N> = Vec::new();
for line in file.into_inner() {
match line.as_rule() {
Rule::Header => {}
Rule::Shape => {
let mut inner = line.into_inner();
shape.0 = inner.next()?.as_str().parse::<usize>().ok()?;
shape.1 = inner.next()?.as_str().parse::<usize>().ok()?;
shape.2 = inner.next()?.as_str().parse::<usize>().ok()?;
}
Rule::Entry => {
let mut inner = line.into_inner();
// NOTE: indices are 1-based.
rows.push(inner.next()?.as_str().parse::<usize>().ok()? - 1);
cols.push(inner.next()?.as_str().parse::<usize>().ok()? - 1);
data.push(::convert(inner.next()?.as_str().parse::<f64>().ok()?));
}
_ => return None, // FIXME: return an Err instead.
}
}
Some(CsMatrix::from_triplet(
shape.0, shape.1, &rows, &cols, &data,
))
}

3
src/io/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub use self::matrix_market::*;
mod matrix_market;

View File

@ -7,8 +7,43 @@ use std::slice;
use allocator::Allocator;
use constraint::{AreMultipliable, DimEq, SameNumberOfRows, ShapeConstraint};
use sparse::cs_utils;
use storage::{Storage, StorageMut};
use {DefaultAllocator, Dim, Matrix, MatrixMN, Real, Scalar, Vector, VectorN, U1};
use {
DVector, DefaultAllocator, Dim, Dynamic, Matrix, MatrixMN, MatrixVec, Real, Scalar, Vector,
VectorN, U1,
};
pub struct ColumnEntries<'a, N> {
curr: usize,
i: &'a [usize],
v: &'a [N],
}
impl<'a, N> ColumnEntries<'a, N> {
#[inline]
pub fn new(i: &'a [usize], v: &'a [N]) -> Self {
assert_eq!(i.len(), v.len());
ColumnEntries { curr: 0, i, v }
}
}
impl<'a, N: Copy> Iterator for ColumnEntries<'a, N> {
type Item = (usize, N);
#[inline]
fn next(&mut self) -> Option<(usize, N)> {
if self.curr >= self.i.len() {
None
} else {
let res = Some((unsafe { *self.i.get_unchecked(self.curr) }, unsafe {
*self.v.get_unchecked(self.curr)
}));
self.curr += 1;
res
}
}
}
// FIXME: this structure exists for now only because impl trait
// cannot be used for trait method return types.
@ -17,12 +52,15 @@ pub trait CsStorageIter<'a, N, R, C = U1> {
type ColumnRowIndices: Iterator<Item = usize>;
fn column_row_indices(&'a self, j: usize) -> Self::ColumnRowIndices;
#[inline(always)]
fn column_entries(&'a self, j: usize) -> Self::ColumnEntries;
}
pub trait CsStorageIterMut<'a, N: 'a, R, C = U1> {
type ValuesMut: Iterator<Item = &'a mut N>;
type ColumnEntriesMut: Iterator<Item = (usize, &'a mut N)>;
fn values_mut(&'a mut self) -> Self::ValuesMut;
fn column_entries_mut(&'a mut self, j: usize) -> Self::ColumnEntriesMut;
}
@ -41,7 +79,7 @@ pub trait CsStorageMut<N, R, C = U1>:
{
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub struct CsVecStorage<N: Scalar, R: Dim, C: Dim>
where
DefaultAllocator: Allocator<usize, C>,
@ -59,6 +97,12 @@ where
pub fn values(&self) -> &[N] {
&self.vals
}
pub fn p(&self) -> &[usize] {
self.p.as_slice()
}
pub fn i(&self) -> &[usize] {
&self.i
}
}
impl<N: Scalar, R: Dim, C: Dim> CsVecStorage<N, R, C> where DefaultAllocator: Allocator<usize, C> {}
@ -67,17 +111,13 @@ impl<'a, N: Scalar, R: Dim, C: Dim> CsStorageIter<'a, N, R, C> for CsVecStorage<
where
DefaultAllocator: Allocator<usize, C>,
{
type ColumnEntries =
iter::Zip<iter::Cloned<slice::Iter<'a, usize>>, iter::Cloned<slice::Iter<'a, N>>>;
type ColumnEntries = ColumnEntries<'a, N>;
type ColumnRowIndices = iter::Cloned<slice::Iter<'a, usize>>;
#[inline]
fn column_entries(&'a self, j: usize) -> Self::ColumnEntries {
let rng = self.column_range(j);
self.i[rng.clone()]
.iter()
.cloned()
.zip(self.vals[rng].iter().cloned())
ColumnEntries::new(&self.i[rng.clone()], &self.vals[rng])
}
#[inline]
@ -137,8 +177,14 @@ impl<'a, N: Scalar, R: Dim, C: Dim> CsStorageIterMut<'a, N, R, C> for CsVecStora
where
DefaultAllocator: Allocator<usize, C>,
{
type ValuesMut = slice::IterMut<'a, N>;
type ColumnEntriesMut = iter::Zip<iter::Cloned<slice::Iter<'a, usize>>, slice::IterMut<'a, N>>;
#[inline]
fn values_mut(&'a mut self) -> Self::ValuesMut {
self.vals.iter_mut()
}
#[inline]
fn column_entries_mut(&'a mut self, j: usize) -> Self::ColumnEntriesMut {
let rng = self.column_range(j);
@ -163,13 +209,18 @@ pub struct CsSliceStorage<'a, N: Scalar, R: Dim, C: DimAdd<U1>> {
}*/
/// A compressed sparse column matrix.
#[derive(Clone, Debug)]
pub struct CsMatrix<N: Scalar, R: Dim, C: Dim, S: CsStorage<N, R, C> = CsVecStorage<N, R, C>> {
#[derive(Clone, Debug, PartialEq)]
pub struct CsMatrix<
N: Scalar,
R: Dim = Dynamic,
C: Dim = Dynamic,
S: CsStorage<N, R, C> = CsVecStorage<N, R, C>,
> {
pub data: S,
_phantoms: PhantomData<(N, R, C)>,
}
pub type CsVector<N, R, S = CsVecStorage<N, R, U1>> = CsMatrix<N, R, U1, S>;
pub type CsVector<N, R = Dynamic, S = CsVecStorage<N, R, U1>> = CsMatrix<N, R, U1, S>;
impl<N: Scalar, R: Dim, C: Dim> CsMatrix<N, R, C>
where
@ -198,22 +249,66 @@ where
_phantoms: PhantomData,
}
}
}
fn cumsum<D: Dim>(a: &mut VectorN<usize, D>, b: &mut VectorN<usize, D>) -> usize
where
DefaultAllocator: Allocator<usize, D>,
{
assert!(a.len() == b.len());
let mut sum = 0;
pub fn from_parts_generic(
nrows: R,
ncols: C,
p: VectorN<usize, C>,
i: Vec<usize>,
vals: Vec<N>,
) -> Self
where
N: Zero + ClosedAdd,
DefaultAllocator: Allocator<N, R>,
{
assert_eq!(ncols.value(), p.len(), "Invalid inptr size.");
assert_eq!(i.len(), vals.len(), "Invalid value size.");
for i in 0..a.len() {
b[i] = sum;
sum += a[i];
a[i] = b[i];
// Check p.
for ptr in &p {
assert!(*ptr < i.len(), "Invalid inptr value.");
}
sum
for ptr in p.as_slice().windows(2) {
assert!(ptr[0] <= ptr[1], "Invalid inptr ordering.");
}
// Check i.
for i in &i {
assert!(*i < nrows.value(), "Invalid row ptr value.")
}
let mut res = CsMatrix {
data: CsVecStorage {
shape: (nrows, ncols),
p,
i,
vals,
},
_phantoms: PhantomData,
};
// Sort and remove duplicates.
res.sort();
res.dedup();
res
}
}
impl<N: Scalar + Zero + ClosedAdd> CsMatrix<N> {
pub fn from_parts(
nrows: usize,
ncols: usize,
p: Vec<usize>,
i: Vec<usize>,
vals: Vec<N>,
) -> Self {
let nrows = Dynamic::new(nrows);
let ncols = Dynamic::new(ncols);
let p = DVector::from_data(MatrixVec::new(ncols, U1, p));
Self::from_parts_generic(nrows, ncols, p, i, vals)
}
}
impl<N: Scalar, R: Dim, C: Dim, S: CsStorage<N, R, C>> CsMatrix<N, R, C, S> {
@ -288,7 +383,7 @@ impl<N: Scalar, R: Dim, C: Dim, S: CsStorage<N, R, C>> CsMatrix<N, R, C, S> {
workspace[row_id] += 1;
}
let _ = cumsum(&mut workspace, &mut res.data.p);
let _ = cs_utils::cumsum(&mut workspace, &mut res.data.p);
// Fill the result.
for j in 0..ncols.value() {
@ -305,6 +400,13 @@ impl<N: Scalar, R: Dim, C: Dim, S: CsStorage<N, R, C>> CsMatrix<N, R, C, S> {
}
}
impl<N: Scalar, R: Dim, C: Dim, S: CsStorageMut<N, R, C>> CsMatrix<N, R, C, S> {
#[inline]
pub fn values_mut(&mut self) -> impl Iterator<Item = &mut N> {
self.data.values_mut()
}
}
impl<N: Scalar, R: Dim, C: Dim> CsMatrix<N, R, C>
where
DefaultAllocator: Allocator<usize, C>,
@ -341,4 +443,46 @@ where
}
}
}
// Remove dupliate entries on a sorted CsMatrix.
pub(crate) fn dedup(&mut self)
where
N: Zero + ClosedAdd,
{
let mut curr_i = 0;
for j in 0..self.ncols() {
let range = self.data.column_range(j);
self.data.p[j] = curr_i;
if range.start != range.end {
let mut value = N::zero();
let mut irow = self.data.i[range.start];
for idx in range {
let curr_irow = self.data.i[idx];
if curr_irow == irow {
value += self.data.vals[idx];
} else {
self.data.i[curr_i] = irow;
self.data.vals[curr_i] = value;
value = self.data.vals[idx];
irow = curr_irow;
curr_i += 1;
}
}
// Handle the last entry.
self.data.i[curr_i] = irow;
self.data.vals[curr_i] = value;
curr_i += 1;
}
}
self.data.i.truncate(curr_i);
self.data.i.shrink_to_fit();
self.data.vals.truncate(curr_i);
self.data.vals.shrink_to_fit();
}
}

View File

@ -7,9 +7,67 @@ use std::slice;
use allocator::Allocator;
use constraint::{AreMultipliable, DimEq, SameNumberOfRows, ShapeConstraint};
use sparse::cs_utils;
use sparse::{CsMatrix, CsStorage, CsVector};
use storage::{Storage, StorageMut};
use {DefaultAllocator, Dim, Matrix, MatrixMN, Real, Scalar, Vector, VectorN, U1};
use {DefaultAllocator, Dim, Dynamic, Matrix, MatrixMN, Real, Scalar, Vector, VectorN, U1};
impl<'a, N: Scalar + Zero + ClosedAdd> CsMatrix<N> {
// FIXME: implement for dimensions other than Dynamic too.
pub fn from_triplet(
nrows: usize,
ncols: usize,
irows: &[usize],
icols: &[usize],
vals: &[N],
) -> Self {
Self::from_triplet_generic(Dynamic::new(nrows), Dynamic::new(ncols), irows, icols, vals)
}
}
impl<'a, N: Scalar + Zero + ClosedAdd, R: Dim, C: Dim> CsMatrix<N, R, C>
where
DefaultAllocator: Allocator<usize, C> + Allocator<N, R>,
{
pub fn from_triplet_generic(
nrows: R,
ncols: C,
irows: &[usize],
icols: &[usize],
vals: &[N],
) -> Self {
assert!(vals.len() == irows.len());
assert!(vals.len() == icols.len());
let mut res = CsMatrix::new_uninitialized_generic(nrows, ncols, vals.len());
let mut workspace = res.data.p.clone();
// Column count.
for j in icols.iter().cloned() {
workspace[j] += 1;
}
let _ = cs_utils::cumsum(&mut workspace, &mut res.data.p);
// Fill i and vals.
for ((i, j), val) in irows
.iter()
.cloned()
.zip(icols.iter().cloned())
.zip(vals.iter().cloned())
{
let offset = workspace[j];
res.data.i[offset] = i;
res.data.vals[offset] = val;
workspace[j] = offset + 1;
}
// Sort the result.
res.sort();
res.dedup();
res
}
}
impl<'a, N: Scalar + Zero, R: Dim, C: Dim, S> From<CsMatrix<N, R, C, S>> for MatrixMN<N, R, C>
where

18
src/sparse/cs_utils.rs Normal file
View File

@ -0,0 +1,18 @@
use allocator::Allocator;
use {DefaultAllocator, Dim, VectorN};
pub fn cumsum<D: Dim>(a: &mut VectorN<usize, D>, b: &mut VectorN<usize, D>) -> usize
where
DefaultAllocator: Allocator<usize, D>,
{
assert!(a.len() == b.len());
let mut sum = 0;
for i in 0..a.len() {
b[i] = sum;
sum += a[i];
a[i] = b[i];
}
sum
}

View File

@ -8,3 +8,4 @@ mod cs_matrix_cholesky;
mod cs_matrix_conversion;
mod cs_matrix_ops;
mod cs_matrix_solve;
pub mod cs_utils;

View File

@ -1,9 +1,8 @@
#![cfg_attr(rustfmt, rustfmt_skip)]
use na::{Matrix4x5, CsMatrix};
use na::{CsMatrix, DMatrix, Matrix4x5};
#[test]
fn cs_from_to_matrix() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let m = Matrix4x5::new(
5.0, 6.0, 0.0, 8.0, 15.0,
9.0, 10.0, 11.0, 12.0, 0.0,
@ -17,3 +16,74 @@ fn cs_from_to_matrix() {
let m2: Matrix4x5<_> = cs.into();
assert_eq!(m2, m);
}
#[test]
fn cs_matrix_from_triplet() {
let mut irows = vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3];
let mut icols = vec![0, 1, 3, 4, 0, 1, 2, 3, 2, 1, 2, 4];
let mut vals = vec![
5.0, 6.0, 8.0, 15.0, 9.0, 10.0, 11.0, 12.0, 13.0, 1.0, 4.0, 14.0,
];
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = DMatrix::from_row_slice(4, 5, &[
5.0, 6.0, 0.0, 8.0, 15.0,
9.0, 10.0, 11.0, 12.0, 0.0,
0.0, 0.0, 13.0, 0.0, 0.0,
0.0, 1.0, 4.0, 0.0, 14.0,
]);
let cs_expected = CsMatrix::from_parts(
4,
5,
vec![0, 2, 5, 8, 10],
vec![0, 1, 0, 1, 3, 1, 2, 3, 0, 1, 0, 3],
vec![
5.0, 9.0, 6.0, 10.0, 1.0, 11.0, 13.0, 4.0, 8.0, 12.0, 15.0, 14.0,
],
);
let cs_mat = CsMatrix::from_triplet(4, 5, &irows, &icols, &vals);
println!("Mat from triplet: {:?}", cs_mat);
assert!(cs_mat.is_sorted());
assert_eq!(cs_mat, cs_expected);
let m: DMatrix<_> = cs_mat.into();
assert_eq!(m, expected);
/*
* Try again with some permutations.
*/
let permutations = [(2, 5), (0, 4), (8, 10), (1, 11)];
for (i, j) in &permutations {
irows.swap(*i, *j);
icols.swap(*i, *j);
vals.swap(*i, *j);
}
let cs_mat = CsMatrix::from_triplet(4, 5, &irows, &icols, &vals);
println!("Mat from triplet: {:?}", cs_mat);
assert!(cs_mat.is_sorted());
assert_eq!(cs_mat, cs_expected);
let m: DMatrix<_> = cs_mat.into();
assert_eq!(m, expected);
/*
* Try again, duplicating all entries.
*/
let mut ir = irows.clone();
let mut ic = icols.clone();
let mut va = vals.clone();
irows.append(&mut ir);
icols.append(&mut ic);
vals.append(&mut va);
let cs_mat = CsMatrix::from_triplet(4, 5, &irows, &icols, &vals);
println!("Mat from triplet: {:?}", cs_mat);
assert!(cs_mat.is_sorted());
assert_eq!(cs_mat, cs_expected * 2.0);
let m: DMatrix<_> = cs_mat.into();
assert_eq!(m, expected * 2.0);
}

View File

@ -0,0 +1,55 @@
#![cfg_attr(rustfmt, rustfmt_skip)]
use na::io;
use na::DMatrix;
#[test]
fn cs_matrix_market() {
let file_str = r#"
%%MatrixMarket matrix coordinate real general
%=================================================================================
%
% This ASCII file represents a sparse MxN matrix with L
% nonzeros in the following Matrix Market format:
%
% +----------------------------------------------+
% |%%MatrixMarket matrix coordinate real general | <--- header line
% |% | <--+
% |% comments | |-- 0 or more comment lines
% |% | <--+
% | M N L | <--- rows, columns, entries
% | I1 J1 A(I1, J1) | <--+
% | I2 J2 A(I2, J2) | |
% | I3 J3 A(I3, J3) | |-- L lines
% | . . . | |
% | IL JL A(IL, JL) | <--+
% +----------------------------------------------+
%
% Indices are 1-based, i.e. A(1,1) is the first element.
%
%=================================================================================
5 5 8
1 1 1.000e+00
2 2 1.050e+01
3 3 1.500e-02
1 4 6.000e+00
4 2 2.505e+02
4 4 -2.800e+02
4 5 3.332e+01
5 5 1.200e+01
"#;
let cs_mat = io::cs_matrix_from_matrix_market_str(file_str).unwrap();
println!("CS mat: {:?}", cs_mat);
let mat: DMatrix<_> = cs_mat.into();
let expected = DMatrix::from_row_slice(5, 5, &[
1.0, 0.0, 0.0, 6.0, 0.0,
0.0, 10.5, 0.0, 0.0, 0.0,
0.0, 0.0, 0.015, 0.0, 0.0,
0.0, 250.5, 0.0, -280.0, 33.32,
0.0, 0.0, 0.0, 0.0, 12.0,
]);
assert_eq!(mat, expected);
}

View File

@ -2,5 +2,7 @@ mod cs_cholesky;
mod cs_construction;
mod cs_conversion;
mod cs_matrix;
#[cfg(feature = "io")]
mod cs_matrix_market;
mod cs_ops;
mod cs_solve;