Move sparse matrix serialization to separate files

This commit is contained in:
Fabian Loeschner 2022-01-04 15:15:52 +01:00
parent 583fde05fe
commit 38989ed5f0
8 changed files with 265 additions and 267 deletions

View File

@ -1,5 +1,8 @@
//! An implementation of the COO sparse matrix format.
#[cfg(feature = "serde-serialize")]
mod coo_serde;
use crate::SparseFormatError;
/// A COO representation of a sparse matrix.
@ -273,72 +276,3 @@ impl<T> CooMatrix<T> {
(self.row_indices, self.col_indices, self.values)
}
}
#[cfg(feature = "serde-serialize")]
mod serde_serialize {
    use super::CooMatrix;
    use serde::{de, Deserialize, Deserializer, Serialize, Serializer};

    /// Flat, named-field stand-in for a `CooMatrix` while it is being (de)serialized.
    ///
    /// Going through a dedicated type (instead of the `remote = "Self"` trick described in
    /// https://github.com/serde-rs/serde/issues/1220) gives a human-readable format with field
    /// names like `row_indices` and `col_indices`, and keeps that format constant even if the
    /// internal layout in `nalgebra` changes. Deserialized data is validated by a `try_from_*`
    /// constructor before a matrix is produced.
    ///
    /// The storage parameters are generic so that serialization can borrow slices (no cloning
    /// into owned storage) while deserialization produces `Vec`s. Without a major update of
    /// serde, slices and `Vec`s should always (de)serialize identically.
    #[derive(Serialize, Deserialize)]
    struct CooMatrixSerializationData<Indices, Values> {
        nrows: usize,
        ncols: usize,
        row_indices: Indices,
        col_indices: Indices,
        values: Values,
    }

    impl<T: Serialize + Clone> Serialize for CooMatrix<T> {
        /// Serializes the matrix through a borrowed `CooMatrixSerializationData`.
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let borrowed = CooMatrixSerializationData::<&[usize], &[T]> {
                nrows: self.nrows(),
                ncols: self.ncols(),
                row_indices: self.row_indices(),
                col_indices: self.col_indices(),
                values: self.values(),
            };
            borrowed.serialize(serializer)
        }
    }

    impl<'de, T: Deserialize<'de> + Clone> Deserialize<'de> for CooMatrix<T> {
        /// Reads an owned `CooMatrixSerializationData` and passes its parts to
        /// `CooMatrix::try_from_triplets`; an error from that constructor is reported as a
        /// custom deserialization error.
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let CooMatrixSerializationData {
                nrows,
                ncols,
                row_indices,
                col_indices,
                values,
            } = CooMatrixSerializationData::<Vec<usize>, Vec<T>>::deserialize(deserializer)?;

            Self::try_from_triplets(nrows, ncols, row_indices, col_indices, values)
                .map_err(de::Error::custom)
        }
    }
}

View File

@ -0,0 +1,65 @@
use crate::coo::CooMatrix;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
/// This is an intermediate type for (de)serializing `CooMatrix`.
///
/// Deserialization requires using a `try_from_*` function for validation. We could have used
/// the `remote = "Self"` trick (https://github.com/serde-rs/serde/issues/1220), which allows
/// directly serializing/deserializing the original fields while combining it with validation.
/// However, this would lead to nested serialization of the `CsMatrix` and `SparsityPattern`
/// types. Instead, we decided that we want a more human-readable serialization format using
/// field names like `row_indices` and `col_indices`. The easiest way to achieve this is to
/// introduce an intermediate type. It also allows the serialization format to stay constant
/// even if the internal layout in `nalgebra` changes.
///
/// We want to avoid unnecessary copies when serializing (i.e. cloning slices into owned
/// storage). Therefore, we use generic arguments to allow using slices during serialization and
/// owned storage (i.e. `Vec`) during deserialization. Without a major update of serde, slices
/// and `Vec`s should always (de)serialize identically.
///
/// The field names and their order are picked up by the derived impls, so changing either
/// changes the serialized format.
#[derive(Serialize, Deserialize)]
struct CooMatrixSerializationData<Indices, Values> {
/// Number of rows, as reported by `CooMatrix::nrows()`.
nrows: usize,
/// Number of columns, as reported by `CooMatrix::ncols()`.
ncols: usize,
/// Contents of `CooMatrix::row_indices()`.
row_indices: Indices,
/// Contents of `CooMatrix::col_indices()`.
col_indices: Indices,
/// Contents of `CooMatrix::values()`.
values: Values,
}
impl<T> Serialize for CooMatrix<T>
where
T: Serialize + Clone,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
CooMatrixSerializationData::<&[usize], &[T]> {
nrows: self.nrows(),
ncols: self.ncols(),
row_indices: self.row_indices(),
col_indices: self.col_indices(),
values: self.values(),
}
.serialize(serializer)
}
}
impl<'de, T> Deserialize<'de> for CooMatrix<T>
where
T: Deserialize<'de> + Clone,
{
fn deserialize<D>(deserializer: D) -> Result<CooMatrix<T>, D::Error>
where
D: Deserializer<'de>,
{
let de = CooMatrixSerializationData::<Vec<usize>, Vec<T>>::deserialize(deserializer)?;
CooMatrix::try_from_triplets(
de.nrows,
de.ncols,
de.row_indices,
de.col_indices,
de.values,
)
.map_err(|e| de::Error::custom(e))
}
}

View File

@ -3,6 +3,9 @@
//! This is the module-level documentation. See [`CscMatrix`] for the main documentation of the
//! CSC implementation.
#[cfg(feature = "serde-serialize")]
mod csc_serde;
use crate::cs::{CsLane, CsLaneIter, CsLaneIterMut, CsLaneMut, CsMatrix};
use crate::csr::CsrMatrix;
use crate::pattern::{SparsityPattern, SparsityPatternFormatError, SparsityPatternIter};
@ -520,75 +523,6 @@ impl<T> CscMatrix<T> {
}
}
#[cfg(feature = "serde-serialize")]
mod serde_serialize {
    use super::CscMatrix;
    use serde::{de, Deserialize, Deserializer, Serialize, Serializer};

    /// Flat, named-field stand-in for a `CscMatrix` while it is being (de)serialized.
    ///
    /// Using the `remote = "Self"` trick (https://github.com/serde-rs/serde/issues/1220) on the
    /// original fields would lead to nested serialization of the `CsMatrix` and
    /// `SparsityPattern` types. A dedicated type instead gives a human-readable format with
    /// field names like `col_offsets` and `row_indices`, and keeps that format constant even if
    /// the internal layout in `nalgebra` changes. Deserialized data is validated by a
    /// `try_from_*` constructor before a matrix is produced.
    ///
    /// The storage parameters are generic so that serialization can borrow slices (no cloning
    /// into owned storage) while deserialization produces `Vec`s. Without a major update of
    /// serde, slices and `Vec`s should always (de)serialize identically.
    #[derive(Serialize, Deserialize)]
    struct CscMatrixSerializationData<Indices, Values> {
        nrows: usize,
        ncols: usize,
        col_offsets: Indices,
        row_indices: Indices,
        values: Values,
    }

    impl<T: Serialize + Clone> Serialize for CscMatrix<T> {
        /// Serializes the matrix through a borrowed `CscMatrixSerializationData`.
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let borrowed = CscMatrixSerializationData::<&[usize], &[T]> {
                nrows: self.nrows(),
                ncols: self.ncols(),
                col_offsets: self.col_offsets(),
                row_indices: self.row_indices(),
                values: self.values(),
            };
            borrowed.serialize(serializer)
        }
    }

    impl<'de, T: Deserialize<'de> + Clone> Deserialize<'de> for CscMatrix<T> {
        /// Reads an owned `CscMatrixSerializationData` and passes its parts to
        /// `CscMatrix::try_from_csc_data`; an error from that constructor is reported as a
        /// custom deserialization error.
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let CscMatrixSerializationData {
                nrows,
                ncols,
                col_offsets,
                row_indices,
                values,
            } = CscMatrixSerializationData::<Vec<usize>, Vec<T>>::deserialize(deserializer)?;

            Self::try_from_csc_data(nrows, ncols, col_offsets, row_indices, values)
                .map_err(de::Error::custom)
        }
    }
}
/// Convert pattern format errors into more meaningful CSC-specific errors.
///
/// This ensures that the terminology is consistent: we are talking about rows and columns,

View File

@ -0,0 +1,65 @@
use crate::CscMatrix;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
/// This is an intermediate type for (de)serializing `CscMatrix`.
///
/// Deserialization requires using a `try_from_*` function for validation. We could have used
/// the `remote = "Self"` trick (https://github.com/serde-rs/serde/issues/1220), which allows
/// directly serializing/deserializing the original fields while combining it with validation.
/// However, this would lead to nested serialization of the `CsMatrix` and `SparsityPattern`
/// types. Instead, we decided that we want a more human-readable serialization format using
/// field names like `col_offsets` and `row_indices`. The easiest way to achieve this is to
/// introduce an intermediate type. It also allows the serialization format to stay constant
/// even if the internal layout in `nalgebra` changes.
///
/// We want to avoid unnecessary copies when serializing (i.e. cloning slices into owned
/// storage). Therefore, we use generic arguments to allow using slices during serialization and
/// owned storage (i.e. `Vec`) during deserialization. Without a major update of serde, slices
/// and `Vec`s should always (de)serialize identically.
///
/// The field names and their order are picked up by the derived impls, so changing either
/// changes the serialized format.
#[derive(Serialize, Deserialize)]
struct CscMatrixSerializationData<Indices, Values> {
/// Number of rows, as reported by `CscMatrix::nrows()`.
nrows: usize,
/// Number of columns, as reported by `CscMatrix::ncols()`.
ncols: usize,
/// Contents of `CscMatrix::col_offsets()`.
col_offsets: Indices,
/// Contents of `CscMatrix::row_indices()`.
row_indices: Indices,
/// Contents of `CscMatrix::values()`.
values: Values,
}
impl<T> Serialize for CscMatrix<T>
where
T: Serialize + Clone,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
CscMatrixSerializationData::<&[usize], &[T]> {
nrows: self.nrows(),
ncols: self.ncols(),
col_offsets: self.col_offsets(),
row_indices: self.row_indices(),
values: self.values(),
}
.serialize(serializer)
}
}
impl<'de, T> Deserialize<'de> for CscMatrix<T>
where
T: Deserialize<'de> + Clone,
{
fn deserialize<D>(deserializer: D) -> Result<CscMatrix<T>, D::Error>
where
D: Deserializer<'de>,
{
let de = CscMatrixSerializationData::<Vec<usize>, Vec<T>>::deserialize(deserializer)?;
CscMatrix::try_from_csc_data(
de.nrows,
de.ncols,
de.col_offsets,
de.row_indices,
de.values,
)
.map_err(|e| de::Error::custom(e))
}
}

View File

@ -2,6 +2,10 @@
//!
//! This is the module-level documentation. See [`CsrMatrix`] for the main documentation of the
//! CSR implementation.
#[cfg(feature = "serde-serialize")]
mod csr_serde;
use crate::cs::{CsLane, CsLaneIter, CsLaneIterMut, CsLaneMut, CsMatrix};
use crate::csc::CscMatrix;
use crate::pattern::{SparsityPattern, SparsityPatternFormatError, SparsityPatternIter};
@ -591,75 +595,6 @@ impl<T> CsrMatrix<T> {
}
}
#[cfg(feature = "serde-serialize")]
mod serde_serialize {
    use super::CsrMatrix;
    use serde::{de, Deserialize, Deserializer, Serialize, Serializer};

    /// Flat, named-field stand-in for a `CsrMatrix` while it is being (de)serialized.
    ///
    /// Using the `remote = "Self"` trick (https://github.com/serde-rs/serde/issues/1220) on the
    /// original fields would lead to nested serialization of the `CsMatrix` and
    /// `SparsityPattern` types. A dedicated type instead gives a human-readable format with
    /// field names like `row_offsets` and `col_indices`, and keeps that format constant even if
    /// the internal layout in `nalgebra` changes. Deserialized data is validated by a
    /// `try_from_*` constructor before a matrix is produced.
    ///
    /// The storage parameters are generic so that serialization can borrow slices (no cloning
    /// into owned storage) while deserialization produces `Vec`s. Without a major update of
    /// serde, slices and `Vec`s should always (de)serialize identically.
    #[derive(Serialize, Deserialize)]
    struct CsrMatrixSerializationData<Indices, Values> {
        nrows: usize,
        ncols: usize,
        row_offsets: Indices,
        col_indices: Indices,
        values: Values,
    }

    impl<T: Serialize + Clone> Serialize for CsrMatrix<T> {
        /// Serializes the matrix through a borrowed `CsrMatrixSerializationData`.
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let borrowed = CsrMatrixSerializationData::<&[usize], &[T]> {
                nrows: self.nrows(),
                ncols: self.ncols(),
                row_offsets: self.row_offsets(),
                col_indices: self.col_indices(),
                values: self.values(),
            };
            borrowed.serialize(serializer)
        }
    }

    impl<'de, T: Deserialize<'de> + Clone> Deserialize<'de> for CsrMatrix<T> {
        /// Reads an owned `CsrMatrixSerializationData` and passes its parts to
        /// `CsrMatrix::try_from_csr_data`; an error from that constructor is reported as a
        /// custom deserialization error.
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let CsrMatrixSerializationData {
                nrows,
                ncols,
                row_offsets,
                col_indices,
                values,
            } = CsrMatrixSerializationData::<Vec<usize>, Vec<T>>::deserialize(deserializer)?;

            Self::try_from_csr_data(nrows, ncols, row_offsets, col_indices, values)
                .map_err(de::Error::custom)
        }
    }
}
/// Convert pattern format errors into more meaningful CSR-specific errors.
///
/// This ensures that the terminology is consistent: we are talking about rows and columns,

View File

@ -0,0 +1,65 @@
use crate::CsrMatrix;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
/// This is an intermediate type for (de)serializing `CsrMatrix`.
///
/// Deserialization requires using a `try_from_*` function for validation. We could have used
/// the `remote = "Self"` trick (https://github.com/serde-rs/serde/issues/1220), which allows
/// directly serializing/deserializing the original fields while combining it with validation.
/// However, this would lead to nested serialization of the `CsMatrix` and `SparsityPattern`
/// types. Instead, we decided that we want a more human-readable serialization format using
/// field names like `row_offsets` and `col_indices`. The easiest way to achieve this is to
/// introduce an intermediate type. It also allows the serialization format to stay constant
/// even if the internal layout in `nalgebra` changes.
///
/// We want to avoid unnecessary copies when serializing (i.e. cloning slices into owned
/// storage). Therefore, we use generic arguments to allow using slices during serialization and
/// owned storage (i.e. `Vec`) during deserialization. Without a major update of serde, slices
/// and `Vec`s should always (de)serialize identically.
///
/// The field names and their order are picked up by the derived impls, so changing either
/// changes the serialized format.
#[derive(Serialize, Deserialize)]
struct CsrMatrixSerializationData<Indices, Values> {
/// Number of rows, as reported by `CsrMatrix::nrows()`.
nrows: usize,
/// Number of columns, as reported by `CsrMatrix::ncols()`.
ncols: usize,
/// Contents of `CsrMatrix::row_offsets()`.
row_offsets: Indices,
/// Contents of `CsrMatrix::col_indices()`.
col_indices: Indices,
/// Contents of `CsrMatrix::values()`.
values: Values,
}
impl<T> Serialize for CsrMatrix<T>
where
T: Serialize + Clone,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
CsrMatrixSerializationData::<&[usize], &[T]> {
nrows: self.nrows(),
ncols: self.ncols(),
row_offsets: self.row_offsets(),
col_indices: self.col_indices(),
values: self.values(),
}
.serialize(serializer)
}
}
impl<'de, T> Deserialize<'de> for CsrMatrix<T>
where
T: Deserialize<'de> + Clone,
{
fn deserialize<D>(deserializer: D) -> Result<CsrMatrix<T>, D::Error>
where
D: Deserializer<'de>,
{
let de = CsrMatrixSerializationData::<Vec<usize>, Vec<T>>::deserialize(deserializer)?;
CsrMatrix::try_from_csr_data(
de.nrows,
de.ncols,
de.row_offsets,
de.col_indices,
de.values,
)
.map_err(|e| de::Error::custom(e))
}
}

View File

@ -1,4 +1,8 @@
//! Sparsity patterns for CSR and CSC matrices.
#[cfg(feature = "serde-serialize")]
mod pattern_serde;
use crate::cs::transpose_cs;
use crate::SparseFormatError;
use std::error::Error;
@ -289,66 +293,6 @@ pub enum SparsityPatternFormatError {
NonmonotonicMinorIndices,
}
#[cfg(feature = "serde-serialize")]
mod serde_serialize {
    use super::SparsityPattern;
    use serde::{de, Deserialize, Deserializer, Serialize, Serializer};

    /// Flat, named-field stand-in for a `SparsityPattern` while it is being (de)serialized.
    ///
    /// A dedicated type (instead of the `remote = "Self"` trick described in
    /// https://github.com/serde-rs/serde/issues/1220) gives a human-readable format with field
    /// names like `major_offsets` and `minor_indices`, and keeps that format constant even when
    /// the internal layout in `nalgebra` changes. Deserialized data is validated by a
    /// `try_from_*` constructor before a pattern is produced.
    ///
    /// The storage parameter is generic so that serialization can borrow slices (no cloning
    /// into owned storage) while deserialization produces `Vec`s. Without a major update of
    /// serde, slices and `Vec`s should always (de)serialize identically.
    #[derive(Serialize, Deserialize)]
    struct SparsityPatternSerializationData<Indices> {
        major_dim: usize,
        minor_dim: usize,
        major_offsets: Indices,
        minor_indices: Indices,
    }

    impl Serialize for SparsityPattern {
        /// Serializes the pattern through a borrowed `SparsityPatternSerializationData`.
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let borrowed = SparsityPatternSerializationData::<&[usize]> {
                major_dim: self.major_dim(),
                minor_dim: self.minor_dim(),
                major_offsets: self.major_offsets(),
                minor_indices: self.minor_indices(),
            };
            borrowed.serialize(serializer)
        }
    }

    impl<'de> Deserialize<'de> for SparsityPattern {
        /// Reads an owned `SparsityPatternSerializationData` and passes its parts to
        /// `SparsityPattern::try_from_offsets_and_indices`; an error from that constructor is
        /// reported as a custom deserialization error.
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let SparsityPatternSerializationData {
                major_dim,
                minor_dim,
                major_offsets,
                minor_indices,
            } = SparsityPatternSerializationData::<Vec<usize>>::deserialize(deserializer)?;

            Self::try_from_offsets_and_indices(major_dim, minor_dim, major_offsets, minor_indices)
                .map_err(de::Error::custom)
        }
    }
}
impl From<SparsityPatternFormatError> for SparseFormatError {
fn from(err: SparsityPatternFormatError) -> Self {
use crate::SparseFormatErrorKind;

View File

@ -0,0 +1,56 @@
use crate::pattern::SparsityPattern;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
/// This is an intermediate type for (de)serializing `SparsityPattern`.
///
/// Deserialization requires using a `try_from_*` function for validation. We could have used
/// the `remote = "Self"` trick (https://github.com/serde-rs/serde/issues/1220), which allows
/// directly serializing/deserializing the original fields while combining it with validation.
/// However, this would lead to nested serialization of the `CsMatrix` and `SparsityPattern`
/// types. Instead, we decided that we want a more human-readable serialization format using
/// field names like `major_offsets` and `minor_indices`. The easiest way to achieve this is to
/// introduce an intermediate type. It also allows the serialization format to stay constant
/// even when the internal layout in `nalgebra` changes.
///
/// We want to avoid unnecessary copies when serializing (i.e. cloning slices into owned
/// storage). Therefore, we use generic arguments to allow using slices during serialization and
/// owned storage (i.e. `Vec`) during deserialization. Without a major update of serde, slices
/// and `Vec`s should always (de)serialize identically.
///
/// The field names and their order are picked up by the derived impls, so changing either
/// changes the serialized format.
#[derive(Serialize, Deserialize)]
struct SparsityPatternSerializationData<Indices> {
/// Value of `SparsityPattern::major_dim()`.
major_dim: usize,
/// Value of `SparsityPattern::minor_dim()`.
minor_dim: usize,
/// Contents of `SparsityPattern::major_offsets()`.
major_offsets: Indices,
/// Contents of `SparsityPattern::minor_indices()`.
minor_indices: Indices,
}
impl Serialize for SparsityPattern {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
SparsityPatternSerializationData::<&[usize]> {
major_dim: self.major_dim(),
minor_dim: self.minor_dim(),
major_offsets: self.major_offsets(),
minor_indices: self.minor_indices(),
}
.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for SparsityPattern {
fn deserialize<D>(deserializer: D) -> Result<SparsityPattern, D::Error>
where
D: Deserializer<'de>,
{
let de = SparsityPatternSerializationData::<Vec<usize>>::deserialize(deserializer)?;
SparsityPattern::try_from_offsets_and_indices(
de.major_dim,
de.minor_dim,
de.major_offsets,
de.minor_indices,
)
.map_err(|e| de::Error::custom(e))
}
}