forked from M-Labs/nalgebra
put back checked kernels and refactor upper layer
This commit is contained in:
parent
6d26f4f32c
commit
c6f832d1d8
@ -1,5 +1,3 @@
|
|||||||
//use std::collections::HashSet;
|
|
||||||
|
|
||||||
use crate::cs::CsMatrix;
|
use crate::cs::CsMatrix;
|
||||||
use crate::ops::serial::{OperationError, OperationErrorKind};
|
use crate::ops::serial::{OperationError, OperationErrorKind};
|
||||||
use crate::ops::Op;
|
use crate::ops::Op;
|
||||||
@ -7,12 +5,12 @@ use crate::SparseEntryMut;
|
|||||||
use nalgebra::{ClosedAdd, ClosedMul, DMatrixSlice, DMatrixSliceMut, Scalar};
|
use nalgebra::{ClosedAdd, ClosedMul, DMatrixSlice, DMatrixSliceMut, Scalar};
|
||||||
use num_traits::{One, Zero};
|
use num_traits::{One, Zero};
|
||||||
|
|
||||||
//fn spmm_cs_unexpected_entry() -> OperationError {
|
fn spmm_cs_unexpected_entry() -> OperationError {
|
||||||
// OperationError::from_kind_and_message(
|
OperationError::from_kind_and_message(
|
||||||
// OperationErrorKind::InvalidPattern,
|
OperationErrorKind::InvalidPattern,
|
||||||
// String::from("Found unexpected entry that is not present in `c`."),
|
String::from("Found unexpected entry that is not present in `c`."),
|
||||||
// )
|
)
|
||||||
//}
|
}
|
||||||
|
|
||||||
/// Helper functionality for implementing CSR/CSC SPMM.
|
/// Helper functionality for implementing CSR/CSC SPMM.
|
||||||
///
|
///
|
||||||
@ -22,7 +20,7 @@ use num_traits::{One, Zero};
|
|||||||
/// reversed (since transpose(AB) = transpose(B) * transpose(A) and CSC(A) = transpose(CSR(A)).
|
/// reversed (since transpose(AB) = transpose(B) * transpose(A) and CSC(A) = transpose(CSR(A)).
|
||||||
///
|
///
|
||||||
/// We assume here that the matrices have already been verified to be dimensionally compatible.
|
/// We assume here that the matrices have already been verified to be dimensionally compatible.
|
||||||
pub fn spmm_cs_prealloc<T>(
|
pub fn spmm_cs_prealloc_unchecked<T>(
|
||||||
beta: T,
|
beta: T,
|
||||||
c: &mut CsMatrix<T>,
|
c: &mut CsMatrix<T>,
|
||||||
alpha: T,
|
alpha: T,
|
||||||
@ -43,8 +41,10 @@ where
|
|||||||
let b_lane_k = b.get_lane(k).unwrap();
|
let b_lane_k = b.get_lane(k).unwrap();
|
||||||
let alpha_aik = alpha.clone() * a_ik.clone();
|
let alpha_aik = alpha.clone() * a_ik.clone();
|
||||||
for (j, b_kj) in b_lane_k.minor_indices().iter().zip(b_lane_k.values()) {
|
for (j, b_kj) in b_lane_k.minor_indices().iter().zip(b_lane_k.values()) {
|
||||||
// Determine the location in C to append the value
|
// use a dense scatter vector to accumulate non-zeros quickly
|
||||||
scratchpad_values[*j] += alpha_aik.clone() * b_kj.clone();
|
unsafe {
|
||||||
|
*scratchpad_values.get_unchecked_mut(*j) += alpha_aik.clone() * b_kj.clone();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,15 +53,55 @@ where
|
|||||||
values
|
values
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
.zip(indices)
|
.zip(indices)
|
||||||
.for_each(|(output_ref, index)| {
|
.for_each(|(output_ref, index)| unsafe {
|
||||||
*output_ref = beta.clone() * output_ref.clone() + scratchpad_values[*index].clone();
|
*output_ref = beta.clone() * output_ref.clone()
|
||||||
scratchpad_values[*index] = Zero::zero();
|
+ scratchpad_values.get_unchecked(*index).clone();
|
||||||
|
*scratchpad_values.get_unchecked_mut(*index) = Zero::zero();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn spmm_cs_prealloc_checked<T>(
|
||||||
|
beta: T,
|
||||||
|
c: &mut CsMatrix<T>,
|
||||||
|
alpha: T,
|
||||||
|
a: &CsMatrix<T>,
|
||||||
|
b: &CsMatrix<T>,
|
||||||
|
) -> Result<(), OperationError>
|
||||||
|
where
|
||||||
|
T: Scalar + ClosedAdd + ClosedMul + Zero + One,
|
||||||
|
{
|
||||||
|
for i in 0..c.pattern().major_dim() {
|
||||||
|
let a_lane_i = a.get_lane(i).unwrap();
|
||||||
|
let mut c_lane_i = c.get_lane_mut(i).unwrap();
|
||||||
|
for c_ij in c_lane_i.values_mut() {
|
||||||
|
*c_ij = beta.clone() * c_ij.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (&k, a_ik) in a_lane_i.minor_indices().iter().zip(a_lane_i.values()) {
|
||||||
|
let b_lane_k = b.get_lane(k).unwrap();
|
||||||
|
let (mut c_lane_i_cols, mut c_lane_i_values) = c_lane_i.indices_and_values_mut();
|
||||||
|
let alpha_aik = alpha.clone() * a_ik.clone();
|
||||||
|
for (j, b_kj) in b_lane_k.minor_indices().iter().zip(b_lane_k.values()) {
|
||||||
|
// Determine the location in C to append the value
|
||||||
|
let (c_local_idx, _) = c_lane_i_cols
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.find(|(_, c_col)| *c_col == j)
|
||||||
|
.ok_or_else(spmm_cs_unexpected_entry)?;
|
||||||
|
|
||||||
|
c_lane_i_values[c_local_idx] += alpha_aik.clone() * b_kj.clone();
|
||||||
|
c_lane_i_cols = &c_lane_i_cols[c_local_idx..];
|
||||||
|
c_lane_i_values = &mut c_lane_i_values[c_local_idx..];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn spadd_cs_unexpected_entry() -> OperationError {
|
fn spadd_cs_unexpected_entry() -> OperationError {
|
||||||
OperationError::from_kind_and_message(
|
OperationError::from_kind_and_message(
|
||||||
OperationErrorKind::InvalidPattern,
|
OperationErrorKind::InvalidPattern,
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use crate::csc::CscMatrix;
|
use crate::csc::CscMatrix;
|
||||||
use crate::ops::serial::cs::{spadd_cs_prealloc, spmm_cs_dense, spmm_cs_prealloc};
|
use crate::ops::serial::cs::{
|
||||||
|
spadd_cs_prealloc, spmm_cs_dense, spmm_cs_prealloc_checked, spmm_cs_prealloc_unchecked,
|
||||||
|
};
|
||||||
use crate::ops::serial::{OperationError, OperationErrorKind};
|
use crate::ops::serial::{OperationError, OperationErrorKind};
|
||||||
use crate::ops::Op;
|
use crate::ops::Op;
|
||||||
use nalgebra::{ClosedAdd, ClosedMul, DMatrixSlice, DMatrixSliceMut, RealField, Scalar};
|
use nalgebra::{ClosedAdd, ClosedMul, DMatrixSlice, DMatrixSliceMut, RealField, Scalar};
|
||||||
@ -71,7 +73,7 @@ where
|
|||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Panics if the dimensions of the matrices involved are not compatible with the expression.
|
/// Panics if the dimensions of the matrices involved are not compatible with the expression.
|
||||||
pub fn spmm_csc_prealloc<T>(
|
pub fn spmm_csc_prealloc_checked<T>(
|
||||||
beta: T,
|
beta: T,
|
||||||
c: &mut CscMatrix<T>,
|
c: &mut CscMatrix<T>,
|
||||||
alpha: T,
|
alpha: T,
|
||||||
@ -83,14 +85,65 @@ where
|
|||||||
{
|
{
|
||||||
assert_compatible_spmm_dims!(c, a, b);
|
assert_compatible_spmm_dims!(c, a, b);
|
||||||
|
|
||||||
use Op::{NoOp, Transpose};
|
use Op::NoOp;
|
||||||
|
|
||||||
match (&a, &b) {
|
match (&a, &b) {
|
||||||
(NoOp(ref a), NoOp(ref b)) => {
|
(NoOp(ref a), NoOp(ref b)) => {
|
||||||
// Note: We have to reverse the order for CSC matrices
|
// Note: We have to reverse the order for CSC matrices
|
||||||
spmm_cs_prealloc(beta, &mut c.cs, alpha, &b.cs, &a.cs)
|
spmm_cs_prealloc_checked(beta, &mut c.cs, alpha, &b.cs, &a.cs)
|
||||||
}
|
}
|
||||||
_ => {
|
_ => do_transposes(beta, c, alpha, a, b, spmm_csc_prealloc_checked),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Faster sparse-sparse matrix multiplication, `C <- beta * C + alpha * op(A) * op(B)`.
|
||||||
|
/// This will not return an error even if the patterns don't match.
|
||||||
|
/// Should be used for situations where pattern creation immediately preceeds multiplication.
|
||||||
|
///
|
||||||
|
/// Panics if the dimensions of the matrices involved are not compatible with the expression.
|
||||||
|
pub(crate) fn spmm_csc_prealloc_unchecked<T>(
|
||||||
|
beta: T,
|
||||||
|
c: &mut CscMatrix<T>,
|
||||||
|
alpha: T,
|
||||||
|
a: Op<&CscMatrix<T>>,
|
||||||
|
b: Op<&CscMatrix<T>>,
|
||||||
|
) -> Result<(), OperationError>
|
||||||
|
where
|
||||||
|
T: Scalar + ClosedAdd + ClosedMul + Zero + One,
|
||||||
|
{
|
||||||
|
assert_compatible_spmm_dims!(c, a, b);
|
||||||
|
|
||||||
|
use Op::NoOp;
|
||||||
|
|
||||||
|
match (&a, &b) {
|
||||||
|
(NoOp(ref a), NoOp(ref b)) => {
|
||||||
|
// Note: We have to reverse the order for CSC matrices
|
||||||
|
spmm_cs_prealloc_unchecked(beta, &mut c.cs, alpha, &b.cs, &a.cs)
|
||||||
|
}
|
||||||
|
_ => do_transposes(beta, c, alpha, a, b, spmm_csc_prealloc_unchecked),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_transposes<T, F>(
|
||||||
|
beta: T,
|
||||||
|
c: &mut CscMatrix<T>,
|
||||||
|
alpha: T,
|
||||||
|
a: Op<&CscMatrix<T>>,
|
||||||
|
b: Op<&CscMatrix<T>>,
|
||||||
|
caller: F,
|
||||||
|
) -> Result<(), OperationError>
|
||||||
|
where
|
||||||
|
T: Scalar + ClosedAdd + ClosedMul + Zero + One,
|
||||||
|
F: Fn(
|
||||||
|
T,
|
||||||
|
&mut CscMatrix<T>,
|
||||||
|
T,
|
||||||
|
Op<&CscMatrix<T>>,
|
||||||
|
Op<&CscMatrix<T>>,
|
||||||
|
) -> Result<(), OperationError>,
|
||||||
|
{
|
||||||
|
use Op::{NoOp, Transpose};
|
||||||
|
|
||||||
// Currently we handle transposition by explicitly precomputing transposed matrices
|
// Currently we handle transposition by explicitly precomputing transposed matrices
|
||||||
// and calling the operation again without transposition
|
// and calling the operation again without transposition
|
||||||
let a_ref: &CscMatrix<T> = a.inner_ref();
|
let a_ref: &CscMatrix<T> = a.inner_ref();
|
||||||
@ -101,15 +154,10 @@ where
|
|||||||
(NoOp(_), NoOp(_)) => unreachable!(),
|
(NoOp(_), NoOp(_)) => unreachable!(),
|
||||||
(Transpose(ref a), NoOp(_)) => (Owned(a.transpose()), Borrowed(b_ref)),
|
(Transpose(ref a), NoOp(_)) => (Owned(a.transpose()), Borrowed(b_ref)),
|
||||||
(NoOp(_), Transpose(ref b)) => (Borrowed(a_ref), Owned(b.transpose())),
|
(NoOp(_), Transpose(ref b)) => (Borrowed(a_ref), Owned(b.transpose())),
|
||||||
(Transpose(ref a), Transpose(ref b)) => {
|
(Transpose(ref a), Transpose(ref b)) => (Owned(a.transpose()), Owned(b.transpose())),
|
||||||
(Owned(a.transpose()), Owned(b.transpose()))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
caller(beta, c, alpha, NoOp(a.as_ref()), NoOp(b.as_ref()))
|
||||||
spmm_csc_prealloc(beta, c, alpha, NoOp(a.as_ref()), NoOp(b.as_ref()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Solve the lower triangular system `op(L) X = B`.
|
/// Solve the lower triangular system `op(L) X = B`.
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use crate::csr::CsrMatrix;
|
use crate::csr::CsrMatrix;
|
||||||
use crate::ops::serial::cs::{spadd_cs_prealloc, spmm_cs_dense, spmm_cs_prealloc};
|
use crate::ops::serial::cs::{
|
||||||
|
spadd_cs_prealloc, spmm_cs_dense, spmm_cs_prealloc_checked, spmm_cs_prealloc_unchecked,
|
||||||
|
};
|
||||||
use crate::ops::serial::OperationError;
|
use crate::ops::serial::OperationError;
|
||||||
use crate::ops::Op;
|
use crate::ops::Op;
|
||||||
use nalgebra::{ClosedAdd, ClosedMul, DMatrixSlice, DMatrixSliceMut, Scalar};
|
use nalgebra::{ClosedAdd, ClosedMul, DMatrixSlice, DMatrixSliceMut, Scalar};
|
||||||
@ -65,7 +67,7 @@ where
|
|||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Panics if the dimensions of the matrices involved are not compatible with the expression.
|
/// Panics if the dimensions of the matrices involved are not compatible with the expression.
|
||||||
pub fn spmm_csr_prealloc<T>(
|
pub fn spmm_csr_prealloc_checked<T>(
|
||||||
beta: T,
|
beta: T,
|
||||||
c: &mut CsrMatrix<T>,
|
c: &mut CsrMatrix<T>,
|
||||||
alpha: T,
|
alpha: T,
|
||||||
@ -77,15 +79,65 @@ where
|
|||||||
{
|
{
|
||||||
assert_compatible_spmm_dims!(c, a, b);
|
assert_compatible_spmm_dims!(c, a, b);
|
||||||
|
|
||||||
use Op::{NoOp, Transpose};
|
use Op::NoOp;
|
||||||
|
|
||||||
match (&a, &b) {
|
match (&a, &b) {
|
||||||
(NoOp(ref a), NoOp(ref b)) => spmm_cs_prealloc(beta, &mut c.cs, alpha, &a.cs, &b.cs),
|
(NoOp(ref a), NoOp(ref b)) => {
|
||||||
_ => {
|
spmm_cs_prealloc_checked(beta, &mut c.cs, alpha, &a.cs, &b.cs)
|
||||||
|
}
|
||||||
|
_ => do_transposes(beta, c, alpha, a, b, spmm_csr_prealloc_checked),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Faster sparse-sparse matrix multiplication, `C <- beta * C + alpha * op(A) * op(B)`.
|
||||||
|
/// This will not return an error even if the patterns don't match.
|
||||||
|
/// Should be used for situations where pattern creation immediately preceeds multiplication.
|
||||||
|
///
|
||||||
|
/// Panics if the dimensions of the matrices involved are not compatible with the expression.
|
||||||
|
pub(crate) fn spmm_csr_prealloc_unchecked<T>(
|
||||||
|
beta: T,
|
||||||
|
c: &mut CsrMatrix<T>,
|
||||||
|
alpha: T,
|
||||||
|
a: Op<&CsrMatrix<T>>,
|
||||||
|
b: Op<&CsrMatrix<T>>,
|
||||||
|
) -> Result<(), OperationError>
|
||||||
|
where
|
||||||
|
T: Scalar + ClosedAdd + ClosedMul + Zero + One,
|
||||||
|
{
|
||||||
|
assert_compatible_spmm_dims!(c, a, b);
|
||||||
|
|
||||||
|
use Op::NoOp;
|
||||||
|
|
||||||
|
match (&a, &b) {
|
||||||
|
(NoOp(ref a), NoOp(ref b)) => {
|
||||||
|
spmm_cs_prealloc_unchecked(beta, &mut c.cs, alpha, &a.cs, &b.cs)
|
||||||
|
}
|
||||||
|
_ => do_transposes(beta, c, alpha, a, b, spmm_csr_prealloc_unchecked),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_transposes<T, F>(
|
||||||
|
beta: T,
|
||||||
|
c: &mut CsrMatrix<T>,
|
||||||
|
alpha: T,
|
||||||
|
a: Op<&CsrMatrix<T>>,
|
||||||
|
b: Op<&CsrMatrix<T>>,
|
||||||
|
caller: F,
|
||||||
|
) -> Result<(), OperationError>
|
||||||
|
where
|
||||||
|
T: Scalar + ClosedAdd + ClosedMul + Zero + One,
|
||||||
|
F: Fn(
|
||||||
|
T,
|
||||||
|
&mut CsrMatrix<T>,
|
||||||
|
T,
|
||||||
|
Op<&CsrMatrix<T>>,
|
||||||
|
Op<&CsrMatrix<T>>,
|
||||||
|
) -> Result<(), OperationError>,
|
||||||
|
{
|
||||||
|
use Op::{NoOp, Transpose};
|
||||||
|
|
||||||
// Currently we handle transposition by explicitly precomputing transposed matrices
|
// Currently we handle transposition by explicitly precomputing transposed matrices
|
||||||
// and calling the operation again without transposition
|
// and calling the operation again without transposition
|
||||||
// TODO: At least use workspaces to allow control of allocations. Maybe
|
|
||||||
// consider implementing certain patterns (like A^T * B) explicitly
|
|
||||||
let a_ref: &CsrMatrix<T> = a.inner_ref();
|
let a_ref: &CsrMatrix<T> = a.inner_ref();
|
||||||
let b_ref: &CsrMatrix<T> = b.inner_ref();
|
let b_ref: &CsrMatrix<T> = b.inner_ref();
|
||||||
let (a, b) = {
|
let (a, b) = {
|
||||||
@ -94,13 +146,8 @@ where
|
|||||||
(NoOp(_), NoOp(_)) => unreachable!(),
|
(NoOp(_), NoOp(_)) => unreachable!(),
|
||||||
(Transpose(ref a), NoOp(_)) => (Owned(a.transpose()), Borrowed(b_ref)),
|
(Transpose(ref a), NoOp(_)) => (Owned(a.transpose()), Borrowed(b_ref)),
|
||||||
(NoOp(_), Transpose(ref b)) => (Borrowed(a_ref), Owned(b.transpose())),
|
(NoOp(_), Transpose(ref b)) => (Borrowed(a_ref), Owned(b.transpose())),
|
||||||
(Transpose(ref a), Transpose(ref b)) => {
|
(Transpose(ref a), Transpose(ref b)) => (Owned(a.transpose()), Owned(b.transpose())),
|
||||||
(Owned(a.transpose()), Owned(b.transpose()))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
caller(beta, c, alpha, NoOp(a.as_ref()), NoOp(b.as_ref()))
|
||||||
spmm_csr_prealloc(beta, c, alpha, NoOp(a.as_ref()), NoOp(b.as_ref()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user