Cholesky: add unchecked construction compatible with AoSoA SIMD.

This commit is contained in:
Sébastien Crozet 2020-05-23 15:13:13 +02:00 committed by sebcrozet
parent 423b4b27b0
commit 3359e25435
2 changed files with 398 additions and 30 deletions

View File

@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
use num::One; use num::One;
use simba::scalar::ComplexField; use simba::scalar::ComplexField;
use simba::simd::SimdComplexField;
use crate::allocator::Allocator; use crate::allocator::Allocator;
use crate::base::{DefaultAllocator, Matrix, MatrixMN, MatrixN, SquareMatrix, Vector}; use crate::base::{DefaultAllocator, Matrix, MatrixMN, MatrixN, SquareMatrix, Vector};
@ -23,29 +24,26 @@ use crate::storage::{Storage, StorageMut};
MatrixN<N, D>: Deserialize<'de>")) MatrixN<N, D>: Deserialize<'de>"))
)] )]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Cholesky<N: ComplexField, D: Dim> pub struct Cholesky<N: SimdComplexField, D: Dim>
where where DefaultAllocator: Allocator<N, D, D>
DefaultAllocator: Allocator<N, D, D>,
{ {
chol: MatrixN<N, D>, chol: MatrixN<N, D>,
} }
impl<N: ComplexField, D: Dim> Copy for Cholesky<N, D> impl<N: SimdComplexField, D: Dim> Copy for Cholesky<N, D>
where where
DefaultAllocator: Allocator<N, D, D>, DefaultAllocator: Allocator<N, D, D>,
MatrixN<N, D>: Copy, MatrixN<N, D>: Copy,
{ {
} }
impl<N: ComplexField, D: DimSub<Dynamic>> Cholesky<N, D> impl<N: SimdComplexField, D: Dim> Cholesky<N, D>
where where DefaultAllocator: Allocator<N, D, D>
DefaultAllocator: Allocator<N, D, D>,
{ {
/// Attempts to compute the Cholesky decomposition of `matrix`. /// Computes the Cholesky decomposition of `matrix` without checking that the matrix is definite-positive.
/// ///
/// Returns `None` if the input matrix is not definite-positive. The input matrix is assumed /// If the input matrix is not definite-positive, the decomposition may contain trash values (Inf, NaN, etc.)
/// to be symmetric and only the lower-triangular part is read. pub fn new_unchecked(mut matrix: MatrixN<N, D>) -> Self {
pub fn new(mut matrix: MatrixN<N, D>) -> Option<Self> {
assert!(matrix.is_square(), "The input matrix must be square."); assert!(matrix.is_square(), "The input matrix must be square.");
let n = matrix.nrows(); let n = matrix.nrows();
@ -57,29 +55,21 @@ where
let (mut col_j, col_k) = matrix.columns_range_pair_mut(j, k); let (mut col_j, col_k) = matrix.columns_range_pair_mut(j, k);
let mut col_j = col_j.rows_range_mut(j..); let mut col_j = col_j.rows_range_mut(j..);
let col_k = col_k.rows_range(j..); let col_k = col_k.rows_range(j..);
col_j.axpy(factor.simd_conjugate(), &col_k, N::one());
col_j.axpy(factor.conjugate(), &col_k, N::one());
} }
let diag = unsafe { *matrix.get_unchecked((j, j)) }; let diag = unsafe { *matrix.get_unchecked((j, j)) };
if !diag.is_zero() { let denom = diag.simd_sqrt();
if let Some(denom) = diag.try_sqrt() {
unsafe {
*matrix.get_unchecked_mut((j, j)) = denom;
}
let mut col = matrix.slice_range_mut(j + 1.., j); unsafe {
col /= denom; *matrix.get_unchecked_mut((j, j)) = denom;
continue;
}
} }
// The diagonal element is either zero or its square root could not let mut col = matrix.slice_range_mut(j + 1.., j);
// be taken (e.g. for negative real numbers). col /= denom;
return None;
} }
Some(Cholesky { chol: matrix }) Cholesky { chol: matrix }
} }
/// Retrieves the lower-triangular factor of the Cholesky decomposition with its strictly /// Retrieves the lower-triangular factor of the Cholesky decomposition with its strictly
@ -121,8 +111,8 @@ where
S2: StorageMut<N, R2, C2>, S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>, ShapeConstraint: SameNumberOfRows<R2, D>,
{ {
let _ = self.chol.solve_lower_triangular_mut(b); self.chol.solve_lower_triangular_unchecked_mut(b);
let _ = self.chol.ad_solve_lower_triangular_mut(b); self.chol.ad_solve_lower_triangular_unchecked_mut(b);
} }
/// Returns the solution of the system `self * x = b` where `self` is the decomposed matrix and /// Returns the solution of the system `self * x = b` where `self` is the decomposed matrix and
@ -146,6 +136,51 @@ where
self.solve_mut(&mut res); self.solve_mut(&mut res);
res res
} }
}
impl<N: ComplexField, D: Dim> Cholesky<N, D>
where DefaultAllocator: Allocator<N, D, D>
{
/// Attempts to compute the Cholesky decomposition of `matrix`.
///
/// Returns `None` if the input matrix is not definite-positive. The input matrix is assumed
/// to be symmetric and only the lower-triangular part is read.
pub fn new(mut matrix: MatrixN<N, D>) -> Option<Self> {
assert!(matrix.is_square(), "The input matrix must be square.");
let n = matrix.nrows();
for j in 0..n {
for k in 0..j {
let factor = unsafe { -*matrix.get_unchecked((j, k)) };
let (mut col_j, col_k) = matrix.columns_range_pair_mut(j, k);
let mut col_j = col_j.rows_range_mut(j..);
let col_k = col_k.rows_range(j..);
col_j.axpy(factor.conjugate(), &col_k, N::one());
}
let diag = unsafe { *matrix.get_unchecked((j, j)) };
if !diag.is_zero() {
if let Some(denom) = diag.try_sqrt() {
unsafe {
*matrix.get_unchecked_mut((j, j)) = denom;
}
let mut col = matrix.slice_range_mut(j + 1.., j);
col /= denom;
continue;
}
}
// The diagonal element is either zero or its square root could not
// be taken (e.g. for negative real numbers).
return None;
}
Some(Cholesky { chol: matrix })
}
/// Given the Cholesky decomposition of a matrix `M`, a scalar `sigma` and a vector `v`, /// Given the Cholesky decomposition of a matrix `M`, a scalar `sigma` and a vector `v`,
/// performs a rank one update such that we end up with the decomposition of `M + sigma * (v * v.adjoint())`. /// performs a rank one update such that we end up with the decomposition of `M + sigma * (v * v.adjoint())`.
@ -327,8 +362,7 @@ where
} }
impl<N: ComplexField, D: DimSub<Dynamic>, S: Storage<N, D, D>> SquareMatrix<N, D, S> impl<N: ComplexField, D: DimSub<Dynamic>, S: Storage<N, D, D>> SquareMatrix<N, D, S>
where where DefaultAllocator: Allocator<N, D, D>
DefaultAllocator: Allocator<N, D, D>,
{ {
/// Attempts to compute the Cholesky decomposition of this matrix. /// Attempts to compute the Cholesky decomposition of this matrix.
/// ///

View File

@ -1,4 +1,5 @@
use simba::scalar::ComplexField; use simba::scalar::ComplexField;
use simba::simd::SimdComplexField;
use crate::base::allocator::Allocator; use crate::base::allocator::Allocator;
use crate::base::constraint::{SameNumberOfRows, ShapeConstraint}; use crate::base::constraint::{SameNumberOfRows, ShapeConstraint};
@ -432,3 +433,336 @@ impl<N: ComplexField, D: Dim, S: Storage<N, D, D>> SquareMatrix<N, D, S> {
true true
} }
} }
/*
*
* SIMD-compatible unchecked versions.
*
*/
impl<N: SimdComplexField, D: Dim, S: Storage<N, D, D>> SquareMatrix<N, D, S> {
/// Computes the solution of the linear system `self . x = b` where `x` is the unknown and only
/// the lower-triangular part of `self` (including the diagonal) is considered not-zero.
#[inline]
pub fn solve_lower_triangular_unchecked<R2: Dim, C2: Dim, S2>(
&self,
b: &Matrix<N, R2, C2, S2>,
) -> MatrixMN<N, R2, C2>
where
S2: Storage<N, R2, C2>,
DefaultAllocator: Allocator<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let mut res = b.clone_owned();
self.solve_lower_triangular_unchecked_mut(&mut res);
res
}
/// Computes the solution of the linear system `self . x = b` where `x` is the unknown and only
/// the upper-triangular part of `self` (including the diagonal) is considered not-zero.
#[inline]
pub fn solve_upper_triangular_unchecked<R2: Dim, C2: Dim, S2>(
&self,
b: &Matrix<N, R2, C2, S2>,
) -> MatrixMN<N, R2, C2>
where
S2: Storage<N, R2, C2>,
DefaultAllocator: Allocator<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let mut res = b.clone_owned();
self.solve_upper_triangular_unchecked_mut(&mut res);
res
}
/// Solves the linear system `self . x = b` where `x` is the unknown and only the
/// lower-triangular part of `self` (including the diagonal) is considered not-zero.
pub fn solve_lower_triangular_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..b.ncols() {
self.solve_lower_triangular_vector_unchecked_mut(&mut b.column_mut(i));
}
}
fn solve_lower_triangular_vector_unchecked_mut<R2: Dim, S2>(&self, b: &mut Vector<N, R2, S2>)
where
S2: StorageMut<N, R2, U1>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let dim = self.nrows();
for i in 0..dim {
let coeff;
unsafe {
let diag = *self.get_unchecked((i, i));
coeff = *b.vget_unchecked(i) / diag;
*b.vget_unchecked_mut(i) = coeff;
}
b.rows_range_mut(i + 1..)
.axpy(-coeff, &self.slice_range(i + 1.., i), N::one());
}
}
// FIXME: add the same but for solving upper-triangular.
/// Solves the linear system `self . x = b` where `x` is the unknown and only the
/// lower-triangular part of `self` is considered not-zero. The diagonal is never read as it is
/// assumed to be equal to `diag`. Returns `false` and does not modify its inputs if `diag` is zero.
pub fn solve_lower_triangular_with_diag_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
diag: N,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let dim = self.nrows();
let cols = b.ncols();
for k in 0..cols {
let mut bcol = b.column_mut(k);
for i in 0..dim - 1 {
let coeff = unsafe { *bcol.vget_unchecked(i) } / diag;
bcol.rows_range_mut(i + 1..)
.axpy(-coeff, &self.slice_range(i + 1.., i), N::one());
}
}
}
/// Solves the linear system `self . x = b` where `x` is the unknown and only the
/// upper-triangular part of `self` (including the diagonal) is considered not-zero.
pub fn solve_upper_triangular_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..b.ncols() {
self.solve_upper_triangular_vector_unchecked_mut(&mut b.column_mut(i))
}
}
fn solve_upper_triangular_vector_unchecked_mut<R2: Dim, S2>(&self, b: &mut Vector<N, R2, S2>)
where
S2: StorageMut<N, R2, U1>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let dim = self.nrows();
for i in (0..dim).rev() {
let coeff;
unsafe {
let diag = *self.get_unchecked((i, i));
coeff = *b.vget_unchecked(i) / diag;
*b.vget_unchecked_mut(i) = coeff;
}
b.rows_range_mut(..i)
.axpy(-coeff, &self.slice_range(..i, i), N::one());
}
}
/*
*
* Transpose and adjoint versions
*
*/
/// Computes the solution of the linear system `self.transpose() . x = b` where `x` is the unknown and only
/// the lower-triangular part of `self` (including the diagonal) is considered not-zero.
#[inline]
pub fn tr_solve_lower_triangular_unchecked<R2: Dim, C2: Dim, S2>(
&self,
b: &Matrix<N, R2, C2, S2>,
) -> MatrixMN<N, R2, C2>
where
S2: Storage<N, R2, C2>,
DefaultAllocator: Allocator<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let mut res = b.clone_owned();
self.tr_solve_lower_triangular_unchecked_mut(&mut res);
res
}
/// Computes the solution of the linear system `self.transpose() . x = b` where `x` is the unknown and only
/// the upper-triangular part of `self` (including the diagonal) is considered not-zero.
#[inline]
pub fn tr_solve_upper_triangular_unchecked<R2: Dim, C2: Dim, S2>(
&self,
b: &Matrix<N, R2, C2, S2>,
) -> MatrixMN<N, R2, C2>
where
S2: Storage<N, R2, C2>,
DefaultAllocator: Allocator<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let mut res = b.clone_owned();
self.tr_solve_upper_triangular_unchecked_mut(&mut res);
res
}
/// Solves the linear system `self.transpose() . x = b` where `x` is the unknown and only the
/// lower-triangular part of `self` (including the diagonal) is considered not-zero.
pub fn tr_solve_lower_triangular_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..b.ncols() {
self.xx_solve_lower_triangular_vector_unchecked_mut(
&mut b.column_mut(i),
|e| e,
|a, b| a.dot(b),
)
}
}
/// Solves the linear system `self.transpose() . x = b` where `x` is the unknown and only the
/// upper-triangular part of `self` (including the diagonal) is considered not-zero.
pub fn tr_solve_upper_triangular_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..b.ncols() {
self.xx_solve_upper_triangular_vector_unchecked_mut(
&mut b.column_mut(i),
|e| e,
|a, b| a.dot(b),
)
}
}
/// Computes the solution of the linear system `self.adjoint() . x = b` where `x` is the unknown and only
/// the lower-triangular part of `self` (including the diagonal) is considered not-zero.
#[inline]
pub fn ad_solve_lower_triangular_unchecked<R2: Dim, C2: Dim, S2>(
&self,
b: &Matrix<N, R2, C2, S2>,
) -> MatrixMN<N, R2, C2>
where
S2: Storage<N, R2, C2>,
DefaultAllocator: Allocator<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let mut res = b.clone_owned();
self.ad_solve_lower_triangular_unchecked_mut(&mut res);
res
}
/// Computes the solution of the linear system `self.adjoint() . x = b` where `x` is the unknown and only
/// the upper-triangular part of `self` (including the diagonal) is considered not-zero.
#[inline]
pub fn ad_solve_upper_triangular_unchecked<R2: Dim, C2: Dim, S2>(
&self,
b: &Matrix<N, R2, C2, S2>,
) -> MatrixMN<N, R2, C2>
where
S2: Storage<N, R2, C2>,
DefaultAllocator: Allocator<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let mut res = b.clone_owned();
self.ad_solve_upper_triangular_unchecked_mut(&mut res);
res
}
/// Solves the linear system `self.adjoint() . x = b` where `x` is the unknown and only the
/// lower-triangular part of `self` (including the diagonal) is considered not-zero.
pub fn ad_solve_lower_triangular_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..b.ncols() {
self.xx_solve_lower_triangular_vector_unchecked_mut(
&mut b.column_mut(i),
|e| e.simd_conjugate(),
|a, b| a.dotc(b),
)
}
}
/// Solves the linear system `self.adjoint() . x = b` where `x` is the unknown and only the
/// upper-triangular part of `self` (including the diagonal) is considered not-zero.
pub fn ad_solve_upper_triangular_unchecked_mut<R2: Dim, C2: Dim, S2>(
&self,
b: &mut Matrix<N, R2, C2, S2>,
) where
S2: StorageMut<N, R2, C2>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..b.ncols() {
self.xx_solve_upper_triangular_vector_unchecked_mut(
&mut b.column_mut(i),
|e| e.simd_conjugate(),
|a, b| a.dotc(b),
)
}
}
#[inline(always)]
fn xx_solve_lower_triangular_vector_unchecked_mut<R2: Dim, S2>(
&self,
b: &mut Vector<N, R2, S2>,
conjugate: impl Fn(N) -> N,
dot: impl Fn(
&DVectorSlice<N, S::RStride, S::CStride>,
&DVectorSlice<N, S2::RStride, S2::CStride>,
) -> N,
) where
S2: StorageMut<N, R2, U1>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
let dim = self.nrows();
for i in (0..dim).rev() {
let dot = dot(&self.slice_range(i + 1.., i), &b.slice_range(i + 1.., 0));
unsafe {
let b_i = b.vget_unchecked_mut(i);
let diag = conjugate(*self.get_unchecked((i, i)));
*b_i = (*b_i - dot) / diag;
}
}
}
#[inline(always)]
fn xx_solve_upper_triangular_vector_unchecked_mut<R2: Dim, S2>(
&self,
b: &mut Vector<N, R2, S2>,
conjugate: impl Fn(N) -> N,
dot: impl Fn(
&DVectorSlice<N, S::RStride, S::CStride>,
&DVectorSlice<N, S2::RStride, S2::CStride>,
) -> N,
) where
S2: StorageMut<N, R2, U1>,
ShapeConstraint: SameNumberOfRows<R2, D>,
{
for i in 0..self.nrows() {
let dot = dot(&self.slice_range(..i, i), &b.slice_range(..i, 0));
unsafe {
let b_i = b.vget_unchecked_mut(i);
let diag = conjugate(*self.get_unchecked((i, i)));
*b_i = (*b_i - dot) / diag;
}
}
}
}