From 002e735c76eeff04cdc0020c45fb5c0d7ce17953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Crozet?= Date: Tue, 17 Mar 2020 17:58:36 +0100 Subject: [PATCH] Make blas, matrix, norm, and ops.rs compatible with SoA Simd. --- src/base/blas.rs | 198 ++++++++++++++++++++++++----------- src/base/matrix.rs | 246 +++++++++++++++++++++++++++----------------- src/base/norm.rs | 249 +++++++++++++++++++++++++++++---------------- src/base/ops.rs | 96 +++++++++-------- src/lib.rs | 19 ++-- 5 files changed, 515 insertions(+), 293 deletions(-) diff --git a/src/base/blas.rs b/src/base/blas.rs index d147bbcb..5565e561 100644 --- a/src/base/blas.rs +++ b/src/base/blas.rs @@ -1,3 +1,4 @@ +use crate::SimdComplexField; use alga::general::{ClosedAdd, ClosedMul, ComplexField}; #[cfg(feature = "std")] use matrixmultiply; @@ -11,8 +12,9 @@ use crate::base::constraint::{ }; use crate::base::dimension::{Dim, Dynamic, U1, U2, U3, U4}; use crate::base::storage::{Storage, StorageMut}; -use crate::base::{DefaultAllocator, Matrix, Scalar, SquareMatrix, Vector, DVectorSlice, VectorSliceN}; - +use crate::base::{ + DVectorSlice, DefaultAllocator, Matrix, Scalar, SquareMatrix, Vector, VectorSliceN, +}; // FIXME: find a way to avoid code duplication just for complex number support. impl> Vector { @@ -102,7 +104,7 @@ impl> Vector { /// ``` #[inline] pub fn iamax(&self) -> usize - where N: Signed { + where N: Signed { assert!(!self.is_empty(), "The input vector must not be empty."); let mut the_max = unsafe { self.vget_unchecked(0).abs() }; @@ -173,7 +175,7 @@ impl> Vector { /// ``` #[inline] pub fn iamin(&self) -> usize - where N: Signed { + where N: Signed { assert!(!self.is_empty(), "The input vector must not be empty."); let mut the_min = unsafe { self.vget_unchecked(0).abs() }; @@ -229,7 +231,6 @@ impl> Matrix { } } - impl> Matrix { /// Computes the index of the matrix component with the largest absolute value. /// @@ -267,10 +268,14 @@ impl> Matrix where N: Scalar + Zero + ClosedAdd + ClosedMul { #[inline(always)] - fn dotx(&self, rhs: &Matrix, conjugate: impl Fn(N) -> N) -> N - where - SB: Storage, - ShapeConstraint: DimEq + DimEq, + fn dotx( + &self, + rhs: &Matrix, + conjugate: impl Fn(N) -> N, + ) -> N + where + SB: Storage, + ShapeConstraint: DimEq + DimEq, { assert!( self.nrows() == rhs.nrows(), @@ -281,27 +286,36 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul // because the `for` loop below won't be very efficient on those. if (R::is::() || R2::is::()) && (C::is::() || C2::is::()) { unsafe { - let a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) * rhs.get_unchecked((0, 0)).inlined_clone(); - let b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) * rhs.get_unchecked((1, 0)).inlined_clone(); + let a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) + * rhs.get_unchecked((0, 0)).inlined_clone(); + let b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) + * rhs.get_unchecked((1, 0)).inlined_clone(); return a + b; } } if (R::is::() || R2::is::()) && (C::is::() || C2::is::()) { unsafe { - let a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) * rhs.get_unchecked((0, 0)).inlined_clone(); - let b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) * rhs.get_unchecked((1, 0)).inlined_clone(); - let c = conjugate(self.get_unchecked((2, 0)).inlined_clone()) * rhs.get_unchecked((2, 0)).inlined_clone(); + let a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) + * rhs.get_unchecked((0, 0)).inlined_clone(); + let b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) + * rhs.get_unchecked((1, 0)).inlined_clone(); + let c = conjugate(self.get_unchecked((2, 0)).inlined_clone()) + * rhs.get_unchecked((2, 0)).inlined_clone(); return a + b + c; } } if (R::is::() || R2::is::()) && (C::is::() || C2::is::()) { unsafe { - let mut a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) * rhs.get_unchecked((0, 0)).inlined_clone(); - let mut b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) * rhs.get_unchecked((1, 0)).inlined_clone(); - let c = conjugate(self.get_unchecked((2, 0)).inlined_clone()) * rhs.get_unchecked((2, 0)).inlined_clone(); - let d = conjugate(self.get_unchecked((3, 0)).inlined_clone()) * rhs.get_unchecked((3, 0)).inlined_clone(); + let mut a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) + * rhs.get_unchecked((0, 0)).inlined_clone(); + let mut b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) + * rhs.get_unchecked((1, 0)).inlined_clone(); + let c = conjugate(self.get_unchecked((2, 0)).inlined_clone()) + * rhs.get_unchecked((2, 0)).inlined_clone(); + let d = conjugate(self.get_unchecked((3, 0)).inlined_clone()) + * rhs.get_unchecked((3, 0)).inlined_clone(); a += c; b += d; @@ -341,14 +355,38 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul acc7 = N::zero(); while self.nrows() - i >= 8 { - acc0 += unsafe { conjugate(self.get_unchecked((i + 0, j)).inlined_clone()) * rhs.get_unchecked((i + 0, j)).inlined_clone() }; - acc1 += unsafe { conjugate(self.get_unchecked((i + 1, j)).inlined_clone()) * rhs.get_unchecked((i + 1, j)).inlined_clone() }; - acc2 += unsafe { conjugate(self.get_unchecked((i + 2, j)).inlined_clone()) * rhs.get_unchecked((i + 2, j)).inlined_clone() }; - acc3 += unsafe { conjugate(self.get_unchecked((i + 3, j)).inlined_clone()) * rhs.get_unchecked((i + 3, j)).inlined_clone() }; - acc4 += unsafe { conjugate(self.get_unchecked((i + 4, j)).inlined_clone()) * rhs.get_unchecked((i + 4, j)).inlined_clone() }; - acc5 += unsafe { conjugate(self.get_unchecked((i + 5, j)).inlined_clone()) * rhs.get_unchecked((i + 5, j)).inlined_clone() }; - acc6 += unsafe { conjugate(self.get_unchecked((i + 6, j)).inlined_clone()) * rhs.get_unchecked((i + 6, j)).inlined_clone() }; - acc7 += unsafe { conjugate(self.get_unchecked((i + 7, j)).inlined_clone()) * rhs.get_unchecked((i + 7, j)).inlined_clone() }; + acc0 += unsafe { + conjugate(self.get_unchecked((i + 0, j)).inlined_clone()) + * rhs.get_unchecked((i + 0, j)).inlined_clone() + }; + acc1 += unsafe { + conjugate(self.get_unchecked((i + 1, j)).inlined_clone()) + * rhs.get_unchecked((i + 1, j)).inlined_clone() + }; + acc2 += unsafe { + conjugate(self.get_unchecked((i + 2, j)).inlined_clone()) + * rhs.get_unchecked((i + 2, j)).inlined_clone() + }; + acc3 += unsafe { + conjugate(self.get_unchecked((i + 3, j)).inlined_clone()) + * rhs.get_unchecked((i + 3, j)).inlined_clone() + }; + acc4 += unsafe { + conjugate(self.get_unchecked((i + 4, j)).inlined_clone()) + * rhs.get_unchecked((i + 4, j)).inlined_clone() + }; + acc5 += unsafe { + conjugate(self.get_unchecked((i + 5, j)).inlined_clone()) + * rhs.get_unchecked((i + 5, j)).inlined_clone() + }; + acc6 += unsafe { + conjugate(self.get_unchecked((i + 6, j)).inlined_clone()) + * rhs.get_unchecked((i + 6, j)).inlined_clone() + }; + acc7 += unsafe { + conjugate(self.get_unchecked((i + 7, j)).inlined_clone()) + * rhs.get_unchecked((i + 7, j)).inlined_clone() + }; i += 8; } @@ -358,14 +396,16 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul res += acc3 + acc7; for k in i..self.nrows() { - res += unsafe { conjugate(self.get_unchecked((k, j)).inlined_clone()) * rhs.get_unchecked((k, j)).inlined_clone() } + res += unsafe { + conjugate(self.get_unchecked((k, j)).inlined_clone()) + * rhs.get_unchecked((k, j)).inlined_clone() + } } } res } - /// The dot product between two vectors or matrices (seen as vectors). /// /// This is equal to `self.transpose() * rhs`. For the sesquilinear complex dot product, use @@ -419,12 +459,12 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul /// ``` #[inline] pub fn dotc(&self, rhs: &Matrix) -> N - where - N: ComplexField, - SB: Storage, - ShapeConstraint: DimEq + DimEq, + where + N: SimdComplexField, + SB: Storage, + ShapeConstraint: DimEq + DimEq, { - self.dotx(rhs, ComplexField::conjugate) + self.dotx(rhs, N::simd_conjugate) } /// The dot product between the transpose of `self` and `rhs`. @@ -460,7 +500,10 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul for j in 0..self.nrows() { for i in 0..self.ncols() { - res += unsafe { self.get_unchecked((j, i)).inlined_clone() * rhs.get_unchecked((i, j)).inlined_clone() } + res += unsafe { + self.get_unchecked((j, i)).inlined_clone() + * rhs.get_unchecked((i, j)).inlined_clone() + } } } @@ -468,12 +511,25 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul } } -fn array_axcpy(y: &mut [N], a: N, x: &[N], c: N, beta: N, stride1: usize, stride2: usize, len: usize) -where N: Scalar + Zero + ClosedAdd + ClosedMul { +fn array_axcpy( + y: &mut [N], + a: N, + x: &[N], + c: N, + beta: N, + stride1: usize, + stride2: usize, + len: usize, +) where + N: Scalar + Zero + ClosedAdd + ClosedMul, +{ for i in 0..len { unsafe { let y = y.get_unchecked_mut(i * stride1); - *y = a.inlined_clone() * x.get_unchecked(i * stride2).inlined_clone() * c.inlined_clone() + beta.inlined_clone() * y.inlined_clone(); + *y = a.inlined_clone() + * x.get_unchecked(i * stride2).inlined_clone() + * c.inlined_clone() + + beta.inlined_clone() * y.inlined_clone(); } } } @@ -482,7 +538,9 @@ fn array_axc(y: &mut [N], a: N, x: &[N], c: N, stride1: usize, stride2: usize where N: Scalar + Zero + ClosedAdd + ClosedMul { for i in 0..len { unsafe { - *y.get_unchecked_mut(i * stride1) = a.inlined_clone() * x.get_unchecked(i * stride2).inlined_clone() * c.inlined_clone(); + *y.get_unchecked_mut(i * stride1) = a.inlined_clone() + * x.get_unchecked(i * stride2).inlined_clone() + * c.inlined_clone(); } } } @@ -613,7 +671,6 @@ where } } - #[inline(always)] fn xxgemv( &mut self, @@ -621,7 +678,10 @@ where a: &SquareMatrix, x: &Vector, beta: N, - dot: impl Fn(&DVectorSlice, &DVectorSlice) -> N, + dot: impl Fn( + &DVectorSlice, + &DVectorSlice, + ) -> N, ) where N: One, SB: Storage, @@ -660,8 +720,11 @@ where val = x.vget_unchecked(j).inlined_clone(); *self.vget_unchecked_mut(j) += alpha.inlined_clone() * dot; } - self.rows_range_mut(j + 1..) - .axpy(alpha.inlined_clone() * val, &col2.rows_range(j + 1..), N::one()); + self.rows_range_mut(j + 1..).axpy( + alpha.inlined_clone() * val, + &col2.rows_range(j + 1..), + N::one(), + ); } } @@ -765,7 +828,7 @@ where x: &Vector, beta: N, ) where - N: ComplexField, + N: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, @@ -773,7 +836,6 @@ where self.xxgemv(alpha, a, x, beta, |a, b| a.dotc(b)) } - #[inline(always)] fn gemv_xx( &mut self, @@ -809,12 +871,12 @@ where } else { for j in 0..ncols2 { let val = unsafe { self.vget_unchecked_mut(j) }; - *val = alpha.inlined_clone() * dot(&a.column(j), x) + beta.inlined_clone() * val.inlined_clone(); + *val = alpha.inlined_clone() * dot(&a.column(j), x) + + beta.inlined_clone() * val.inlined_clone(); } } } - /// Computes `self = alpha * a.transpose() * x + beta * self`, where `a` is a matrix, `x` a vector, and /// `alpha, beta` two scalars. /// @@ -876,7 +938,7 @@ where x: &Vector, beta: N, ) where - N: ComplexField, + N: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, @@ -914,7 +976,8 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul for j in 0..ncols1 { // FIXME: avoid bound checks. let val = unsafe { conjugate(y.vget_unchecked(j).inlined_clone()) }; - self.column_mut(j).axpy(alpha.inlined_clone() * val, x, beta.inlined_clone()); + self.column_mut(j) + .axpy(alpha.inlined_clone() * val, x, beta.inlined_clone()); } } @@ -975,12 +1038,12 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul y: &Vector, beta: N, ) where - N: ComplexField, + N: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { - self.gerx(alpha, x, y, beta, ComplexField::conjugate) + self.gerx(alpha, x, y, beta, SimdComplexField::simd_conjugate) } /// Computes `self = alpha * a * b + beta * self`, where `a, b, self` are matrices. @@ -1032,7 +1095,8 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul || R2::is::() || C2::is::() || R3::is::() - || C3::is::() { + || C3::is::() + { // matrixmultiply can be used only if the std feature is available. let nrows1 = self.nrows(); let (nrows2, ncols2) = a.shape(); @@ -1125,10 +1189,14 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul } } - for j1 in 0..ncols1 { // FIXME: avoid bound checks. - self.column_mut(j1).gemv(alpha.inlined_clone(), a, &b.column(j1), beta.inlined_clone()); + self.column_mut(j1).gemv( + alpha.inlined_clone(), + a, + &b.column(j1), + beta.inlined_clone(), + ); } } @@ -1185,11 +1253,15 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul for j1 in 0..ncols1 { // FIXME: avoid bound checks. - self.column_mut(j1).gemv_tr(alpha.inlined_clone(), a, &b.column(j1), beta.inlined_clone()); + self.column_mut(j1).gemv_tr( + alpha.inlined_clone(), + a, + &b.column(j1), + beta.inlined_clone(), + ); } } - /// Computes `self = alpha * a.adjoint() * b + beta * self`, where `a, b, self` are matrices. /// `alpha` and `beta` are scalar. /// @@ -1220,12 +1292,12 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul b: &Matrix, beta: N, ) where - N: ComplexField, + N: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: SameNumberOfRows - + SameNumberOfColumns - + AreMultipliable, + + SameNumberOfColumns + + AreMultipliable, { let (nrows1, ncols1) = self.shape(); let (nrows2, ncols2) = a.shape(); @@ -1386,12 +1458,12 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul y: &Vector, beta: N, ) where - N: ComplexField, + N: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { - self.xxgerx(alpha, x, y, beta, ComplexField::conjugate) + self.xxgerx(alpha, x, y, beta, SimdComplexField::simd_conjugate) } } @@ -1534,11 +1606,13 @@ where N: Scalar + Zero + One + ClosedAdd + ClosedMul DimEq + DimEq + DimEq + AreMultipliable, { work.gemv(N::one(), mid, &rhs.column(0), N::zero()); - self.column_mut(0).gemv_tr(alpha.inlined_clone(), &rhs, work, beta.inlined_clone()); + self.column_mut(0) + .gemv_tr(alpha.inlined_clone(), &rhs, work, beta.inlined_clone()); for j in 1..rhs.ncols() { work.gemv(N::one(), mid, &rhs.column(j), N::zero()); - self.column_mut(j).gemv_tr(alpha.inlined_clone(), &rhs, work, beta.inlined_clone()); + self.column_mut(j) + .gemv_tr(alpha.inlined_clone(), &rhs, work, beta.inlined_clone()); } } diff --git a/src/base/matrix.rs b/src/base/matrix.rs index b1e0653c..9cd28e7b 100644 --- a/src/base/matrix.rs +++ b/src/base/matrix.rs @@ -16,16 +16,20 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[cfg(feature = "abomonation-serialize")] use abomonation::Abomonation; -use alga::general::{ClosedAdd, ClosedMul, ClosedSub, RealField, Ring, ComplexField, Field}; +use alga::general::{ClosedAdd, ClosedMul, ClosedSub, Field, RealField, Ring}; +use alga::simd::SimdPartialOrd; use crate::base::allocator::{Allocator, SameShapeAllocator, SameShapeC, SameShapeR}; use crate::base::constraint::{DimEq, SameNumberOfColumns, SameNumberOfRows, ShapeConstraint}; use crate::base::dimension::{Dim, DimAdd, DimSum, IsNotStaticOne, U1, U2, U3}; -use crate::base::iter::{MatrixIter, MatrixIterMut, RowIter, RowIterMut, ColumnIter, ColumnIterMut}; +use crate::base::iter::{ + ColumnIter, ColumnIterMut, MatrixIter, MatrixIterMut, RowIter, RowIterMut, +}; use crate::base::storage::{ ContiguousStorage, ContiguousStorageMut, Owned, SameShapeStorage, Storage, StorageMut, }; use crate::base::{DefaultAllocator, MatrixMN, MatrixN, Scalar, Unit, VectorN}; +use crate::{SimdComplexField, SimdRealField}; /// A square matrix. pub type SquareMatrix = Matrix; @@ -431,6 +435,25 @@ impl> Matrix { res } + /// Similar to `self.iter().fold(init, f)` except that `init` is replaced by a closure. + /// + /// The initialization closure is given the first component of this matrix: + /// - If the matrix has no component (0 rows or 0 columns) then `init_f` is called with `None` + /// and its return value is the value returned by this method. + /// - If the matrix has has least one component, then `init_f` is called with the first component + /// to compute the initial value. Folding then continues on all the remaining components of the matrix. + #[inline] + pub fn fold_with( + &self, + init_f: impl FnOnce(Option<&N>) -> N2, + f: impl FnMut(N2, &N) -> N2, + ) -> N2 + { + let mut it = self.iter(); + let init = init_f(it.next()); + it.fold(init, f) + } + /// Returns a matrix containing the result of `f` applied to each of its entries. Unlike `map`, /// `f` also gets passed the row and column index, i.e. `f(row, col, value)`. #[inline] @@ -553,13 +576,18 @@ impl> Matrix { /// Folds a function `f` on each pairs of entries from `self` and `rhs`. #[inline] - pub fn zip_fold(&self, rhs: &Matrix, init: Acc, mut f: impl FnMut(Acc, N, N2) -> Acc) -> Acc - where - N2: Scalar, - R2: Dim, - C2: Dim, - S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + pub fn zip_fold( + &self, + rhs: &Matrix, + init: Acc, + mut f: impl FnMut(Acc, N, N2) -> Acc, + ) -> Acc + where + N2: Scalar, + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, { let (nrows, ncols) = self.data.shape(); @@ -718,7 +746,8 @@ impl> Matrix { for j in 0..ncols { for i in 0..nrows { unsafe { - *self.get_unchecked_mut((i, j)) = slice.get_unchecked(i + j * nrows).inlined_clone(); + *self.get_unchecked_mut((i, j)) = + slice.get_unchecked(i + j * nrows).inlined_clone(); } } } @@ -774,7 +803,7 @@ impl> Matrix { // FIXME: rename `apply` to `apply_mut` and `apply_into` to `apply`? /// Returns `self` with each of its components replaced by the result of a closure `f` applied on it. #[inline] - pub fn apply_into N>(mut self, f: F) -> Self{ + pub fn apply_into N>(mut self, f: F) -> Self { self.apply(f); self } @@ -797,12 +826,17 @@ impl> Matrix { /// Replaces each component of `self` by the result of a closure `f` applied on its components /// joined with the components from `rhs`. #[inline] - pub fn zip_apply(&mut self, rhs: &Matrix, mut f: impl FnMut(N, N2) -> N) - where N2: Scalar, - R2: Dim, - C2: Dim, - S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + pub fn zip_apply( + &mut self, + rhs: &Matrix, + mut f: impl FnMut(N, N2) -> N, + ) where + N2: Scalar, + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { let (nrows, ncols) = self.shape(); assert!( @@ -821,21 +855,26 @@ impl> Matrix { } } - /// Replaces each component of `self` by the result of a closure `f` applied on its components /// joined with the components from `b` and `c`. #[inline] - pub fn zip_zip_apply(&mut self, b: &Matrix, c: &Matrix, mut f: impl FnMut(N, N2, N3) -> N) - where N2: Scalar, - R2: Dim, - C2: Dim, - S2: Storage, - N3: Scalar, - R3: Dim, - C3: Dim, - S3: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + pub fn zip_zip_apply( + &mut self, + b: &Matrix, + c: &Matrix, + mut f: impl FnMut(N, N2, N3) -> N, + ) where + N2: Scalar, + R2: Dim, + C2: Dim, + S2: Storage, + N3: Scalar, + R3: Dim, + C3: Dim, + S3: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { let (nrows, ncols) = self.shape(); assert!( @@ -914,7 +953,7 @@ impl> Matrix { } } -impl> Matrix { +impl> Matrix { /// Takes the adjoint (aka. conjugate-transpose) of `self` and store the result into `out`. #[inline] pub fn adjoint_to(&self, out: &mut Matrix) @@ -934,7 +973,7 @@ impl> Matrix { for i in 0..nrows { for j in 0..ncols { unsafe { - *out.get_unchecked_mut((j, i)) = self.get_unchecked((i, j)).conjugate(); + *out.get_unchecked_mut((j, i)) = self.get_unchecked((i, j)).simd_conjugate(); } } } @@ -959,11 +998,11 @@ impl> Matrix { #[deprecated(note = "Renamed `self.adjoint_to(out)`.")] #[inline] pub fn conjugate_transpose_to(&self, out: &mut Matrix) - where - R2: Dim, - C2: Dim, - SB: StorageMut, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + where + R2: Dim, + C2: Dim, + SB: StorageMut, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, { self.adjoint_to(out) } @@ -972,7 +1011,7 @@ impl> Matrix { #[deprecated(note = "Renamed `self.adjoint()`.")] #[inline] pub fn conjugate_transpose(&self) -> MatrixMN - where DefaultAllocator: Allocator { + where DefaultAllocator: Allocator { self.adjoint() } @@ -980,48 +1019,48 @@ impl> Matrix { #[inline] #[must_use = "Did you mean to use conjugate_mut()?"] pub fn conjugate(&self) -> MatrixMN - where DefaultAllocator: Allocator { - self.map(|e| e.conjugate()) + where DefaultAllocator: Allocator { + self.map(|e| e.simd_conjugate()) } /// Divides each component of the complex matrix `self` by the given real. #[inline] #[must_use = "Did you mean to use unscale_mut()?"] - pub fn unscale(&self, real: N::RealField) -> MatrixMN - where DefaultAllocator: Allocator { - self.map(|e| e.unscale(real)) + pub fn unscale(&self, real: N::SimdRealField) -> MatrixMN + where DefaultAllocator: Allocator { + self.map(|e| e.simd_unscale(real)) } /// Multiplies each component of the complex matrix `self` by the given real. #[inline] #[must_use = "Did you mean to use scale_mut()?"] - pub fn scale(&self, real: N::RealField) -> MatrixMN - where DefaultAllocator: Allocator { - self.map(|e| e.scale(real)) + pub fn scale(&self, real: N::SimdRealField) -> MatrixMN + where DefaultAllocator: Allocator { + self.map(|e| e.simd_scale(real)) } } -impl> Matrix { +impl> Matrix { /// The conjugate of the complex matrix `self` computed in-place. #[inline] pub fn conjugate_mut(&mut self) { - self.apply(|e| e.conjugate()) + self.apply(|e| e.simd_conjugate()) } /// Divides each component of the complex matrix `self` by the given real. #[inline] - pub fn unscale_mut(&mut self, real: N::RealField) { - self.apply(|e| e.unscale(real)) + pub fn unscale_mut(&mut self, real: N::SimdRealField) { + self.apply(|e| e.simd_unscale(real)) } /// Multiplies each component of the complex matrix `self` by the given real. #[inline] - pub fn scale_mut(&mut self, real: N::RealField) { - self.apply(|e| e.scale(real)) + pub fn scale_mut(&mut self, real: N::SimdRealField) { + self.apply(|e| e.simd_scale(real)) } } -impl> Matrix { +impl> Matrix { /// Sets `self` to its adjoint. #[deprecated(note = "Renamed to `self.adjoint_mut()`.")] pub fn conjugate_transform_mut(&mut self) { @@ -1042,8 +1081,8 @@ impl> Matrix { unsafe { let ref_ij = self.get_unchecked_mut((i, j)) as *mut N; let ref_ji = self.get_unchecked_mut((j, i)) as *mut N; - let conj_ij = (*ref_ij).conjugate(); - let conj_ji = (*ref_ji).conjugate(); + let conj_ij = (*ref_ij).simd_conjugate(); + let conj_ji = (*ref_ji).simd_conjugate(); *ref_ij = conj_ji; *ref_ji = conj_ij; } @@ -1051,7 +1090,7 @@ impl> Matrix { { let diag = unsafe { self.get_unchecked_mut((i, i)) }; - *diag = diag.conjugate(); + *diag = diag.simd_conjugate(); } } } @@ -1061,7 +1100,7 @@ impl> SquareMatrix { /// The diagonal of this matrix. #[inline] pub fn diagonal(&self) -> VectorN - where DefaultAllocator: Allocator { + where DefaultAllocator: Allocator { self.map_diagonal(|e| e) } @@ -1070,7 +1109,7 @@ impl> SquareMatrix { /// This is a more efficient version of `self.diagonal().map(f)` since this /// allocates only once. pub fn map_diagonal(&self, mut f: impl FnMut(N) -> N2) -> VectorN - where DefaultAllocator: Allocator { + where DefaultAllocator: Allocator { assert!( self.is_square(), "Unable to get the diagonal of a non-square matrix." @@ -1091,7 +1130,7 @@ impl> SquareMatrix { /// Computes a trace of a square matrix, i.e., the sum of its diagonal elements. #[inline] pub fn trace(&self) -> N - where N: Ring { + where N: Ring { assert!( self.is_square(), "Cannot compute the trace of non-square matrix." @@ -1108,12 +1147,15 @@ impl> SquareMatrix { } } -impl> SquareMatrix { +impl> SquareMatrix { /// The symmetric part of `self`, i.e., `0.5 * (self + self.transpose())`. #[inline] pub fn symmetric_part(&self) -> MatrixMN - where DefaultAllocator: Allocator { - assert!(self.is_square(), "Cannot compute the symmetric part of a non-square matrix."); + where DefaultAllocator: Allocator { + assert!( + self.is_square(), + "Cannot compute the symmetric part of a non-square matrix." + ); let mut tr = self.transpose(); tr += self; tr *= crate::convert::<_, N>(0.5); @@ -1123,8 +1165,11 @@ impl> SquareMatrix { /// The hermitian part of `self`, i.e., `0.5 * (self + self.adjoint())`. #[inline] pub fn hermitian_part(&self) -> MatrixMN - where DefaultAllocator: Allocator { - assert!(self.is_square(), "Cannot compute the hermitian part of a non-square matrix."); + where DefaultAllocator: Allocator { + assert!( + self.is_square(), + "Cannot compute the hermitian part of a non-square matrix." + ); let mut tr = self.adjoint(); tr += self; @@ -1133,20 +1178,24 @@ impl> SquareMatrix { } } -impl + IsNotStaticOne, S: Storage> Matrix { - +impl + IsNotStaticOne, S: Storage> + Matrix +{ /// Yields the homogeneous matrix for this matrix, i.e., appending an additional dimension and /// and setting the diagonal element to `1`. #[inline] pub fn to_homogeneous(&self) -> MatrixN> where DefaultAllocator: Allocator, DimSum> { - assert!(self.is_square(), "Only square matrices can currently be transformed to homogeneous coordinates."); + assert!( + self.is_square(), + "Only square matrices can currently be transformed to homogeneous coordinates." + ); let dim = DimSum::::from_usize(self.nrows() + 1); let mut res = MatrixN::identity_generic(dim, dim); - res.generic_slice_mut::((0, 0), self.data.shape()).copy_from(&self); + res.generic_slice_mut::((0, 0), self.data.shape()) + .copy_from(&self); res } - } impl, S: Storage> Vector { @@ -1347,7 +1396,8 @@ impl Eq for Matrix where N: Scalar + Eq, S: Storage, -{} +{ +} impl PartialEq> for Matrix where @@ -1357,7 +1407,7 @@ where R: Dim, R2: Dim, S: Storage, - S2: Storage + S2: Storage, { #[inline] fn eq(&self, right: &Matrix) -> bool { @@ -1377,7 +1427,9 @@ macro_rules! impl_fmt { #[cfg(feature = "std")] fn val_width(val: &N, f: &mut fmt::Formatter) -> usize { match f.precision() { - Some(precision) => format!($fmt_str_with_precision, val, precision).chars().count(), + Some(precision) => format!($fmt_str_with_precision, val, precision) + .chars() + .count(), None => format!($fmt_str_without_precision, val).chars().count(), } } @@ -1421,7 +1473,9 @@ macro_rules! impl_fmt { let pad = max_length_with_space - number_length; write!(f, " {:>thepad$}", "", thepad = pad)?; match f.precision() { - Some(precision) => write!(f, $fmt_str_with_precision, (*self)[(i, j)], precision)?, + Some(precision) => { + write!(f, $fmt_str_with_precision, (*self)[(i, j)], precision)? + } None => write!(f, $fmt_str_without_precision, (*self)[(i, j)])?, } } @@ -1451,13 +1505,16 @@ impl_fmt!(fmt::Pointer, "{:p}", "{:.1$p}"); #[test] fn lower_exp() { let test = crate::Matrix2::new(1e6, 2e5, 2e-5, 1.); - assert_eq!(format!("{:e}", test), r" + assert_eq!( + format!("{:e}", test), + r" ┌ ┐ │ 1e6 2e5 │ │ 2e-5 1e0 │ └ ┘ -") +" + ) } impl> Matrix { @@ -1477,7 +1534,8 @@ impl> Matrix { unsafe { self.get_unchecked((0, 0)).inlined_clone() * b.get_unchecked((1, 0)).inlined_clone() - - self.get_unchecked((1, 0)).inlined_clone() * b.get_unchecked((0, 0)).inlined_clone() + - self.get_unchecked((1, 0)).inlined_clone() + * b.get_unchecked((0, 0)).inlined_clone() } } @@ -1520,9 +1578,12 @@ impl> Matrix { let by = b.get_unchecked((1, 0)); let bz = b.get_unchecked((2, 0)); - *res.get_unchecked_mut((0, 0)) = ay.inlined_clone() * bz.inlined_clone() - az.inlined_clone() * by.inlined_clone(); - *res.get_unchecked_mut((1, 0)) = az.inlined_clone() * bx.inlined_clone() - ax.inlined_clone() * bz.inlined_clone(); - *res.get_unchecked_mut((2, 0)) = ax.inlined_clone() * by.inlined_clone() - ay.inlined_clone() * bx.inlined_clone(); + *res.get_unchecked_mut((0, 0)) = ay.inlined_clone() * bz.inlined_clone() + - az.inlined_clone() * by.inlined_clone(); + *res.get_unchecked_mut((1, 0)) = az.inlined_clone() * bx.inlined_clone() + - ax.inlined_clone() * bz.inlined_clone(); + *res.get_unchecked_mut((2, 0)) = ax.inlined_clone() * by.inlined_clone() + - ay.inlined_clone() * bx.inlined_clone(); res } @@ -1541,9 +1602,12 @@ impl> Matrix { let by = b.get_unchecked((0, 1)); let bz = b.get_unchecked((0, 2)); - *res.get_unchecked_mut((0, 0)) = ay.inlined_clone() * bz.inlined_clone() - az.inlined_clone() * by.inlined_clone(); - *res.get_unchecked_mut((0, 1)) = az.inlined_clone() * bx.inlined_clone() - ax.inlined_clone() * bz.inlined_clone(); - *res.get_unchecked_mut((0, 2)) = ax.inlined_clone() * by.inlined_clone() - ay.inlined_clone() * bx.inlined_clone(); + *res.get_unchecked_mut((0, 0)) = ay.inlined_clone() * bz.inlined_clone() + - az.inlined_clone() * by.inlined_clone(); + *res.get_unchecked_mut((0, 1)) = az.inlined_clone() * bx.inlined_clone() + - ax.inlined_clone() * bz.inlined_clone(); + *res.get_unchecked_mut((0, 2)) = ax.inlined_clone() * by.inlined_clone() + - ay.inlined_clone() * bx.inlined_clone(); res } @@ -1571,10 +1635,10 @@ where DefaultAllocator: Allocator } } -impl> Matrix { +impl> Matrix { /// The smallest angle between two vectors. #[inline] - pub fn angle(&self, other: &Matrix) -> N::RealField + pub fn angle(&self, other: &Matrix) -> N::SimdRealField where SB: Storage, ShapeConstraint: DimEq + DimEq, @@ -1584,17 +1648,11 @@ impl> Matrix { let n2 = other.norm(); if n1.is_zero() || n2.is_zero() { - N::RealField::zero() + N::SimdRealField::zero() } else { - let cang = prod.real() / (n1 * n2); - - if cang > N::RealField::one() { - N::RealField::zero() - } else if cang < -N::RealField::one() { - N::RealField::pi() - } else { - cang.acos() - } + let cang = prod.simd_real() / (n1 * n2); + cang.simd_clamp(-N::SimdRealField::one(), N::SimdRealField::one()) + .simd_acos() } } } @@ -1761,7 +1819,7 @@ where for j in 0..ncols { for i in 0..nrows { unsafe { - self.get_unchecked((i, j)).hash(state); + self.get_unchecked((i, j)).hash(state); } } } diff --git a/src/base/norm.rs b/src/base/norm.rs index 675530ec..9f38c1d7 100644 --- a/src/base/norm.rs +++ b/src/base/norm.rs @@ -1,25 +1,37 @@ use num::Zero; use crate::allocator::Allocator; -use crate::{RealField, ComplexField}; +use crate::base::{DefaultAllocator, Dim, Matrix, MatrixMN}; +use crate::constraint::{SameNumberOfColumns, SameNumberOfRows, ShapeConstraint}; use crate::storage::{Storage, StorageMut}; -use crate::base::{DefaultAllocator, Matrix, Dim, MatrixMN}; -use crate::constraint::{SameNumberOfRows, SameNumberOfColumns, ShapeConstraint}; - +use crate::{ComplexField, RealField, SimdComplexField, SimdRealField}; +use alga::simd::SimdPartialOrd; // FIXME: this should be be a trait on alga? /// A trait for abstract matrix norms. /// /// This may be moved to the alga crate in the future. -pub trait Norm { +pub trait Norm { /// Apply this norm to the given matrix. - fn norm(&self, m: &Matrix) -> N::RealField - where R: Dim, C: Dim, S: Storage; + fn norm(&self, m: &Matrix) -> N::SimdRealField + where + R: Dim, + C: Dim, + S: Storage; /// Use the metric induced by this norm to compute the metric distance between the two given matrices. - fn metric_distance(&self, m1: &Matrix, m2: &Matrix) -> N::RealField - where R1: Dim, C1: Dim, S1: Storage, - R2: Dim, C2: Dim, S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns; + fn metric_distance( + &self, + m1: &Matrix, + m2: &Matrix, + ) -> N::SimdRealField + where + R1: Dim, + C1: Dim, + S1: Storage, + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns; } /// Euclidean norm. @@ -29,81 +41,123 @@ pub struct LpNorm(pub i32); /// L-infinite norm aka. Chebytchev norm aka. uniform norm aka. suppremum norm. pub struct UniformNorm; -impl Norm for EuclideanNorm { +impl Norm for EuclideanNorm { #[inline] - fn norm(&self, m: &Matrix) -> N::RealField - where R: Dim, C: Dim, S: Storage { - m.norm_squared().sqrt() + fn norm(&self, m: &Matrix) -> N::SimdRealField + where + R: Dim, + C: Dim, + S: Storage, + { + m.norm_squared().simd_sqrt() } #[inline] - fn metric_distance(&self, m1: &Matrix, m2: &Matrix) -> N::RealField - where R1: Dim, C1: Dim, S1: Storage, - R2: Dim, C2: Dim, S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { - m1.zip_fold(m2, N::RealField::zero(), |acc, a, b| { + fn metric_distance( + &self, + m1: &Matrix, + m2: &Matrix, + ) -> N::SimdRealField + where + R1: Dim, + C1: Dim, + S1: Storage, + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { + m1.zip_fold(m2, N::SimdRealField::zero(), |acc, a, b| { let diff = a - b; - acc + diff.modulus_squared() - }).sqrt() + acc + diff.simd_modulus_squared() + }) + .simd_sqrt() } } -impl Norm for LpNorm { +impl Norm for LpNorm { #[inline] - fn norm(&self, m: &Matrix) -> N::RealField - where R: Dim, C: Dim, S: Storage { - m.fold(N::RealField::zero(), |a, b| { - a + b.modulus().powi(self.0) - }).powf(crate::convert(1.0 / (self.0 as f64))) + fn norm(&self, m: &Matrix) -> N::SimdRealField + where + R: Dim, + C: Dim, + S: Storage, + { + m.fold(N::SimdRealField::zero(), |a, b| { + a + b.simd_modulus().simd_powi(self.0) + }) + .simd_powf(crate::convert(1.0 / (self.0 as f64))) } #[inline] - fn metric_distance(&self, m1: &Matrix, m2: &Matrix) -> N::RealField - where R1: Dim, C1: Dim, S1: Storage, - R2: Dim, C2: Dim, S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { - m1.zip_fold(m2, N::RealField::zero(), |acc, a, b| { + fn metric_distance( + &self, + m1: &Matrix, + m2: &Matrix, + ) -> N::SimdRealField + where + R1: Dim, + C1: Dim, + S1: Storage, + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { + m1.zip_fold(m2, N::SimdRealField::zero(), |acc, a, b| { let diff = a - b; - acc + diff.modulus().powi(self.0) - }).powf(crate::convert(1.0 / (self.0 as f64))) + acc + diff.simd_modulus().simd_powi(self.0) + }) + .simd_powf(crate::convert(1.0 / (self.0 as f64))) } } -impl Norm for UniformNorm { +impl Norm for UniformNorm { #[inline] - fn norm(&self, m: &Matrix) -> N::RealField - where R: Dim, C: Dim, S: Storage { + fn norm(&self, m: &Matrix) -> N::SimdRealField + where + R: Dim, + C: Dim, + S: Storage, + { // NOTE: we don't use `m.amax()` here because for the complex // numbers this will return the max norm1 instead of the modulus. - m.fold(N::RealField::zero(), |acc, a| acc.max(a.modulus())) + m.fold(N::SimdRealField::zero(), |acc, a| { + acc.simd_max(a.simd_modulus()) + }) } #[inline] - fn metric_distance(&self, m1: &Matrix, m2: &Matrix) -> N::RealField - where R1: Dim, C1: Dim, S1: Storage, - R2: Dim, C2: Dim, S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { - m1.zip_fold(m2, N::RealField::zero(), |acc, a, b| { - let val = (a - b).modulus(); - if val > acc { - val - } else { - acc - } + fn metric_distance( + &self, + m1: &Matrix, + m2: &Matrix, + ) -> N::SimdRealField + where + R1: Dim, + C1: Dim, + S1: Storage, + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { + m1.zip_fold(m2, N::SimdRealField::zero(), |acc, a, b| { + let val = (a - b).simd_modulus(); + acc.simd_max(val) }) } } - -impl> Matrix { +impl> Matrix { /// The squared L2 norm of this vector. #[inline] - pub fn norm_squared(&self) -> N::RealField { - let mut res = N::RealField::zero(); + pub fn norm_squared(&self) -> N::SimdRealField { + let mut res = N::SimdRealField::zero(); for i in 0..self.ncols() { let col = self.column(i); - res += col.dotc(&col).real() + res += col.dotc(&col).simd_real() } res @@ -113,17 +167,21 @@ impl> Matrix { /// /// Use `.apply_norm` to apply a custom norm. #[inline] - pub fn norm(&self) -> N::RealField { - self.norm_squared().sqrt() + pub fn norm(&self) -> N::SimdRealField { + self.norm_squared().simd_sqrt() } /// Compute the distance between `self` and `rhs` using the metric induced by the euclidean norm. /// /// Use `.apply_metric_distance` to apply a custom norm. #[inline] - pub fn metric_distance(&self, rhs: &Matrix) -> N::RealField - where R2: Dim, C2: Dim, S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + pub fn metric_distance(&self, rhs: &Matrix) -> N::SimdRealField + where + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { self.apply_metric_distance(rhs, &EuclideanNorm) } @@ -140,7 +198,7 @@ impl> Matrix { /// assert_eq!(v.apply_norm(&EuclideanNorm), v.norm()); /// ``` #[inline] - pub fn apply_norm(&self, norm: &impl Norm) -> N::RealField { + pub fn apply_norm(&self, norm: &impl Norm) -> N::SimdRealField { norm.norm(self) } @@ -159,9 +217,17 @@ impl> Matrix { /// assert_eq!(v1.apply_metric_distance(&v2, &EuclideanNorm), (v1 - v2).norm()); /// ``` #[inline] - pub fn apply_metric_distance(&self, rhs: &Matrix, norm: &impl Norm) -> N::RealField - where R2: Dim, C2: Dim, S2: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + pub fn apply_metric_distance( + &self, + rhs: &Matrix, + norm: &impl Norm, + ) -> N::SimdRealField + where + R2: Dim, + C2: Dim, + S2: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { norm.metric_distance(self, rhs) } @@ -171,7 +237,7 @@ impl> Matrix { /// /// This function is simply implemented as a call to `norm()` #[inline] - pub fn magnitude(&self) -> N::RealField { + pub fn magnitude(&self) -> N::SimdRealField { self.norm() } @@ -181,18 +247,41 @@ impl> Matrix { /// /// This function is simply implemented as a call to `norm_squared()` #[inline] - pub fn magnitude_squared(&self) -> N::RealField { + pub fn magnitude_squared(&self) -> N::SimdRealField { self.norm_squared() } + /// Sets the magnitude of this vector. + #[inline] + pub fn set_magnitude(&mut self, magnitude: N::SimdRealField) + where S: StorageMut { + let n = self.norm(); + self.scale_mut(magnitude / n) + } + /// Returns a normalized version of this matrix. + #[inline] + #[must_use = "Did you mean to use normalize_mut()?"] + pub fn normalize(&self) -> MatrixMN + where DefaultAllocator: Allocator { + self.unscale(self.norm()) + } + + /// The Lp norm of this matrix. + #[inline] + pub fn lp_norm(&self, p: i32) -> N::SimdRealField { + self.apply_norm(&LpNorm(p)) + } +} + +impl> Matrix { /// Sets the magnitude of this vector unless it is smaller than `min_magnitude`. /// /// If `self.magnitude()` is smaller than `min_magnitude`, it will be left unchanged. /// Otherwise this is equivalent to: `*self = self.normalize() * magnitude. #[inline] pub fn try_set_magnitude(&mut self, magnitude: N::RealField, min_magnitude: N::RealField) - where S: StorageMut { + where S: StorageMut { let n = self.norm(); if n >= min_magnitude { @@ -200,19 +289,11 @@ impl> Matrix { } } - /// Returns a normalized version of this matrix. - #[inline] - #[must_use = "Did you mean to use normalize_mut()?"] - pub fn normalize(&self) -> MatrixMN - where DefaultAllocator: Allocator { - self.unscale(self.norm()) - } - /// Returns a normalized version of this matrix unless its norm as smaller or equal to `eps`. #[inline] #[must_use = "Did you mean to use try_normalize_mut()?"] pub fn try_normalize(&self, min_norm: N::RealField) -> Option> - where DefaultAllocator: Allocator { + where DefaultAllocator: Allocator { let n = self.norm(); if n <= min_norm { @@ -221,30 +302,26 @@ impl> Matrix { Some(self.unscale(n)) } } - - /// The Lp norm of this matrix. - #[inline] - pub fn lp_norm(&self, p: i32) -> N::RealField { - self.apply_norm(&LpNorm(p)) - } } - -impl> Matrix { +impl> Matrix { /// Normalizes this matrix in-place and returns its norm. #[inline] - pub fn normalize_mut(&mut self) -> N::RealField { + pub fn normalize_mut(&mut self) -> N::SimdRealField { let n = self.norm(); self.unscale_mut(n); n } +} +impl> Matrix { /// Normalizes this matrix in-place or does nothing if its norm is smaller or equal to `eps`. /// /// If the normalization succeeded, returns the old norm of this matrix. #[inline] - pub fn try_normalize_mut(&mut self, min_norm: N::RealField) -> Option { + pub fn try_normalize_mut(&mut self, min_norm: N::RealField) -> Option + where N: ComplexField { let n = self.norm(); if n <= min_norm { diff --git a/src/base/ops.rs b/src/base/ops.rs index 2a861afd..e0cd450c 100644 --- a/src/base/ops.rs +++ b/src/base/ops.rs @@ -1,11 +1,12 @@ use num::{One, Signed, Zero}; -use std::cmp::{PartialOrd, Ordering}; +use std::cmp::{Ordering, PartialOrd}; use std::iter; use std::ops::{ Add, AddAssign, Div, DivAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub, SubAssign, }; -use alga::general::{ComplexField, ClosedAdd, ClosedDiv, ClosedMul, ClosedNeg, ClosedSub}; +use alga::general::{ClosedAdd, ClosedDiv, ClosedMul, ClosedNeg, ClosedSub}; +use alga::simd::{SimdPartialOrd, SimdSigned}; use crate::base::allocator::{Allocator, SameShapeAllocator, SameShapeC, SameShapeR}; use crate::base::constraint::{ @@ -14,6 +15,7 @@ use crate::base::constraint::{ use crate::base::dimension::{Dim, DimMul, DimName, DimProd, Dynamic}; use crate::base::storage::{ContiguousStorageMut, Storage, StorageMut}; use crate::base::{DefaultAllocator, Matrix, MatrixMN, MatrixN, MatrixSum, Scalar, VectorSliceN}; +use crate::SimdComplexField; /* * @@ -445,7 +447,9 @@ where /// # use nalgebra::DMatrix; /// iter::empty::<&DMatrix>().sum::>(); // panics! /// ``` - fn sum>>(mut iter: I) -> MatrixMN { + fn sum>>( + mut iter: I, + ) -> MatrixMN { if let Some(first) = iter.next() { iter.fold(first.clone(), |acc, x| acc + x) } else { @@ -692,11 +696,11 @@ where /// Equivalent to `self.adjoint() * rhs`. #[inline] pub fn ad_mul(&self, rhs: &Matrix) -> MatrixMN - where - N: ComplexField, - SB: Storage, - DefaultAllocator: Allocator, - ShapeConstraint: SameNumberOfRows, + where + N: SimdComplexField, + SB: Storage, + DefaultAllocator: Allocator, + ShapeConstraint: SameNumberOfRows, { let mut res = unsafe { Matrix::new_uninitialized_generic(self.data.shape().1, rhs.data.shape().1) }; @@ -710,7 +714,10 @@ where &self, rhs: &Matrix, out: &mut Matrix, - dot: impl Fn(&VectorSliceN, &VectorSliceN) -> N, + dot: impl Fn( + &VectorSliceN, + &VectorSliceN, + ) -> N, ) where SB: Storage, SC: StorageMut, @@ -760,7 +767,7 @@ where rhs: &Matrix, out: &mut Matrix, ) where - N: ComplexField, + N: SimdComplexField, SB: Storage, SC: StorageMut, ShapeConstraint: SameNumberOfRows + DimEq + DimEq, @@ -813,7 +820,8 @@ where let coeff = self.get_unchecked((i1, j1)).inlined_clone(); for i2 in 0..nrows2.value() { - *data_res = coeff.inlined_clone() * rhs.get_unchecked((i2, j2)).inlined_clone(); + *data_res = coeff.inlined_clone() + * rhs.get_unchecked((i2, j2)).inlined_clone(); data_res = data_res.offset(1); } } @@ -868,23 +876,6 @@ where } impl> Matrix { - #[inline(always)] - fn xcmp(&self, abs: impl Fn(N) -> N2, ordering: Ordering) -> N2 - where N2: Scalar + PartialOrd + Zero { - let mut iter = self.iter(); - let mut max = iter.next().cloned().map_or(N2::zero(), &abs); - - for e in iter { - let ae = abs(e.inlined_clone()); - - if ae.partial_cmp(&max) == Some(ordering) { - max = ae; - } - } - - max - } - /// Returns the absolute value of the component with the largest absolute value. /// # Example /// ``` @@ -894,8 +885,11 @@ impl> Matrix { /// ``` #[inline] pub fn amax(&self) -> N - where N: PartialOrd + Signed { - self.xcmp(|e| e.abs(), Ordering::Greater) + where N: Zero + SimdSigned + SimdPartialOrd { + self.fold_with( + |e| e.unwrap_or(&N::zero()).simd_abs(), + |a, b| a.simd_max(b.simd_abs()), + ) } /// Returns the the 1-norm of the complex component with the largest 1-norm. @@ -908,9 +902,12 @@ impl> Matrix { /// Complex::new(1.0, 3.0)).camax(), 5.0); /// ``` #[inline] - pub fn camax(&self) -> N::RealField - where N: ComplexField { - self.xcmp(|e| e.norm1(), Ordering::Greater) + pub fn camax(&self) -> N::SimdRealField + where N: SimdComplexField { + self.fold_with( + |e| e.unwrap_or(&N::zero()).simd_norm1(), + |a, b| a.simd_max(b.simd_norm1()), + ) } /// Returns the component with the largest value. @@ -923,8 +920,11 @@ impl> Matrix { /// ``` #[inline] pub fn max(&self) -> N - where N: PartialOrd + Zero { - self.xcmp(|e| e, Ordering::Greater) + where N: SimdPartialOrd + Zero { + self.fold_with( + |e| e.map(|e| e.inlined_clone()).unwrap_or(N::zero()), + |a, b| a.simd_max(b.inlined_clone()), + ) } /// Returns the absolute value of the component with the smallest absolute value. @@ -936,8 +936,11 @@ impl> Matrix { /// ``` #[inline] pub fn amin(&self) -> N - where N: PartialOrd + Signed { - self.xcmp(|e| e.abs(), Ordering::Less) + where N: Zero + SimdPartialOrd + SimdSigned { + self.fold_with( + |e| e.map(|e| e.simd_abs()).unwrap_or(N::zero()), + |a, b| a.simd_min(b.simd_abs()), + ) } /// Returns the the 1-norm of the complex component with the smallest 1-norm. @@ -950,9 +953,15 @@ impl> Matrix { /// Complex::new(1.0, 3.0)).camin(), 3.0); /// ``` #[inline] - pub fn camin(&self) -> N::RealField - where N: ComplexField { - self.xcmp(|e| e.norm1(), Ordering::Less) + pub fn camin(&self) -> N::SimdRealField + where N: SimdComplexField { + self.fold_with( + |e| { + e.map(|e| e.simd_norm1()) + .unwrap_or(N::SimdRealField::zero()) + }, + |a, b| a.simd_min(b.simd_norm1()), + ) } /// Returns the component with the smallest value. @@ -965,7 +974,10 @@ impl> Matrix { /// ``` #[inline] pub fn min(&self) -> N - where N: PartialOrd + Zero { - self.xcmp(|e| e, Ordering::Less) + where N: SimdPartialOrd + Zero { + self.fold_with( + |e| e.map(|e| e.inlined_clone()).unwrap_or(N::zero()), + |a, b| a.simd_min(b.inlined_clone()), + ) } } diff --git a/src/lib.rs b/src/lib.rs index 055b10ff..754ccdfb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,8 +110,8 @@ extern crate generic_array; #[cfg(feature = "std")] extern crate matrixmultiply; extern crate num_complex; -extern crate num_traits as num; extern crate num_rational; +extern crate num_traits as num; extern crate rand; #[cfg(feature = "std")] extern crate rand_distr; @@ -141,30 +141,31 @@ pub mod linalg; #[cfg(feature = "sparse")] pub mod sparse; -#[cfg(feature = "std")] -#[deprecated( - note = "The 'core' module is being renamed to 'base' to avoid conflicts with the 'core' crate." -)] -pub use base as core; pub use crate::base::*; pub use crate::geometry::*; pub use crate::linalg::*; #[cfg(feature = "sparse")] pub use crate::sparse::*; +#[cfg(feature = "std")] +#[deprecated( + note = "The 'core' module is being renamed to 'base' to avoid conflicts with the 'core' crate." +)] +pub use base as core; use std::cmp::{self, Ordering, PartialOrd}; use alga::general::{ - Additive, AdditiveGroup, Identity, TwoSidedInverse, JoinSemilattice, Lattice, MeetSemilattice, - Multiplicative, SupersetOf, + Additive, AdditiveGroup, Identity, JoinSemilattice, Lattice, MeetSemilattice, Multiplicative, + SupersetOf, TwoSidedInverse, }; use alga::linear::SquareMatrix as AlgaSquareMatrix; use alga::linear::{EuclideanSpace, FiniteDimVectorSpace, InnerSpace, NormedSpace}; use num::Signed; -pub use alga::general::{Id, RealField, ComplexField}; #[allow(deprecated)] pub use alga::general::Real; +pub use alga::general::{ComplexField, Id, RealField}; +pub use alga::simd::{SimdComplexField, SimdRealField}; pub use num_complex::Complex; /*