use crate::SimdComplexField; #[cfg(feature = "std")] use matrixmultiply; use num::{One, Zero}; use simba::scalar::{ClosedAdd, ClosedMul}; #[cfg(feature = "std")] use std::mem; use crate::base::allocator::Allocator; use crate::base::constraint::{ AreMultipliable, DimEq, SameNumberOfColumns, SameNumberOfRows, ShapeConstraint, }; use crate::base::dimension::{Const, Dim, Dynamic, U1, U2, U3, U4}; use crate::base::storage::{Storage, StorageMut}; use crate::base::{ DVectorSlice, DefaultAllocator, Matrix, Scalar, SquareMatrix, Vector, VectorSlice, }; /// # Dot/scalar product impl> Matrix where T: Scalar + Zero + ClosedAdd + ClosedMul, { #[inline(always)] fn dotx( &self, rhs: &Matrix, conjugate: impl Fn(T) -> T, ) -> T where SB: Storage, ShapeConstraint: DimEq + DimEq, { assert!( self.nrows() == rhs.nrows(), "Dot product dimensions mismatch for shapes {:?} and {:?}: left rows != right rows.", self.shape(), rhs.shape(), ); assert!( self.ncols() == rhs.ncols(), "Dot product dimensions mismatch for shapes {:?} and {:?}: left cols != right cols.", self.shape(), rhs.shape(), ); // So we do some special cases for common fixed-size vectors of dimension lower than 8 // because the `for` loop below won't be very efficient on those. if (R::is::() || R2::is::()) && (C::is::() || C2::is::()) { unsafe { let a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) * rhs.get_unchecked((0, 0)).inlined_clone(); let b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) * rhs.get_unchecked((1, 0)).inlined_clone(); return a + b; } } if (R::is::() || R2::is::()) && (C::is::() || C2::is::()) { unsafe { let a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) * rhs.get_unchecked((0, 0)).inlined_clone(); let b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) * rhs.get_unchecked((1, 0)).inlined_clone(); let c = conjugate(self.get_unchecked((2, 0)).inlined_clone()) * rhs.get_unchecked((2, 0)).inlined_clone(); return a + b + c; } } if (R::is::() || R2::is::()) && (C::is::() || C2::is::()) { unsafe { let mut a = conjugate(self.get_unchecked((0, 0)).inlined_clone()) * rhs.get_unchecked((0, 0)).inlined_clone(); let mut b = conjugate(self.get_unchecked((1, 0)).inlined_clone()) * rhs.get_unchecked((1, 0)).inlined_clone(); let c = conjugate(self.get_unchecked((2, 0)).inlined_clone()) * rhs.get_unchecked((2, 0)).inlined_clone(); let d = conjugate(self.get_unchecked((3, 0)).inlined_clone()) * rhs.get_unchecked((3, 0)).inlined_clone(); a += c; b += d; return a + b; } } // All this is inspired from the "unrolled version" discussed in: // https://blog.theincredibleholk.org/blog/2012/12/10/optimizing-dot-product/ // // And this comment from bluss: // https://users.rust-lang.org/t/how-to-zip-two-slices-efficiently/2048/12 let mut res = T::zero(); // We have to define them outside of the loop (and not inside at first assignment) // otherwise vectorization won't kick in for some reason. let mut acc0; let mut acc1; let mut acc2; let mut acc3; let mut acc4; let mut acc5; let mut acc6; let mut acc7; for j in 0..self.ncols() { let mut i = 0; acc0 = T::zero(); acc1 = T::zero(); acc2 = T::zero(); acc3 = T::zero(); acc4 = T::zero(); acc5 = T::zero(); acc6 = T::zero(); acc7 = T::zero(); while self.nrows() - i >= 8 { acc0 += unsafe { conjugate(self.get_unchecked((i, j)).inlined_clone()) * rhs.get_unchecked((i, j)).inlined_clone() }; acc1 += unsafe { conjugate(self.get_unchecked((i + 1, j)).inlined_clone()) * rhs.get_unchecked((i + 1, j)).inlined_clone() }; acc2 += unsafe { conjugate(self.get_unchecked((i + 2, j)).inlined_clone()) * rhs.get_unchecked((i + 2, j)).inlined_clone() }; acc3 += unsafe { conjugate(self.get_unchecked((i + 3, j)).inlined_clone()) * rhs.get_unchecked((i + 3, j)).inlined_clone() }; acc4 += unsafe { conjugate(self.get_unchecked((i + 4, j)).inlined_clone()) * rhs.get_unchecked((i + 4, j)).inlined_clone() }; acc5 += unsafe { conjugate(self.get_unchecked((i + 5, j)).inlined_clone()) * rhs.get_unchecked((i + 5, j)).inlined_clone() }; acc6 += unsafe { conjugate(self.get_unchecked((i + 6, j)).inlined_clone()) * rhs.get_unchecked((i + 6, j)).inlined_clone() }; acc7 += unsafe { conjugate(self.get_unchecked((i + 7, j)).inlined_clone()) * rhs.get_unchecked((i + 7, j)).inlined_clone() }; i += 8; } res += acc0 + acc4; res += acc1 + acc5; res += acc2 + acc6; res += acc3 + acc7; for k in i..self.nrows() { res += unsafe { conjugate(self.get_unchecked((k, j)).inlined_clone()) * rhs.get_unchecked((k, j)).inlined_clone() } } } res } /// The dot product between two vectors or matrices (seen as vectors). /// /// This is equal to `self.transpose() * rhs`. For the sesquilinear complex dot product, use /// `self.dotc(rhs)`. /// /// Note that this is **not** the matrix multiplication as in, e.g., numpy. For matrix /// multiplication, use one of: `.gemm`, `.mul_to`, `.mul`, the `*` operator. /// /// # Examples: /// /// ``` /// # use nalgebra::{Vector3, Matrix2x3}; /// let vec1 = Vector3::new(1.0, 2.0, 3.0); /// let vec2 = Vector3::new(0.1, 0.2, 0.3); /// assert_eq!(vec1.dot(&vec2), 1.4); /// /// let mat1 = Matrix2x3::new(1.0, 2.0, 3.0, /// 4.0, 5.0, 6.0); /// let mat2 = Matrix2x3::new(0.1, 0.2, 0.3, /// 0.4, 0.5, 0.6); /// assert_eq!(mat1.dot(&mat2), 9.1); /// ``` /// #[inline] pub fn dot(&self, rhs: &Matrix) -> T where SB: Storage, ShapeConstraint: DimEq + DimEq, { self.dotx(rhs, |e| e) } /// The conjugate-linear dot product between two vectors or matrices (seen as vectors). /// /// This is equal to `self.adjoint() * rhs`. /// For real vectors, this is identical to `self.dot(&rhs)`. /// Note that this is **not** the matrix multiplication as in, e.g., numpy. For matrix /// multiplication, use one of: `.gemm`, `.mul_to`, `.mul`, the `*` operator. /// /// # Examples: /// /// ``` /// # use nalgebra::{Vector2, Complex}; /// let vec1 = Vector2::new(Complex::new(1.0, 2.0), Complex::new(3.0, 4.0)); /// let vec2 = Vector2::new(Complex::new(0.4, 0.3), Complex::new(0.2, 0.1)); /// assert_eq!(vec1.dotc(&vec2), Complex::new(2.0, -1.0)); /// /// // Note that for complex vectors, we generally have: /// // vec1.dotc(&vec2) != vec2.dot(&vec2) /// assert_ne!(vec1.dotc(&vec2), vec1.dot(&vec2)); /// ``` #[inline] pub fn dotc(&self, rhs: &Matrix) -> T where T: SimdComplexField, SB: Storage, ShapeConstraint: DimEq + DimEq, { self.dotx(rhs, T::simd_conjugate) } /// The dot product between the transpose of `self` and `rhs`. /// /// # Examples: /// /// ``` /// # use nalgebra::{Vector3, RowVector3, Matrix2x3, Matrix3x2}; /// let vec1 = Vector3::new(1.0, 2.0, 3.0); /// let vec2 = RowVector3::new(0.1, 0.2, 0.3); /// assert_eq!(vec1.tr_dot(&vec2), 1.4); /// /// let mat1 = Matrix2x3::new(1.0, 2.0, 3.0, /// 4.0, 5.0, 6.0); /// let mat2 = Matrix3x2::new(0.1, 0.4, /// 0.2, 0.5, /// 0.3, 0.6); /// assert_eq!(mat1.tr_dot(&mat2), 9.1); /// ``` #[inline] pub fn tr_dot(&self, rhs: &Matrix) -> T where SB: Storage, ShapeConstraint: DimEq + DimEq, { let (nrows, ncols) = self.shape(); assert_eq!( (ncols, nrows), rhs.shape(), "Transposed dot product dimension mismatch." ); let mut res = T::zero(); for j in 0..self.nrows() { for i in 0..self.ncols() { res += unsafe { self.get_unchecked((j, i)).inlined_clone() * rhs.get_unchecked((i, j)).inlined_clone() } } } res } } fn array_axcpy( y: &mut [T], a: T, x: &[T], c: T, beta: T, stride1: usize, stride2: usize, len: usize, ) where T: Scalar + Zero + ClosedAdd + ClosedMul, { for i in 0..len { unsafe { let y = y.get_unchecked_mut(i * stride1); *y = a.inlined_clone() * x.get_unchecked(i * stride2).inlined_clone() * c.inlined_clone() + beta.inlined_clone() * y.inlined_clone(); } } } fn array_axc(y: &mut [T], a: T, x: &[T], c: T, stride1: usize, stride2: usize, len: usize) where T: Scalar + Zero + ClosedAdd + ClosedMul, { for i in 0..len { unsafe { *y.get_unchecked_mut(i * stride1) = a.inlined_clone() * x.get_unchecked(i * stride2).inlined_clone() * c.inlined_clone(); } } } /// # BLAS functions impl Vector where T: Scalar + Zero + ClosedAdd + ClosedMul, S: StorageMut, { /// Computes `self = a * x * c + b * self`. /// /// If `b` is zero, `self` is never read from. /// /// # Examples: /// /// ``` /// # use nalgebra::Vector3; /// let mut vec1 = Vector3::new(1.0, 2.0, 3.0); /// let vec2 = Vector3::new(0.1, 0.2, 0.3); /// vec1.axcpy(5.0, &vec2, 2.0, 5.0); /// assert_eq!(vec1, Vector3::new(6.0, 12.0, 18.0)); /// ``` #[inline] pub fn axcpy(&mut self, a: T, x: &Vector, c: T, b: T) where SB: Storage, ShapeConstraint: DimEq, { assert_eq!(self.nrows(), x.nrows(), "Axcpy: mismatched vector shapes."); let rstride1 = self.strides().0; let rstride2 = x.strides().0; let y = self.data.as_mut_slice(); let x = x.data.as_slice(); if !b.is_zero() { array_axcpy(y, a, x, c, b, rstride1, rstride2, x.len()); } else { array_axc(y, a, x, c, rstride1, rstride2, x.len()); } } /// Computes `self = a * x + b * self`. /// /// If `b` is zero, `self` is never read from. /// /// # Examples: /// /// ``` /// # use nalgebra::Vector3; /// let mut vec1 = Vector3::new(1.0, 2.0, 3.0); /// let vec2 = Vector3::new(0.1, 0.2, 0.3); /// vec1.axpy(10.0, &vec2, 5.0); /// assert_eq!(vec1, Vector3::new(6.0, 12.0, 18.0)); /// ``` #[inline] pub fn axpy(&mut self, a: T, x: &Vector, b: T) where T: One, SB: Storage, ShapeConstraint: DimEq, { assert_eq!(self.nrows(), x.nrows(), "Axpy: mismatched vector shapes."); self.axcpy(a, x, T::one(), b) } /// Computes `self = alpha * a * x + beta * self`, where `a` is a matrix, `x` a vector, and /// `alpha, beta` two scalars. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2}; /// let mut vec1 = Vector2::new(1.0, 2.0); /// let vec2 = Vector2::new(0.1, 0.2); /// let mat = Matrix2::new(1.0, 2.0, /// 3.0, 4.0); /// vec1.gemv(10.0, &mat, &vec2, 5.0); /// assert_eq!(vec1, Vector2::new(10.0, 21.0)); /// ``` #[inline] pub fn gemv( &mut self, alpha: T, a: &Matrix, x: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { let dim1 = self.nrows(); let (nrows2, ncols2) = a.shape(); let dim3 = x.nrows(); assert!( ncols2 == dim3 && dim1 == nrows2, "Gemv: dimensions mismatch." ); if ncols2 == 0 { // NOTE: we can't just always multiply by beta // because we documented the guaranty that `self` is // never read if `beta` is zero. if beta.is_zero() { self.fill(T::zero()); } else { *self *= beta; } return; } // TODO: avoid bound checks. let col2 = a.column(0); let val = unsafe { x.vget_unchecked(0).inlined_clone() }; self.axcpy(alpha.inlined_clone(), &col2, val, beta); for j in 1..ncols2 { let col2 = a.column(j); let val = unsafe { x.vget_unchecked(j).inlined_clone() }; self.axcpy(alpha.inlined_clone(), &col2, val, T::one()); } } #[inline(always)] fn xxgemv( &mut self, alpha: T, a: &SquareMatrix, x: &Vector, beta: T, dot: impl Fn( &DVectorSlice, &DVectorSlice, ) -> T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { let dim1 = self.nrows(); let dim2 = a.nrows(); let dim3 = x.nrows(); assert!( a.is_square(), "Symmetric cgemv: the input matrix must be square." ); assert!( dim2 == dim3 && dim1 == dim2, "Symmetric cgemv: dimensions mismatch." ); if dim2 == 0 { return; } // TODO: avoid bound checks. let col2 = a.column(0); let val = unsafe { x.vget_unchecked(0).inlined_clone() }; self.axpy(alpha.inlined_clone() * val, &col2, beta); self[0] += alpha.inlined_clone() * dot(&a.slice_range(1.., 0), &x.rows_range(1..)); for j in 1..dim2 { let col2 = a.column(j); let dot = dot(&col2.rows_range(j..), &x.rows_range(j..)); let val; unsafe { val = x.vget_unchecked(j).inlined_clone(); *self.vget_unchecked_mut(j) += alpha.inlined_clone() * dot; } self.rows_range_mut(j + 1..).axpy( alpha.inlined_clone() * val, &col2.rows_range(j + 1..), T::one(), ); } } /// Computes `self = alpha * a * x + beta * self`, where `a` is a **symmetric** matrix, `x` a /// vector, and `alpha, beta` two scalars. DEPRECATED: use `sygemv` instead. #[inline] #[deprecated(note = "This is renamed `sygemv` to match the original BLAS terminology.")] pub fn gemv_symm( &mut self, alpha: T, a: &SquareMatrix, x: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { self.sygemv(alpha, a, x, beta) } /// Computes `self = alpha * a * x + beta * self`, where `a` is a **symmetric** matrix, `x` a /// vector, and `alpha, beta` two scalars. /// /// For hermitian matrices, use `.hegemv` instead. /// If `beta` is zero, `self` is never read. If `self` is read, only its lower-triangular part /// (including the diagonal) is actually read. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2}; /// let mat = Matrix2::new(1.0, 2.0, /// 2.0, 4.0); /// let mut vec1 = Vector2::new(1.0, 2.0); /// let vec2 = Vector2::new(0.1, 0.2); /// vec1.sygemv(10.0, &mat, &vec2, 5.0); /// assert_eq!(vec1, Vector2::new(10.0, 20.0)); /// /// /// // The matrix upper-triangular elements can be garbage because it is never /// // read by this method. Therefore, it is not necessary for the caller to /// // fill the matrix struct upper-triangle. /// let mat = Matrix2::new(1.0, 9999999.9999999, /// 2.0, 4.0); /// let mut vec1 = Vector2::new(1.0, 2.0); /// vec1.sygemv(10.0, &mat, &vec2, 5.0); /// assert_eq!(vec1, Vector2::new(10.0, 20.0)); /// ``` #[inline] pub fn sygemv( &mut self, alpha: T, a: &SquareMatrix, x: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { self.xxgemv(alpha, a, x, beta, |a, b| a.dot(b)) } /// Computes `self = alpha * a * x + beta * self`, where `a` is an **hermitian** matrix, `x` a /// vector, and `alpha, beta` two scalars. /// /// If `beta` is zero, `self` is never read. If `self` is read, only its lower-triangular part /// (including the diagonal) is actually read. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2, Complex}; /// let mat = Matrix2::new(Complex::new(1.0, 0.0), Complex::new(2.0, -0.1), /// Complex::new(2.0, 1.0), Complex::new(4.0, 0.0)); /// let mut vec1 = Vector2::new(Complex::new(1.0, 2.0), Complex::new(3.0, 4.0)); /// let vec2 = Vector2::new(Complex::new(0.1, 0.2), Complex::new(0.3, 0.4)); /// vec1.sygemv(Complex::new(10.0, 20.0), &mat, &vec2, Complex::new(5.0, 15.0)); /// assert_eq!(vec1, Vector2::new(Complex::new(-48.0, 44.0), Complex::new(-75.0, 110.0))); /// /// /// // The matrix upper-triangular elements can be garbage because it is never /// // read by this method. Therefore, it is not necessary for the caller to /// // fill the matrix struct upper-triangle. /// /// let mat = Matrix2::new(Complex::new(1.0, 0.0), Complex::new(99999999.9, 999999999.9), /// Complex::new(2.0, 1.0), Complex::new(4.0, 0.0)); /// let mut vec1 = Vector2::new(Complex::new(1.0, 2.0), Complex::new(3.0, 4.0)); /// let vec2 = Vector2::new(Complex::new(0.1, 0.2), Complex::new(0.3, 0.4)); /// vec1.sygemv(Complex::new(10.0, 20.0), &mat, &vec2, Complex::new(5.0, 15.0)); /// assert_eq!(vec1, Vector2::new(Complex::new(-48.0, 44.0), Complex::new(-75.0, 110.0))); /// ``` #[inline] pub fn hegemv( &mut self, alpha: T, a: &SquareMatrix, x: &Vector, beta: T, ) where T: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { self.xxgemv(alpha, a, x, beta, |a, b| a.dotc(b)) } #[inline(always)] fn gemv_xx( &mut self, alpha: T, a: &Matrix, x: &Vector, beta: T, dot: impl Fn(&VectorSlice, &Vector) -> T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { let dim1 = self.nrows(); let (nrows2, ncols2) = a.shape(); let dim3 = x.nrows(); assert!( nrows2 == dim3 && dim1 == ncols2, "Gemv: dimensions mismatch." ); if ncols2 == 0 { return; } if beta.is_zero() { for j in 0..ncols2 { let val = unsafe { self.vget_unchecked_mut(j) }; *val = alpha.inlined_clone() * dot(&a.column(j), x) } } else { for j in 0..ncols2 { let val = unsafe { self.vget_unchecked_mut(j) }; *val = alpha.inlined_clone() * dot(&a.column(j), x) + beta.inlined_clone() * val.inlined_clone(); } } } /// Computes `self = alpha * a.transpose() * x + beta * self`, where `a` is a matrix, `x` a vector, and /// `alpha, beta` two scalars. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2}; /// let mat = Matrix2::new(1.0, 3.0, /// 2.0, 4.0); /// let mut vec1 = Vector2::new(1.0, 2.0); /// let vec2 = Vector2::new(0.1, 0.2); /// let expected = mat.transpose() * vec2 * 10.0 + vec1 * 5.0; /// /// vec1.gemv_tr(10.0, &mat, &vec2, 5.0); /// assert_eq!(vec1, expected); /// ``` #[inline] pub fn gemv_tr( &mut self, alpha: T, a: &Matrix, x: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { self.gemv_xx(alpha, a, x, beta, |a, b| a.dot(b)) } /// Computes `self = alpha * a.adjoint() * x + beta * self`, where `a` is a matrix, `x` a vector, and /// `alpha, beta` two scalars. /// /// For real matrices, this is the same as `.gemv_tr`. /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2, Complex}; /// let mat = Matrix2::new(Complex::new(1.0, 2.0), Complex::new(3.0, 4.0), /// Complex::new(5.0, 6.0), Complex::new(7.0, 8.0)); /// let mut vec1 = Vector2::new(Complex::new(1.0, 2.0), Complex::new(3.0, 4.0)); /// let vec2 = Vector2::new(Complex::new(0.1, 0.2), Complex::new(0.3, 0.4)); /// let expected = mat.adjoint() * vec2 * Complex::new(10.0, 20.0) + vec1 * Complex::new(5.0, 15.0); /// /// vec1.gemv_ad(Complex::new(10.0, 20.0), &mat, &vec2, Complex::new(5.0, 15.0)); /// assert_eq!(vec1, expected); /// ``` #[inline] pub fn gemv_ad( &mut self, alpha: T, a: &Matrix, x: &Vector, beta: T, ) where T: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, { self.gemv_xx(alpha, a, x, beta, |a, b| a.dotc(b)) } } impl> Matrix where T: Scalar + Zero + ClosedAdd + ClosedMul, { #[inline(always)] fn gerx( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, conjugate: impl Fn(T) -> T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { let (nrows1, ncols1) = self.shape(); let dim2 = x.nrows(); let dim3 = y.nrows(); assert!( nrows1 == dim2 && ncols1 == dim3, "ger: dimensions mismatch." ); for j in 0..ncols1 { // TODO: avoid bound checks. let val = unsafe { conjugate(y.vget_unchecked(j).inlined_clone()) }; self.column_mut(j) .axpy(alpha.inlined_clone() * val, x, beta.inlined_clone()); } } /// Computes `self = alpha * x * y.transpose() + beta * self`. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2x3, Vector2, Vector3}; /// let mut mat = Matrix2x3::repeat(4.0); /// let vec1 = Vector2::new(1.0, 2.0); /// let vec2 = Vector3::new(0.1, 0.2, 0.3); /// let expected = vec1 * vec2.transpose() * 10.0 + mat * 5.0; /// /// mat.ger(10.0, &vec1, &vec2, 5.0); /// assert_eq!(mat, expected); /// ``` #[inline] pub fn ger( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { self.gerx(alpha, x, y, beta, |e| e) } /// Computes `self = alpha * x * y.adjoint() + beta * self`. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{Matrix2x3, Vector2, Vector3, Complex}; /// let mut mat = Matrix2x3::repeat(Complex::new(4.0, 5.0)); /// let vec1 = Vector2::new(Complex::new(1.0, 2.0), Complex::new(3.0, 4.0)); /// let vec2 = Vector3::new(Complex::new(0.6, 0.5), Complex::new(0.4, 0.5), Complex::new(0.2, 0.1)); /// let expected = vec1 * vec2.adjoint() * Complex::new(10.0, 20.0) + mat * Complex::new(5.0, 15.0); /// /// mat.gerc(Complex::new(10.0, 20.0), &vec1, &vec2, Complex::new(5.0, 15.0)); /// assert_eq!(mat, expected); /// ``` #[inline] pub fn gerc( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, ) where T: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { self.gerx(alpha, x, y, beta, SimdComplexField::simd_conjugate) } /// Computes `self = alpha * a * b + beta * self`, where `a, b, self` are matrices. /// `alpha` and `beta` are scalar. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{Matrix2x3, Matrix3x4, Matrix2x4}; /// let mut mat1 = Matrix2x4::identity(); /// let mat2 = Matrix2x3::new(1.0, 2.0, 3.0, /// 4.0, 5.0, 6.0); /// let mat3 = Matrix3x4::new(0.1, 0.2, 0.3, 0.4, /// 0.5, 0.6, 0.7, 0.8, /// 0.9, 1.0, 1.1, 1.2); /// let expected = mat2 * mat3 * 10.0 + mat1 * 5.0; /// /// mat1.gemm(10.0, &mat2, &mat3, 5.0); /// assert_relative_eq!(mat1, expected); /// ``` #[inline] pub fn gemm( &mut self, alpha: T, a: &Matrix, b: &Matrix, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + AreMultipliable, { let ncols1 = self.ncols(); #[cfg(feature = "std")] { // We assume large matrices will be Dynamic but small matrices static. // We could use matrixmultiply for large statically-sized matrices but the performance // threshold to activate it would be different from SMALL_DIM because our code optimizes // better for statically-sized matrices. if R1::is::() || C1::is::() || R2::is::() || C2::is::() || R3::is::() || C3::is::() { // matrixmultiply can be used only if the std feature is available. let nrows1 = self.nrows(); let (nrows2, ncols2) = a.shape(); let (nrows3, ncols3) = b.shape(); // Threshold determined empirically. const SMALL_DIM: usize = 5; if nrows1 > SMALL_DIM && ncols1 > SMALL_DIM && nrows2 > SMALL_DIM && ncols2 > SMALL_DIM { assert_eq!( ncols2, nrows3, "gemm: dimensions mismatch for multiplication." ); assert_eq!( (nrows1, ncols1), (nrows2, ncols3), "gemm: dimensions mismatch for addition." ); // NOTE: this case should never happen because we enter this // codepath only when ncols2 > SMALL_DIM. Though we keep this // here just in case if in the future we change the conditions to // enter this codepath. if ncols2 == 0 { // NOTE: we can't just always multiply by beta // because we documented the guaranty that `self` is // never read if `beta` is zero. if beta.is_zero() { self.fill(T::zero()); } else { *self *= beta; } return; } if T::is::() { let (rsa, csa) = a.strides(); let (rsb, csb) = b.strides(); let (rsc, csc) = self.strides(); unsafe { matrixmultiply::sgemm( nrows2, ncols2, ncols3, mem::transmute_copy(&alpha), a.data.ptr() as *const f32, rsa as isize, csa as isize, b.data.ptr() as *const f32, rsb as isize, csb as isize, mem::transmute_copy(&beta), self.data.ptr_mut() as *mut f32, rsc as isize, csc as isize, ); } return; } else if T::is::() { let (rsa, csa) = a.strides(); let (rsb, csb) = b.strides(); let (rsc, csc) = self.strides(); unsafe { matrixmultiply::dgemm( nrows2, ncols2, ncols3, mem::transmute_copy(&alpha), a.data.ptr() as *const f64, rsa as isize, csa as isize, b.data.ptr() as *const f64, rsb as isize, csb as isize, mem::transmute_copy(&beta), self.data.ptr_mut() as *mut f64, rsc as isize, csc as isize, ); } return; } } } } for j1 in 0..ncols1 { // TODO: avoid bound checks. self.column_mut(j1).gemv( alpha.inlined_clone(), a, &b.column(j1), beta.inlined_clone(), ); } } /// Computes `self = alpha * a.transpose() * b + beta * self`, where `a, b, self` are matrices. /// `alpha` and `beta` are scalar. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{Matrix3x2, Matrix3x4, Matrix2x4}; /// let mut mat1 = Matrix2x4::identity(); /// let mat2 = Matrix3x2::new(1.0, 4.0, /// 2.0, 5.0, /// 3.0, 6.0); /// let mat3 = Matrix3x4::new(0.1, 0.2, 0.3, 0.4, /// 0.5, 0.6, 0.7, 0.8, /// 0.9, 1.0, 1.1, 1.2); /// let expected = mat2.transpose() * mat3 * 10.0 + mat1 * 5.0; /// /// mat1.gemm_tr(10.0, &mat2, &mat3, 5.0); /// assert_eq!(mat1, expected); /// ``` #[inline] pub fn gemm_tr( &mut self, alpha: T, a: &Matrix, b: &Matrix, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + AreMultipliable, { let (nrows1, ncols1) = self.shape(); let (nrows2, ncols2) = a.shape(); let (nrows3, ncols3) = b.shape(); assert_eq!( nrows2, nrows3, "gemm: dimensions mismatch for multiplication." ); assert_eq!( (nrows1, ncols1), (ncols2, ncols3), "gemm: dimensions mismatch for addition." ); for j1 in 0..ncols1 { // TODO: avoid bound checks. self.column_mut(j1).gemv_tr( alpha.inlined_clone(), a, &b.column(j1), beta.inlined_clone(), ); } } /// Computes `self = alpha * a.adjoint() * b + beta * self`, where `a, b, self` are matrices. /// `alpha` and `beta` are scalar. /// /// If `beta` is zero, `self` is never read. /// /// # Examples: /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{Matrix3x2, Matrix3x4, Matrix2x4, Complex}; /// let mut mat1 = Matrix2x4::identity(); /// let mat2 = Matrix3x2::new(Complex::new(1.0, 4.0), Complex::new(7.0, 8.0), /// Complex::new(2.0, 5.0), Complex::new(9.0, 10.0), /// Complex::new(3.0, 6.0), Complex::new(11.0, 12.0)); /// let mat3 = Matrix3x4::new(Complex::new(0.1, 1.3), Complex::new(0.2, 1.4), Complex::new(0.3, 1.5), Complex::new(0.4, 1.6), /// Complex::new(0.5, 1.7), Complex::new(0.6, 1.8), Complex::new(0.7, 1.9), Complex::new(0.8, 2.0), /// Complex::new(0.9, 2.1), Complex::new(1.0, 2.2), Complex::new(1.1, 2.3), Complex::new(1.2, 2.4)); /// let expected = mat2.adjoint() * mat3 * Complex::new(10.0, 20.0) + mat1 * Complex::new(5.0, 15.0); /// /// mat1.gemm_ad(Complex::new(10.0, 20.0), &mat2, &mat3, Complex::new(5.0, 15.0)); /// assert_eq!(mat1, expected); /// ``` #[inline] pub fn gemm_ad( &mut self, alpha: T, a: &Matrix, b: &Matrix, beta: T, ) where T: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + AreMultipliable, { let (nrows1, ncols1) = self.shape(); let (nrows2, ncols2) = a.shape(); let (nrows3, ncols3) = b.shape(); assert_eq!( nrows2, nrows3, "gemm: dimensions mismatch for multiplication." ); assert_eq!( (nrows1, ncols1), (ncols2, ncols3), "gemm: dimensions mismatch for addition." ); for j1 in 0..ncols1 { // TODO: avoid bound checks. self.column_mut(j1).gemv_ad(alpha, a, &b.column(j1), beta); } } } impl> Matrix where T: Scalar + Zero + ClosedAdd + ClosedMul, { #[inline(always)] fn xxgerx( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, conjugate: impl Fn(T) -> T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { let dim1 = self.nrows(); let dim2 = x.nrows(); let dim3 = y.nrows(); assert!( self.is_square(), "Symmetric ger: the input matrix must be square." ); assert!(dim1 == dim2 && dim1 == dim3, "ger: dimensions mismatch."); for j in 0..dim1 { let val = unsafe { conjugate(y.vget_unchecked(j).inlined_clone()) }; let subdim = Dynamic::new(dim1 - j); // TODO: avoid bound checks. self.generic_slice_mut((j, j), (subdim, Const::<1>)).axpy( alpha.inlined_clone() * val, &x.rows_range(j..), beta.inlined_clone(), ); } } /// Computes `self = alpha * x * y.transpose() + beta * self`, where `self` is a **symmetric** /// matrix. /// /// If `beta` is zero, `self` is never read. The result is symmetric. Only the lower-triangular /// (including the diagonal) part of `self` is read/written. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2}; /// let mut mat = Matrix2::identity(); /// let vec1 = Vector2::new(1.0, 2.0); /// let vec2 = Vector2::new(0.1, 0.2); /// let expected = vec1 * vec2.transpose() * 10.0 + mat * 5.0; /// mat.m12 = 99999.99999; // This component is on the upper-triangular part and will not be read/written. /// /// mat.ger_symm(10.0, &vec1, &vec2, 5.0); /// assert_eq!(mat.lower_triangle(), expected.lower_triangle()); /// assert_eq!(mat.m12, 99999.99999); // This was untouched. #[inline] #[deprecated(note = "This is renamed `syger` to match the original BLAS terminology.")] pub fn ger_symm( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { self.syger(alpha, x, y, beta) } /// Computes `self = alpha * x * y.transpose() + beta * self`, where `self` is a **symmetric** /// matrix. /// /// For hermitian complex matrices, use `.hegerc` instead. /// If `beta` is zero, `self` is never read. The result is symmetric. Only the lower-triangular /// (including the diagonal) part of `self` is read/written. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2}; /// let mut mat = Matrix2::identity(); /// let vec1 = Vector2::new(1.0, 2.0); /// let vec2 = Vector2::new(0.1, 0.2); /// let expected = vec1 * vec2.transpose() * 10.0 + mat * 5.0; /// mat.m12 = 99999.99999; // This component is on the upper-triangular part and will not be read/written. /// /// mat.syger(10.0, &vec1, &vec2, 5.0); /// assert_eq!(mat.lower_triangle(), expected.lower_triangle()); /// assert_eq!(mat.m12, 99999.99999); // This was untouched. #[inline] pub fn syger( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { self.xxgerx(alpha, x, y, beta, |e| e) } /// Computes `self = alpha * x * y.adjoint() + beta * self`, where `self` is an **hermitian** /// matrix. /// /// If `beta` is zero, `self` is never read. The result is symmetric. Only the lower-triangular /// (including the diagonal) part of `self` is read/written. /// /// # Examples: /// /// ``` /// # use nalgebra::{Matrix2, Vector2, Complex}; /// let mut mat = Matrix2::identity(); /// let vec1 = Vector2::new(Complex::new(1.0, 3.0), Complex::new(2.0, 4.0)); /// let vec2 = Vector2::new(Complex::new(0.2, 0.4), Complex::new(0.1, 0.3)); /// let expected = vec1 * vec2.adjoint() * Complex::new(10.0, 20.0) + mat * Complex::new(5.0, 15.0); /// mat.m12 = Complex::new(99999.99999, 88888.88888); // This component is on the upper-triangular part and will not be read/written. /// /// mat.hegerc(Complex::new(10.0, 20.0), &vec1, &vec2, Complex::new(5.0, 15.0)); /// assert_eq!(mat.lower_triangle(), expected.lower_triangle()); /// assert_eq!(mat.m12, Complex::new(99999.99999, 88888.88888)); // This was untouched. #[inline] pub fn hegerc( &mut self, alpha: T, x: &Vector, y: &Vector, beta: T, ) where T: SimdComplexField, SB: Storage, SC: Storage, ShapeConstraint: DimEq + DimEq, { self.xxgerx(alpha, x, y, beta, SimdComplexField::simd_conjugate) } } impl> SquareMatrix where T: Scalar + Zero + One + ClosedAdd + ClosedMul, { /// Computes the quadratic form `self = alpha * lhs * mid * lhs.transpose() + beta * self`. /// /// This uses the provided workspace `work` to avoid allocations for intermediate results. /// /// # Examples: /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{DMatrix, DVector}; /// // Note that all those would also work with statically-sized matrices. /// // We use DMatrix/DVector since that's the only case where pre-allocating the /// // workspace is actually useful (assuming the same workspace is re-used for /// // several computations) because it avoids repeated dynamic allocations. /// let mut mat = DMatrix::identity(2, 2); /// let lhs = DMatrix::from_row_slice(2, 3, &[1.0, 2.0, 3.0, /// 4.0, 5.0, 6.0]); /// let mid = DMatrix::from_row_slice(3, 3, &[0.1, 0.2, 0.3, /// 0.5, 0.6, 0.7, /// 0.9, 1.0, 1.1]); /// // The random shows that values on the workspace do not /// // matter as they will be overwritten. /// let mut workspace = DVector::new_random(2); /// let expected = &lhs * &mid * lhs.transpose() * 10.0 + &mat * 5.0; /// /// mat.quadform_tr_with_workspace(&mut workspace, 10.0, &lhs, &mid, 5.0); /// assert_relative_eq!(mat, expected); pub fn quadform_tr_with_workspace( &mut self, work: &mut Vector, alpha: T, lhs: &Matrix, mid: &SquareMatrix, beta: T, ) where D2: Dim, R3: Dim, C3: Dim, D4: Dim, S2: StorageMut, S3: Storage, S4: Storage, ShapeConstraint: DimEq + DimEq + DimEq + DimEq, { work.gemv(T::one(), lhs, &mid.column(0), T::zero()); self.ger(alpha.inlined_clone(), work, &lhs.column(0), beta); for j in 1..mid.ncols() { work.gemv(T::one(), lhs, &mid.column(j), T::zero()); self.ger(alpha.inlined_clone(), work, &lhs.column(j), T::one()); } } /// Computes the quadratic form `self = alpha * lhs * mid * lhs.transpose() + beta * self`. /// /// This allocates a workspace vector of dimension D1 for intermediate results. /// If `D1` is a type-level integer, then the allocation is performed on the stack. /// Use `.quadform_tr_with_workspace(...)` instead to avoid allocations. /// /// # Examples: /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{Matrix2, Matrix3, Matrix2x3, Vector2}; /// let mut mat = Matrix2::identity(); /// let lhs = Matrix2x3::new(1.0, 2.0, 3.0, /// 4.0, 5.0, 6.0); /// let mid = Matrix3::new(0.1, 0.2, 0.3, /// 0.5, 0.6, 0.7, /// 0.9, 1.0, 1.1); /// let expected = lhs * mid * lhs.transpose() * 10.0 + mat * 5.0; /// /// mat.quadform_tr(10.0, &lhs, &mid, 5.0); /// assert_relative_eq!(mat, expected); pub fn quadform_tr( &mut self, alpha: T, lhs: &Matrix, mid: &SquareMatrix, beta: T, ) where R3: Dim, C3: Dim, D4: Dim, S3: Storage, S4: Storage, ShapeConstraint: DimEq + DimEq + DimEq, DefaultAllocator: Allocator, { let mut work = unsafe { crate::unimplemented_or_uninitialized_generic!(self.data.shape().0, Const::<1>) }; self.quadform_tr_with_workspace(&mut work, alpha, lhs, mid, beta) } /// Computes the quadratic form `self = alpha * rhs.transpose() * mid * rhs + beta * self`. /// /// This uses the provided workspace `work` to avoid allocations for intermediate results. /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{DMatrix, DVector}; /// // Note that all those would also work with statically-sized matrices. /// // We use DMatrix/DVector since that's the only case where pre-allocating the /// // workspace is actually useful (assuming the same workspace is re-used for /// // several computations) because it avoids repeated dynamic allocations. /// let mut mat = DMatrix::identity(2, 2); /// let rhs = DMatrix::from_row_slice(3, 2, &[1.0, 2.0, /// 3.0, 4.0, /// 5.0, 6.0]); /// let mid = DMatrix::from_row_slice(3, 3, &[0.1, 0.2, 0.3, /// 0.5, 0.6, 0.7, /// 0.9, 1.0, 1.1]); /// // The random shows that values on the workspace do not /// // matter as they will be overwritten. /// let mut workspace = DVector::new_random(3); /// let expected = rhs.transpose() * &mid * &rhs * 10.0 + &mat * 5.0; /// /// mat.quadform_with_workspace(&mut workspace, 10.0, &mid, &rhs, 5.0); /// assert_relative_eq!(mat, expected); pub fn quadform_with_workspace( &mut self, work: &mut Vector, alpha: T, mid: &SquareMatrix, rhs: &Matrix, beta: T, ) where D2: Dim, D3: Dim, R4: Dim, C4: Dim, S2: StorageMut, S3: Storage, S4: Storage, ShapeConstraint: DimEq + DimEq + DimEq + AreMultipliable, { work.gemv(T::one(), mid, &rhs.column(0), T::zero()); self.column_mut(0) .gemv_tr(alpha.inlined_clone(), &rhs, work, beta.inlined_clone()); for j in 1..rhs.ncols() { work.gemv(T::one(), mid, &rhs.column(j), T::zero()); self.column_mut(j) .gemv_tr(alpha.inlined_clone(), &rhs, work, beta.inlined_clone()); } } /// Computes the quadratic form `self = alpha * rhs.transpose() * mid * rhs + beta * self`. /// /// This allocates a workspace vector of dimension D2 for intermediate results. /// If `D2` is a type-level integer, then the allocation is performed on the stack. /// Use `.quadform_with_workspace(...)` instead to avoid allocations. /// /// ``` /// # #[macro_use] extern crate approx; /// # use nalgebra::{Matrix2, Matrix3x2, Matrix3}; /// let mut mat = Matrix2::identity(); /// let rhs = Matrix3x2::new(1.0, 2.0, /// 3.0, 4.0, /// 5.0, 6.0); /// let mid = Matrix3::new(0.1, 0.2, 0.3, /// 0.5, 0.6, 0.7, /// 0.9, 1.0, 1.1); /// let expected = rhs.transpose() * mid * rhs * 10.0 + mat * 5.0; /// /// mat.quadform(10.0, &mid, &rhs, 5.0); /// assert_relative_eq!(mat, expected); pub fn quadform( &mut self, alpha: T, mid: &SquareMatrix, rhs: &Matrix, beta: T, ) where D2: Dim, R3: Dim, C3: Dim, S2: Storage, S3: Storage, ShapeConstraint: DimEq + DimEq + AreMultipliable, DefaultAllocator: Allocator, { let mut work = unsafe { crate::unimplemented_or_uninitialized_generic!(mid.data.shape().0, Const::<1>) }; self.quadform_with_workspace(&mut work, alpha, mid, rhs, beta) } }