blas.rs works now!

2021-07-15 23:56:58 -05:00 · 2021-07-15 23:56:58 -05:00 · df9b6f5f64
commit df9b6f5f64
parent bbd045d216
11 changed files with 695 additions and 415 deletions
--- a/src/base/allocator.rs
+++ b/src/base/allocator.rs
@ -49,7 +49,7 @@ pub trait Allocator<T, R: Dim, C: Dim = U1>:
 /// A matrix reallocator. Changes the size of the memory buffer that initially contains (RFrom ×
 /// CFrom) elements to a smaller or larger size (RTo, CTo).
 pub trait Reallocator<T, RFrom: Dim, CFrom: Dim, RTo: Dim, CTo: Dim>:
-    InnerAllocator<T, RFrom, CFrom> + InnerAllocator<T, RTo, CTo>
+    Allocator<T, RFrom, CFrom> + Allocator<T, RTo, CTo>
 {
    /// Reallocates a buffer of shape `(RTo, CTo)`, possibly reusing a previously allocated buffer
    /// `buf`. Data stored by `buf` are linearly copied to the output:
@ -75,7 +75,7 @@ pub type SameShapeC<C1, C2> = <ShapeConstraint as SameNumberOfColumns<C1, C2>>::
 // TODO: Bad name.
 /// Restricts the given number of rows and columns to be respectively the same.
 pub trait SameShapeAllocator<T, R1: Dim, C1: Dim, R2: Dim, C2: Dim>:
-    InnerAllocator<T, R1, C1> + InnerAllocator<T, SameShapeR<R1, R2>, SameShapeC<C1, C2>>
+    Allocator<T, R1, C1> + Allocator<T, SameShapeR<R1, R2>, SameShapeC<C1, C2>>
 where
    ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>,
 {
@ -85,7 +85,7 @@ impl<T, R1: Dim, R2: Dim, C1: Dim, C2: Dim> SameShapeAllocator<T, R1, C1, R2, C2
    for DefaultAllocator
 where
    DefaultAllocator:
-        InnerAllocator<T, R1, C1> + InnerAllocator<T, SameShapeR<R1, R2>, SameShapeC<C1, C2>>,
+        Allocator<T, R1, C1> + Allocator<T, SameShapeR<R1, R2>, SameShapeC<C1, C2>>,
    ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>,
 {
 }
@ -93,8 +93,8 @@ where
 // XXX: Bad name.
 /// Restricts the given number of rows to be equal.
 pub trait SameShapeVectorAllocator<T, R1: Dim, R2: Dim>:
-    InnerAllocator<T, R1>
-    + InnerAllocator<T, SameShapeR<R1, R2>>
+    Allocator<T, R1>
+    + Allocator<T, SameShapeR<R1, R2>>
    + SameShapeAllocator<T, R1, U1, R2, U1>
 where
    ShapeConstraint: SameNumberOfRows<R1, R2>,
@ -103,7 +103,7 @@ where

 impl<T, R1: Dim, R2: Dim> SameShapeVectorAllocator<T, R1, R2> for DefaultAllocator
 where
-    DefaultAllocator: InnerAllocator<T, R1, U1> + InnerAllocator<T, SameShapeR<R1, R2>>,
+    DefaultAllocator: Allocator<T, R1, U1> + Allocator<T, SameShapeR<R1, R2>>,
    ShapeConstraint: SameNumberOfRows<R1, R2>,
 {
 }
--- a/src/base/blas.rs
+++ b/src/base/blas.rs
@ -1,4 +1,12 @@
-use crate::{OVector, SimdComplexField};
+//! Implements a subset of the Basic Linear Algebra Subprograms (BLAS), a
+//! standard and highly optimized set of basic vector and matrix operations.
+//!
+//! To avoid unsoundness due to mishandling of uninitialized data, we divide our
+//! methods into two groups: those that take in a `&mut` to a matrix, and those
+//! that return an owned matrix that would otherwise result from setting a
+//! parameter to zero in the other methods.
+
+use crate::{OMatrix, OVector, SimdComplexField};
 #[cfg(feature = "std")]
 use matrixmultiply;
 use num::{One, Zero};
@ -279,72 +287,16 @@ where
    }
 }

-#[allow(clippy::too_many_arguments)]
-fn array_axcpy<T>(
-    y: &mut [T],
-    a: T,
-    x: &[T],
-    c: T,
-    beta: T,
-    stride1: usize,
-    stride2: usize,
-    len: usize,
-) where
-    T: Scalar + Zero + ClosedAdd + ClosedMul,
-{
-    for i in 0..len {
-        unsafe {
-            let y = y.get_unchecked_mut(i * stride1);
-            *y = a.inlined_clone()
-                * x.get_unchecked(i * stride2).inlined_clone()
-                * c.inlined_clone()
-                + beta.inlined_clone() * y.inlined_clone();
-        }
-    }
-}
-
-fn array_axc<T>(y: &mut [T], a: T, x: &[T], c: T, stride1: usize, stride2: usize, len: usize)
-where
-    T: Scalar + Zero + ClosedAdd + ClosedMul,
-{
-    for i in 0..len {
-        unsafe {
-            *y.get_unchecked_mut(i * stride1) = a.inlined_clone()
-                * x.get_unchecked(i * stride2).inlined_clone()
-                * c.inlined_clone();
-        }
-    }
-}
-
-fn array_axc_uninit<T>(
-    y: &mut [MaybeUninit<T>],
-    a: T,
-    x: &[T],
-    c: T,
-    stride1: usize,
-    stride2: usize,
-    len: usize,
-) where
-    T: Scalar + Zero + ClosedAdd + ClosedMul,
-{
-    for i in 0..len {
-        unsafe {
-            *y.get_unchecked_mut(i * stride1) = MaybeUninit::new(
-                a.inlined_clone()
-                    * x.get_unchecked(i * stride2).inlined_clone()
-                    * c.inlined_clone(),
-            );
-        }
-    }
-}
-
 /// # BLAS functions
 impl<T, D: Dim, S> Vector<T, D, S>
 where
    T: Scalar + Zero + ClosedAdd + ClosedMul,
    S: StorageMut<T, D>,
 {
-    /// Computes `self = a * x * c + b * self`.
+    /// Computes `self = a * x * c + b * self`, where `a`, `b`, `c` are scalars,
+    /// and `x` is a vector of the same size as `self`.
+    ///
+    /// For commutative scalars, this is equivalent to an [`axpy`] call.
    ///
    /// If `b` is zero, `self` is never read from.
    ///
@ -376,9 +328,24 @@ where
            let x = x.data.as_slice_unchecked();

            if !b.is_zero() {
-                array_axcpy(y, a, x, c, b, rstride1, rstride2, x.len());
+                for i in 0..x.len() {
+                    unsafe {
+                        let y = y.get_unchecked_mut(i * rstride1);
+                        *y = a.inlined_clone()
+                            * x.get_unchecked(i * rstride2).inlined_clone()
+                            * c.inlined_clone()
+                            + b.inlined_clone() * y.inlined_clone();
+                    }
+                }
            } else {
-                array_axc(y, a, x, c, rstride1, rstride2, x.len());
+                for i in 0..x.len() {
+                    unsafe {
+                        let y = y.get_unchecked_mut(i * rstride1);
+                        *y = a.inlined_clone()
+                            * x.get_unchecked(i * rstride2).inlined_clone()
+                            * c.inlined_clone();
+                    }
+                }
            }
        }
    }
@ -746,49 +713,55 @@ where
    }
 }

-impl<T, D: Dim> OVector<MaybeUninit<T>, D>
+impl<T, D: Dim, S> Vector<MaybeUninit<T>, D, S>
 where
    T: Scalar + Zero + ClosedAdd + ClosedMul,
-    DefaultAllocator: Allocator<T, D>,
+    S: StorageMut<MaybeUninit<T>, D>,
 {
-    pub fn axc<D2: Dim, SB>(&mut self, a: T, x: &Vector<T, D2, SB>, c: T) -> OVector<T, D>
+    /// Computes `alpha * a * x`, where `a` is a matrix, `x` a vector, and
+    /// `alpha` is a scalar.
+    ///
+    /// # Safety
+    /// `self` must be completely uninitialized, or data leaks will occur. After
+    /// this method is called, all entries in `self` will be initialized.
+    pub fn axc<D2: Dim, S2>(&mut self, a: T, x: &Vector<T, D2, S2>, c: T)
    where
-        SB: Storage<T, D2>,
+        S2: Storage<T, D2>,
        ShapeConstraint: DimEq<D, D2>,
    {
-        assert_eq!(self.nrows(), x.nrows(), "Axcpy: mismatched vector shapes.");
-
        let rstride1 = self.strides().0;
        let rstride2 = x.strides().0;

        unsafe {
-            // SAFETY: the conversion to slices is OK because we access the
-            //         elements taking the strides into account.
            let y = self.data.as_mut_slice_unchecked();
            let x = x.data.as_slice_unchecked();

-            array_axc_uninit(y, a, x, c, rstride1, rstride2, x.len());
-            self.assume_init()
+            for i in 0..y.len() {
+                *y.get_unchecked_mut(i * rstride1) = MaybeUninit::new(
+                    a.inlined_clone()
+                        * x.get_unchecked(i * rstride2).inlined_clone()
+                        * c.inlined_clone(),
+                );
+            }
        }
    }

-    /// Computes `self = alpha * a * x, where `a` is a matrix, `x` a vector, and
+    /// Computes `alpha * a * x`, where `a` is a matrix, `x` a vector, and
    /// `alpha` is a scalar.
    ///
-    /// By the time this method returns, `self` will have been initialized.
+    /// Initializes `self`.
    #[inline]
-    pub fn gemv_uninit<R2: Dim, C2: Dim, D3: Dim, SB, SC>(
-        mut self,
+    pub fn gemv_z<R2: Dim, C2: Dim, D3: Dim, SB, SC>(
+        &mut self,
        alpha: T,
        a: &Matrix<T, R2, C2, SB>,
        x: &Vector<T, D3, SC>,
-        beta: T,
-    ) -> OVector<T, D>
-    where
+    ) where
        T: One,
        SB: Storage<T, R2, C2>,
        SC: Storage<T, D3>,
        ShapeConstraint: DimEq<D, R2> + AreMultipliable<R2, C2, D3, U1>,
+        // DefaultAllocator: Allocator<T, D>,
    {
        let dim1 = self.nrows();
        let (nrows2, ncols2) = a.shape();
@ -801,22 +774,169 @@ where

        if ncols2 == 0 {
            self.fill_fn(|| MaybeUninit::new(T::zero()));
-            return self.assume_init();
+            return;
        }

        // TODO: avoid bound checks.
        let col2 = a.column(0);
        let val = unsafe { x.vget_unchecked(0).inlined_clone() };
-        let res = self.axc(alpha.inlined_clone(), &col2, val);
+        self.axc(alpha.inlined_clone(), &col2, val);

-        for j in 1..ncols2 {
-            let col2 = a.column(j);
-            let val = unsafe { x.vget_unchecked(j).inlined_clone() };
+        // Safety: axc initializes self.
+        unsafe {
+            let mut init = self.assume_init_mut();

-            res.axcpy(alpha.inlined_clone(), &col2, val, T::one());
+            for j in 1..ncols2 {
+                let col2 = a.column(j);
+                let val = unsafe { x.vget_unchecked(j).inlined_clone() };
+                init.axcpy(alpha.inlined_clone(), &col2, val, T::one());
+            }
+        }
+    }
+}
+
+impl<T, R1: Dim, C1: Dim> OMatrix<T, R1, C1>
+where
+    T: Scalar + Zero + One + ClosedAdd + ClosedMul,
+    DefaultAllocator: Allocator<T, R1, C1>,
+{
+    /// Computes `alpha * a * b`, where `a` and `b` are matrices, and `alpha` is
+    /// a scalar.
+    ///
+    /// # Examples:
+    ///
+    /// ```
+    /// # #[macro_use] extern crate approx;
+    /// # use nalgebra::{Matrix2x3, Matrix3x4, Matrix2x4};
+    /// let mut mat1 = Matrix2x4::identity();
+    /// let mat2 = Matrix2x3::new(1.0, 2.0, 3.0,
+    ///                           4.0, 5.0, 6.0);
+    /// let mat3 = Matrix3x4::new(0.1, 0.2, 0.3, 0.4,
+    ///                           0.5, 0.6, 0.7, 0.8,
+    ///                           0.9, 1.0, 1.1, 1.2);
+    /// let expected = mat2 * mat3 * 10.0 + mat1 * 5.0;
+    ///
+    /// mat1.gemm(10.0, &mat2, &mat3, 5.0);
+    /// assert_relative_eq!(mat1, expected);
+    /// ```
+    #[inline]
+    pub fn gemm_z<R2: Dim, C2: Dim, R3: Dim, C3: Dim, SB, SC>(
+        alpha: T,
+        a: &Matrix<T, R2, C2, SB>,
+        b: &Matrix<T, R3, C3, SC>,
+    ) -> Self
+    where
+        SB: Storage<T, R2, C2>,
+        SC: Storage<T, R3, C3>,
+        ShapeConstraint: SameNumberOfRows<R1, R2>
+            + SameNumberOfColumns<C1, C3>
+            + AreMultipliable<R2, C2, R3, C3>,
+    {
+        let (nrows1, ncols1) = a.shape();
+        let (nrows2, ncols2) = b.shape();
+
+        assert_eq!(
+            ncols1, nrows2,
+            "gemm: dimensions mismatch for multiplication."
+        );
+
+        let mut res =
+            Matrix::new_uninitialized_generic(R1::from_usize(nrows1), C1::from_usize(ncols2));
+
+        #[cfg(feature = "std")]
+        {
+            // We assume large matrices will be Dynamic but small matrices static.
+            // We could use matrixmultiply for large statically-sized matrices but the performance
+            // threshold to activate it would be different from SMALL_DIM because our code optimizes
+            // better for statically-sized matrices.
+            if R1::is::<Dynamic>()
+                || C1::is::<Dynamic>()
+                || R2::is::<Dynamic>()
+                || C2::is::<Dynamic>()
+                || R3::is::<Dynamic>()
+                || C3::is::<Dynamic>()
+            {
+                // matrixmultiply can be used only if the std feature is available.
+
+                // Threshold determined empirically.
+                const SMALL_DIM: usize = 5;
+
+                if nrows1 > SMALL_DIM
+                    && ncols1 > SMALL_DIM
+                    && nrows2 > SMALL_DIM
+                    && ncols2 > SMALL_DIM
+                {
+                    // NOTE: this case should never happen because we enter this
+                    // codepath only when ncols2 > SMALL_DIM. Though we keep this
+                    // here just in case if in the future we change the conditions to
+                    // enter this codepath.
+                    if ncols1 == 0 {
+                        // NOTE: we can't just always multiply by beta
+                        // because we documented the guaranty that `self` is
+                        // never read if `beta` is zero.
+
+                        // Safety: this buffer is empty.
+                        return res.assume_init();
+                    }
+
+                    let (rsa, csa) = a.strides();
+                    let (rsb, csb) = b.strides();
+                    let (rsc, csc) = res.strides();
+
+                    if T::is::<f32>() {
+                        unsafe {
+                            matrixmultiply::sgemm(
+                                nrows1,
+                                ncols1,
+                                ncols2,
+                                mem::transmute_copy(&alpha),
+                                a.data.ptr() as *const f32,
+                                rsa as isize,
+                                csa as isize,
+                                b.data.ptr() as *const f32,
+                                rsb as isize,
+                                csb as isize,
+                                0.0,
+                                res.data.ptr_mut() as *mut f32,
+                                rsc as isize,
+                                csc as isize,
+                            );
+
+                            return res.assume_init();
+                        }
+                    } else if T::is::<f64>() {
+                        unsafe {
+                            matrixmultiply::dgemm(
+                                nrows1,
+                                ncols1,
+                                ncols2,
+                                mem::transmute_copy(&alpha),
+                                a.data.ptr() as *const f64,
+                                rsa as isize,
+                                csa as isize,
+                                b.data.ptr() as *const f64,
+                                rsb as isize,
+                                csb as isize,
+                                0.0,
+                                res.data.ptr_mut() as *mut f64,
+                                rsc as isize,
+                                csc as isize,
+                            );
+
+                            return res.assume_init();
+                        }
+                    }
+                }
+            }
        }

-        res
+        for j1 in 0..ncols1 {
+            // TODO: avoid bound checks.
+            res.column_mut(j1)
+                .gemv_z(alpha.inlined_clone(), a, &b.column(j1));
+        }
+
+        unsafe { res.assume_init() }
    }
 }

@ -1372,49 +1492,6 @@ where
    ///
    /// mat.quadform_tr_with_workspace(&mut workspace, 10.0, &lhs, &mid, 5.0);
    /// assert_relative_eq!(mat, expected);
-    pub fn quadform_tr_with_workspace<D2: Dim, R3: Dim, C3: Dim, S3, D4: Dim, S4>(
-        &mut self,
-        work: &mut OVector<MaybeUninit<T>, D2>,
-        alpha: T,
-        lhs: &Matrix<T, R3, C3, S3>,
-        mid: &SquareMatrix<T, D4, S4>,
-        beta: T,
-    ) where
-        S3: Storage<T, R3, C3>,
-        S4: Storage<T, D4, D4>,
-        ShapeConstraint: DimEq<D1, D2> + DimEq<D1, R3> + DimEq<D2, R3> + DimEq<C3, D4>,
-        DefaultAllocator: Allocator<T, D2>,
-    {
-        let work = work.gemv_uninit(T::one(), lhs, &mid.column(0), T::zero());
-        self.ger(alpha.inlined_clone(), &work, &lhs.column(0), beta);
-
-        for j in 1..mid.ncols() {
-            work.gemv(T::one(), lhs, &mid.column(j), T::zero());
-            self.ger(alpha.inlined_clone(), &work, &lhs.column(j), T::one());
-        }
-    }
-
-    /// Computes the quadratic form `self = alpha * lhs * mid * lhs.transpose() + beta * self`.
-    ///
-    /// This allocates a workspace vector of dimension D1 for intermediate results.
-    /// If `D1` is a type-level integer, then the allocation is performed on the stack.
-    /// Use `.quadform_tr_with_workspace(...)` instead to avoid allocations.
-    ///
-    /// # Examples:
-    ///
-    /// ```
-    /// # #[macro_use] extern crate approx;
-    /// # use nalgebra::{Matrix2, Matrix3, Matrix2x3, Vector2};
-    /// let mut mat = Matrix2::identity();
-    /// let lhs = Matrix2x3::new(1.0, 2.0, 3.0,
-    ///                          4.0, 5.0, 6.0);
-    /// let mid = Matrix3::new(0.1, 0.2, 0.3,
-    ///                        0.5, 0.6, 0.7,
-    ///                        0.9, 1.0, 1.1);
-    /// let expected = lhs * mid * lhs.transpose() * 10.0 + mat * 5.0;
-    ///
-    /// mat.quadform_tr(10.0, &lhs, &mid, 5.0);
-    /// assert_relative_eq!(mat, expected);
    pub fn quadform_tr<R3: Dim, C3: Dim, S3, D4: Dim, S4>(
        &mut self,
        alpha: T,
@ -1424,11 +1501,19 @@ where
    ) where
        S3: Storage<T, R3, C3>,
        S4: Storage<T, D4, D4>,
-        ShapeConstraint: DimEq<D1, D1> + DimEq<D1, R3> + DimEq<C3, D4>,
-        DefaultAllocator: Allocator<T, D1>,
+        ShapeConstraint: DimEq<D1, R3> + DimEq<C3, D4>,
+        DefaultAllocator: Allocator<T, R3>,
    {
-        let mut work = Matrix::new_uninitialized_generic(self.data.shape().0, Const::<1>);
-        self.quadform_tr_with_workspace(&mut work, alpha, lhs, mid, beta)
+        let work = Matrix::new_uninitialized_generic(R3::from_usize(self.shape().0), Const::<1>);
+        work.gemv_z(T::one(), lhs, &mid.column(0));
+        let work = unsafe { work.assume_init() };
+
+        self.ger(alpha.inlined_clone(), &work, &lhs.column(0), beta);
+
+        for j in 1..mid.ncols() {
+            work.gemv(T::one(), lhs, &mid.column(j), T::zero());
+            self.ger(alpha.inlined_clone(), &work, &lhs.column(j), T::one());
+        }
    }

    /// Computes the quadratic form `self = alpha * rhs.transpose() * mid * rhs + beta * self`.
@ -1454,11 +1539,10 @@ where
    /// let mut workspace = DVector::new_random(3);
    /// let expected = rhs.transpose() * &mid * &rhs * 10.0 + &mat * 5.0;
    ///
-    /// mat.quadform_with_workspace(&mut workspace, 10.0, &mid, &rhs, 5.0);
+    /// mat.quadform(&mut workspace, 10.0, &mid, &rhs, 5.0);
    /// assert_relative_eq!(mat, expected);
-    pub fn quadform_with_workspace<D2: Dim, D3: Dim, S3, R4: Dim, C4: Dim, S4>(
+    pub fn quadform<D3: Dim, S3, R4: Dim, C4: Dim, S4>(
        &mut self,
-        work: &mut OVector<MaybeUninit<T>, D2>,
        alpha: T,
        mid: &SquareMatrix<T, D3, S3>,
        rhs: &Matrix<T, R4, C4, S4>,
@ -1466,54 +1550,21 @@ where
    ) where
        S3: Storage<T, D3, D3>,
        S4: Storage<T, R4, C4>,
-        ShapeConstraint:
-            DimEq<D3, R4> + DimEq<D1, C4> + DimEq<D2, D3> + AreMultipliable<C4, R4, D2, U1>,
-        DefaultAllocator: Allocator<T, D2>,
+        ShapeConstraint: DimEq<R4, D3> + DimEq<D3, R4> + DimEq<D1, C4>,
+        DefaultAllocator: Allocator<T, D3>,
    {
-        let work = work.gemv_uninit(T::one(), mid, &rhs.column(0), T::zero());
+        // TODO: figure out why type inference wasn't doing its job.
+        let work = Matrix::new_uninitialized_generic(D3::from_usize(self.shape().0), Const::<1>);
+        work.gemv_z::<D3, D3, R4, S3, _>(T::one(), mid, &rhs.column(0));
+        let work = unsafe { work.assume_init() };
+
        self.column_mut(0)
            .gemv_tr(alpha.inlined_clone(), rhs, &work, beta.inlined_clone());

        for j in 1..rhs.ncols() {
-            work.gemv(T::one(), mid, &rhs.column(j), T::zero());
+            work.gemv::<D3, D3, R4, S3, _>(T::one(), mid, &rhs.column(j), T::zero());
            self.column_mut(j)
                .gemv_tr(alpha.inlined_clone(), rhs, &work, beta.inlined_clone());
        }
    }
-
-    /// Computes the quadratic form `self = alpha * rhs.transpose() * mid * rhs + beta * self`.
-    ///
-    /// This allocates a workspace vector of dimension D2 for intermediate results.
-    /// If `D2` is a type-level integer, then the allocation is performed on the stack.
-    /// Use `.quadform_with_workspace(...)` instead to avoid allocations.
-    ///
-    /// ```
-    /// # #[macro_use] extern crate approx;
-    /// # use nalgebra::{Matrix2, Matrix3x2, Matrix3};
-    /// let mut mat = Matrix2::identity();
-    /// let rhs = Matrix3x2::new(1.0, 2.0,
-    ///                          3.0, 4.0,
-    ///                          5.0, 6.0);
-    /// let mid = Matrix3::new(0.1, 0.2, 0.3,
-    ///                        0.5, 0.6, 0.7,
-    ///                        0.9, 1.0, 1.1);
-    /// let expected = rhs.transpose() * mid * rhs * 10.0 + mat * 5.0;
-    ///
-    /// mat.quadform(10.0, &mid, &rhs, 5.0);
-    /// assert_relative_eq!(mat, expected);
-    pub fn quadform<D2: Dim, S2, R3: Dim, C3: Dim, S3>(
-        &mut self,
-        alpha: T,
-        mid: &SquareMatrix<T, D2, S2>,
-        rhs: &Matrix<T, R3, C3, S3>,
-        beta: T,
-    ) where
-        S2: Storage<T, D2, D2>,
-        S3: Storage<T, R3, C3>,
-        ShapeConstraint: DimEq<D2, R3> + DimEq<D1, C3> + AreMultipliable<C3, R3, D2, U1>,
-        DefaultAllocator: Allocator<T, D2>,
-    {
-        let mut work = Matrix::new_uninitialized_generic(mid.data.shape().0, Const::<1>);
-        self.quadform_with_workspace(&mut work, alpha, mid, rhs, beta)
-    }
 }
--- a/src/base/construction.rs
+++ b/src/base/construction.rs
@ -18,15 +18,14 @@ use typenum::{self, Cmp, Greater};

 use simba::scalar::{ClosedAdd, ClosedMul};

-use crate::{base::allocator::Allocator};
+use crate::base::allocator::{Allocator, InnerAllocator};
 use crate::base::dimension::{Dim, DimName, Dynamic, ToTypenum};
 use crate::base::storage::Storage;
 use crate::base::{
    ArrayStorage, Const, DefaultAllocator, Matrix, OMatrix, OVector, Scalar, Unit, Vector,
 };

-/// When "no_unsound_assume_init" is enabled, expands to `unimplemented!()` instead of `new_uninitialized_generic().assume_init()`.
-/// Intended as a placeholder, each callsite should be refactored to use uninitialized memory soundly
+/// OBJECTIVE: GET RID OF THIS!
 #[macro_export]
 macro_rules! unimplemented_or_uninitialized_generic {
    ($nrows:expr, $ncols:expr) => {{
@ -99,7 +98,7 @@ where
            "Matrix init. error: the slice did not contain the right number of elements."
        );

-        let mut res = Matrix::new_uninitialized_generic(nrows, ncols);
+        let mut res = Self::new_uninitialized_generic(nrows, ncols);
        let mut iter = slice.iter();

        for i in 0..nrows.value() {
@ -117,7 +116,10 @@ where
    /// Creates a matrix with its elements filled with the components provided by a slice. The
    /// components must have the same layout as the matrix data storage (i.e. column-major).
    #[inline]
-    pub fn from_column_slice_generic(nrows: R, ncols: C, slice: &[T]) -> Self where T:Clone{
+    pub fn from_column_slice_generic(nrows: R, ncols: C, slice: &[T]) -> Self
+    where
+        T: Clone,
+    {
        Self::from_iterator_generic(nrows, ncols, slice.iter().cloned())
    }

@ -128,7 +130,7 @@ where
    where
        F: FnMut(usize, usize) -> T,
    {
-        let mut res = Matrix::new_uninitialized_generic(nrows, ncols);
+        let mut res = Self::new_uninitialized_generic(nrows, ncols);

        for j in 0..ncols.value() {
            for i in 0..nrows.value() {
@ -139,7 +141,7 @@ where
        }

        // Safety: all entries have been initialized.
-        unsafe { res.assume_init()}
+        unsafe { res.assume_init() }
    }

    /// Creates a new identity matrix.
@ -352,7 +354,7 @@ where
    #[inline]
    pub fn from_diagonal<SB: Storage<T, D>>(diag: &Vector<T, D, SB>) -> Self
    where
-        T: Zero+Scalar,
+        T: Zero + Scalar,
    {
        let (dim, _) = diag.data.shape();
        let mut res = Self::zeros_generic(dim, dim);
@ -374,12 +376,6 @@ where
 */
 macro_rules! impl_constructors(
    ($($Dims: ty),*; $(=> $DimIdent: ident: $DimBound: ident),*; $($gargs: expr),*; $($args: ident),*) => {
-        /// Creates a new uninitialized matrix or vector.
-        #[inline]
-        pub unsafe fn new_uninitialized($($args: usize),*) -> MaybeUninit<Self> {
-            Self::new_uninitialized_generic($($gargs),*)
-        }
-
        /// Creates a matrix or vector with all its elements set to `elem`.
        ///
        /// # Example
@ -518,8 +514,7 @@ macro_rules! impl_constructors(
        ///         dm[(1, 0)] == 3 && dm[(1, 1)] == 4 && dm[(1, 2)] == 5);
        /// ```
        #[inline]
-        pub fn from_fn<F>($($args: usize,)* f: F) -> Self
-            where F: FnMut(usize, usize) -> T {
+        pub fn from_fn<F: FnMut(usize, usize) -> T>($($args: usize,)* f: F) -> Self {
            Self::from_fn_generic($($gargs, )* f)
        }

@ -543,7 +538,9 @@ macro_rules! impl_constructors(
        /// ```
        #[inline]
        pub fn identity($($args: usize,)*) -> Self
-            where T: Zero + One {
+        where
+            T: Zero + One + Scalar
+        {
            Self::identity_generic($($gargs),* )
        }

@ -566,7 +563,9 @@ macro_rules! impl_constructors(
        /// ```
        #[inline]
        pub fn from_diagonal_element($($args: usize,)* elt: T) -> Self
-            where T: Zero + One {
+        where
+            T: Zero + One + Scalar
+        {
            Self::from_diagonal_element_generic($($gargs, )* elt)
        }

@ -593,7 +592,9 @@ macro_rules! impl_constructors(
        /// ```
        #[inline]
        pub fn from_partial_diagonal($($args: usize,)* elts: &[T]) -> Self
-            where T: Zero {
+        where
+            T: Zero + Scalar
+        {
            Self::from_partial_diagonal_generic($($gargs, )* elts)
        }

@ -612,7 +613,9 @@ macro_rules! impl_constructors(
        #[inline]
        #[cfg(feature = "rand")]
        pub fn new_random($($args: usize),*) -> Self
-            where Standard: Distribution<T> {
+        where
+            Standard: Distribution<T>
+        {
            Self::new_random_generic($($gargs),*)
        }
    }
@ -630,6 +633,17 @@ where
    ); // Arguments for non-generic constructors.
 }

+impl<T, R: DimName, C: DimName> OMatrix<MaybeUninit<T>, R, C>
+where
+    DefaultAllocator: Allocator<T, R, C>,
+{
+    /// Creates a new uninitialized matrix or vector.
+    #[inline]
+    pub fn new_uninitialized() -> Self {
+        Self::new_uninitialized_generic(R::name(), C::name())
+    }
+}
+
 /// # Constructors of matrices with a dynamic number of columns
 impl<T, R: DimName> OMatrix<T, R, Dynamic>
 where
@ -641,6 +655,17 @@ where
                   ncols);
 }

+impl<T, R: DimName> OMatrix<MaybeUninit<T>, R, Dynamic>
+where
+    DefaultAllocator: Allocator<T, R, Dynamic>,
+{
+    /// Creates a new uninitialized matrix or vector.
+    #[inline]
+    pub fn new_uninitialized(ncols: usize) -> Self {
+        Self::new_uninitialized_generic(R::name(), Dynamic::new(ncols))
+    }
+}
+
 /// # Constructors of dynamic vectors and matrices with a dynamic number of rows
 impl<T, C: DimName> OMatrix<T, Dynamic, C>
 where
@ -652,6 +677,17 @@ where
                   nrows);
 }

+impl<T, C: DimName> OMatrix<MaybeUninit<T>, Dynamic, C>
+where
+    DefaultAllocator: Allocator<T, Dynamic, C>,
+{
+    /// Creates a new uninitialized matrix or vector.
+    #[inline]
+    pub fn new_uninitialized(nrows: usize) -> Self {
+        Self::new_uninitialized_generic(Dynamic::new(nrows), C::name())
+    }
+}
+
 /// # Constructors of fully dynamic matrices
 impl<T> OMatrix<T, Dynamic, Dynamic>
 where
@ -663,6 +699,17 @@ where
                   nrows, ncols);
 }

+impl<T> OMatrix<MaybeUninit<T>, Dynamic, Dynamic>
+where
+    DefaultAllocator: Allocator<T, Dynamic, Dynamic>,
+{
+    /// Creates a new uninitialized matrix or vector.
+    #[inline]
+    pub fn new_uninitialized(nrows: usize, ncols: usize) -> Self {
+        Self::new_uninitialized_generic(Dynamic::new(nrows), Dynamic::new(ncols))
+    }
+}
+
 /*
 *
 * Constructors that don't necessarily require all dimensions
@ -701,7 +748,10 @@ macro_rules! impl_constructors_from_data(
            ///         dm[(1, 0)] == 3 && dm[(1, 1)] == 4 && dm[(1, 2)] == 5);
            /// ```
            #[inline]
-            pub fn from_row_slice($($args: usize,)* $data: &[T]) -> Self {
+            pub fn from_row_slice($($args: usize,)* $data: &[T]) -> Self
+            where
+                T: Clone
+            {
                Self::from_row_slice_generic($($gargs, )* $data)
            }

@ -728,7 +778,10 @@ macro_rules! impl_constructors_from_data(
            ///         dm[(1, 0)] == 1 && dm[(1, 1)] == 3 && dm[(1, 2)] == 5);
            /// ```
            #[inline]
-            pub fn from_column_slice($($args: usize,)* $data: &[T]) -> Self {
+            pub fn from_column_slice($($args: usize,)* $data: &[T]) -> Self
+            where
+                T: Clone
+            {
                Self::from_column_slice_generic($($gargs, )* $data)
            }

--- a/src/base/conversion.rs
+++ b/src/base/conversion.rs
@ -27,14 +27,10 @@ use crate::constraint::DimEq;
 use crate::{IsNotStaticOne, RowSVector, SMatrix, SVector};

 // TODO: too bad this won't work for slice conversions.
-impl<T1, T2, R1, C1, R2, C2> SubsetOf<OMatrix<T2, R2, C2>> for OMatrix<T1, R1, C1>
+impl<T1, T2, R1: Dim, C1: Dim, R2: Dim, C2: Dim> SubsetOf<OMatrix<T2, R2, C2>>
+    for OMatrix<T1, R1, C1>
 where
-    R1: Dim,
-    C1: Dim,
-    R2: Dim,
-    C2: Dim,
-    T1: Scalar,
-    T2: Scalar + SupersetOf<T1>,
+    T2: SupersetOf<T1>,
    DefaultAllocator:
        Allocator<T2, R2, C2> + Allocator<T1, R1, C1> + SameShapeAllocator<T1, R1, C1, R2, C2>,
    ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>,
@ -45,7 +41,7 @@ where
        let nrows2 = R2::from_usize(nrows);
        let ncols2 = C2::from_usize(ncols);

-        let mut res = OMatrix::<T2, R2, C2>::new_uninitialized_generic(nrows2, ncols2);
+        let mut res = Matrix::new_uninitialized_generic(nrows2, ncols2);

        for i in 0..nrows {
            for j in 0..ncols {
@ -57,7 +53,7 @@ where
        }

        // Safety: all entries have been initialized.
-        unsafe { Matrix::assume_init(res) }
+        unsafe { res.assume_init() }
    }

    #[inline]
--- a/src/base/default_allocator.rs
+++ b/src/base/default_allocator.rs
@ -77,9 +77,13 @@ impl<T, const R: usize, const C: usize> Allocator<T, Const<R>, Const<C>> for Def
    unsafe fn assume_init(
        uninit: <Self as InnerAllocator<MaybeUninit<T>, Const<R>, Const<C>>>::Buffer,
    ) -> Owned<T, Const<R>, Const<C>> {
-        // Safety: MaybeUninit<T> has the same alignment and layout as T, and by
-        // extension so do arrays based on these.
-        mem::transmute(uninit)
+        // SAFETY:
+        // * The caller guarantees that all elements of the array are initialized
+        // * `MaybeUninit<T>` and T are guaranteed to have the same layout
+        // * MaybeUnint does not drop, so there are no double-frees
+        // * `ArrayStorage` is transparent.
+        // And thus the conversion is safe
+        ArrayStorage((&uninit as *const _ as *const [_; C]).read())
    }
 }

@ -205,7 +209,7 @@ where
        );

        // Safety: TODO
-        <Self as Allocator<_, RTO, CTO>>::assume_init(res)
+        <Self as Allocator<_, Const<RTO>, Const<CTO>>>::assume_init(res)
    }
 }

--- a/src/base/edition.rs
+++ b/src/base/edition.rs
@ -4,6 +4,7 @@ use std::cmp;
 use std::iter::ExactSizeIterator;
 #[cfg(any(feature = "std", feature = "alloc"))]
 use std::mem;
+use std::mem::MaybeUninit;
 use std::ptr;

 use crate::base::allocator::{Allocator, Reallocator};
@ -49,13 +50,10 @@ impl<T: Scalar + Zero, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
    where
        I: IntoIterator<Item = &'a usize>,
        I::IntoIter: ExactSizeIterator + Clone,
-        DefaultAllocator: Allocator<T, Dynamic, C>,
    {
        let irows = irows.into_iter();
        let ncols = self.data.shape().1;
-        let mut res = unsafe {
-            crate::unimplemented_or_uninitialized_generic!(Dynamic::new(irows.len()), ncols)
-        };
+        let mut res =  OMatrix::<T, Dynamic, C>::new_uninitialized_generic(Dynamic::new(irows.len()), ncols);

        // First, check that all the indices from irows are valid.
        // This will allow us to use unchecked access in the inner loop.
@ -71,12 +69,12 @@ impl<T: Scalar + Zero, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
            for (destination, source) in irows.clone().enumerate() {
                unsafe {
                    *res.vget_unchecked_mut(destination) =
-                        src.vget_unchecked(*source).inlined_clone()
+                        MaybeUninit::new(src.vget_unchecked(*source).inlined_clone());
                }
            }
        }

-        res
+        unsafe { res.assume_init() }
    }

    /// Creates a new matrix by extracting the given set of columns from `self`.
@ -90,15 +88,19 @@ impl<T: Scalar + Zero, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
    {
        let icols = icols.into_iter();
        let nrows = self.data.shape().0;
-        let mut res = unsafe {
-            crate::unimplemented_or_uninitialized_generic!(nrows, Dynamic::new(icols.len()))
-        };
+        let mut res = Matrix::new_uninitialized_generic(nrows, Dynamic::new(icols.len()));

        for (destination, source) in icols.enumerate() {
-            res.column_mut(destination).copy_from(&self.column(*source))
+            for (d, s) in res
+                .column_mut(destination)
+                .iter_mut()
+                .zip(self.column(*source).iter())
+            {
+                *d = MaybeUninit::new(s.clone());
+            }
        }

-        res
+        unsafe { res.assume_init() }
    }
 }

@ -190,7 +192,10 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {

    /// Sets all the diagonal elements of this matrix to `val`.
    #[inline]
-    pub fn fill_diagonal(&mut self, val: T) {
+    pub fn fill_diagonal(&mut self, val: T)
+    where
+        T: Clone,
+    {
        let (nrows, ncols) = self.shape();
        let n = cmp::min(nrows, ncols);

@ -201,19 +206,25 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {

    /// Sets all the elements of the selected row to `val`.
    #[inline]
-    pub fn fill_row(&mut self, i: usize, val: T) {
+    pub fn fill_row(&mut self, i: usize, val: T)
+    where
+        T: Clone,
+    {
        assert!(i < self.nrows(), "Row index out of bounds.");
        for j in 0..self.ncols() {
-            unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() }
+            unsafe { *self.get_unchecked_mut((i, j)) = val.clone() }
        }
    }

    /// Sets all the elements of the selected column to `val`.
    #[inline]
-    pub fn fill_column(&mut self, j: usize, val: T) {
+    pub fn fill_column(&mut self, j: usize, val: T)
+    where
+        T: Clone,
+    {
        assert!(j < self.ncols(), "Row index out of bounds.");
        for i in 0..self.nrows() {
-            unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() }
+            unsafe { *self.get_unchecked_mut((i, j)) = val.clone() }
        }
    }

@ -225,10 +236,13 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {
    /// * If `shift > 1`, then the diagonal and the first `shift - 1` subdiagonals are left
    /// untouched.
    #[inline]
-    pub fn fill_lower_triangle(&mut self, val: T, shift: usize) {
+    pub fn fill_lower_triangle(&mut self, val: T, shift: usize)
+    where
+        T: Clone,
+    {
        for j in 0..self.ncols() {
            for i in (j + shift)..self.nrows() {
-                unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() }
+                unsafe { *self.get_unchecked_mut((i, j)) = val.clone() }
            }
        }
    }
@ -241,12 +255,15 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {
    /// * If `shift > 1`, then the diagonal and the first `shift - 1` superdiagonals are left
    /// untouched.
    #[inline]
-    pub fn fill_upper_triangle(&mut self, val: T, shift: usize) {
+    pub fn fill_upper_triangle(&mut self, val: T, shift: usize)
+    where
+        T: Clone,
+    {
        for j in shift..self.ncols() {
            // TODO: is there a more efficient way to avoid the min ?
            // (necessary for rectangular matrices)
            for i in 0..cmp::min(j + 1 - shift, self.nrows()) {
-                unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() }
+                unsafe { *self.get_unchecked_mut((i, j)) = val.clone() }
            }
        }
    }
@ -921,9 +938,8 @@ impl<T: Scalar> OMatrix<T, Dynamic, Dynamic> {
    where
        DefaultAllocator: Reallocator<T, Dynamic, Dynamic, Dynamic, Dynamic>,
    {
-        let placeholder = unsafe {
-            crate::unimplemented_or_uninitialized_generic!(Dynamic::new(0), Dynamic::new(0))
-        };
+        let placeholder =
+            Matrix::new_uninitialized_generic(Dynamic::new(0), Dynamic::new(0)).assume_init();
        let old = mem::replace(self, placeholder);
        let new = old.resize(new_nrows, new_ncols, val);
        let _ = mem::replace(self, new);
@ -946,9 +962,7 @@ where
    where
        DefaultAllocator: Reallocator<T, Dynamic, C, Dynamic, C>,
    {
-        let placeholder = unsafe {
-            crate::unimplemented_or_uninitialized_generic!(Dynamic::new(0), self.data.shape().1)
-        };
+        let placeholder = Matrix::from_fn_generic(Dynamic::new(0), self.data.shape().1, |_, _| val);
        let old = mem::replace(self, placeholder);
        let new = old.resize_vertically(new_nrows, val);
        let _ = mem::replace(self, new);
@ -971,9 +985,7 @@ where
    where
        DefaultAllocator: Reallocator<T, R, Dynamic, R, Dynamic>,
    {
-        let placeholder = unsafe {
-            crate::unimplemented_or_uninitialized_generic!(self.data.shape().0, Dynamic::new(0))
-        };
+        let placeholder = Matrix::from_fn_generic(self.data.shape().0, Dynamic::new(0), |_, _| val);
        let old = mem::replace(self, placeholder);
        let new = old.resize_horizontally(new_ncols, val);
        let _ = mem::replace(self, new);
--- a/src/base/matrix.rs
+++ b/src/base/matrix.rs
@ -29,7 +29,7 @@ use crate::base::storage::{
    ContiguousStorage, ContiguousStorageMut, Owned, SameShapeStorage, Storage, StorageMut,
 };
 use crate::base::{Const, DefaultAllocator, OMatrix, OVector, Scalar, Unit};
-use crate::{ArrayStorage, SMatrix, SimdComplexField};
+use crate::{ArrayStorage, MatrixSlice, MatrixSliceMut, SMatrix, SimdComplexField};

 #[cfg(any(feature = "std", feature = "alloc"))]
 use crate::{DMatrix, DVector, Dynamic, VecStorage};
@ -347,16 +347,13 @@ impl<T, R, C, S> Matrix<T, R, C, S> {
    }
 }

-impl<T, R: Dim, C: Dim> OMatrix<MaybeUninit<T>, R, C>
+impl<T, R: Dim, C: Dim> OMatrix<T, R, C>
 where
    DefaultAllocator: Allocator<T, R, C>,
 {
    /// Allocates a matrix with the given number of rows and columns without initializing its content.
-    ///
-    /// Note: calling `Self::new_uninitialized_generic` is often **not** what you want to do. Consider
-    /// calling `Matrix::new_uninitialized_generic` instead.
-    pub fn new_uninitialized_generic(nrows: R, ncols: C) -> Self {
-        Self {
+    pub fn new_uninitialized_generic(nrows: R, ncols: C) -> OMatrix<MaybeUninit<T>, R, C> {
+        OMatrix {
            data: <DefaultAllocator as Allocator<T, R, C>>::allocate_uninitialized(nrows, ncols),
            _phantoms: PhantomData,
        }
@ -376,6 +373,24 @@ where
    }
 }

+impl<T, R: Dim, C: Dim, S> Matrix<MaybeUninit<T>, R, C, S> {
+    /// Creates a full slice from `self` and assumes it to be initialized.
+    pub unsafe fn assume_init_ref(&self) -> MatrixSlice<T, R, C, S::RStride, S::CStride>
+    where
+        S: Storage<MaybeUninit<T>, R, C>,
+    {
+        self.full_slice().slice_assume_init()
+    }
+
+    /// Creates a full mutable slice from `self` and assumes it to be initialized.
+    pub unsafe fn assume_init_mut(&mut self) -> MatrixSliceMut<T, R, C, S::RStride, S::CStride>
+    where
+        S: StorageMut<MaybeUninit<T>, R, C>,
+    {
+        self.full_slice_mut().slice_assume_init()
+    }
+}
+
 impl<T, const R: usize, const C: usize> SMatrix<T, R, C> {
    /// Creates a new statically-allocated matrix from the given [ArrayStorage].
    ///
@ -428,6 +443,7 @@ impl<T, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {

    /// Creates a new uninitialized matrix with the given uninitialized data
    pub unsafe fn from_uninitialized_data(data: MaybeUninit<S>) -> MaybeUninit<Self> {
+        // BEEP BEEP this doesn't seem good
        let res: Matrix<T, R, C, MaybeUninit<S>> = Matrix {
            data,
            _phantoms: PhantomData,
@ -493,6 +509,7 @@ impl<T, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
    /// let slice = mat.slice_with_steps((0, 0), (5, 3), (1, 2));
    /// // The column strides is the number of steps (here 2) multiplied by the corresponding dimension.
    /// assert_eq!(mat.strides(), (1, 10));
+    /// ```
    #[inline]
    #[must_use]
    pub fn strides(&self) -> (usize, usize) {
@ -657,7 +674,7 @@ impl<T, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
            }
        }

-        unsafe { res.assume_init()}
+        unsafe { res.assume_init() }
    }

    /// Transposes `self` and store the result into `out`, which will become
@ -815,7 +832,7 @@ impl<T, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
    {
        let (nrows, ncols) = self.data.shape();

-        let mut res = OMatrix::<N3, R, C>::new_uninitialized_generic(nrows, ncols);
+        let mut res = OMatrix::new_uninitialized_generic(nrows, ncols);

        assert_eq!(
            (nrows.value(), ncols.value()),
@ -1201,13 +1218,25 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {
        }
    }

-    /// Fills this matrix with the content of another one. Both must have the same shape.
+    /// Fills this matrix with the content of another one via clones. Both must have the same shape.
    #[inline]
    pub fn copy_from<R2: Dim, C2: Dim, SB>(&mut self, other: &Matrix<T, R2, C2, SB>)
    where
        T: Clone,
        SB: Storage<T, R2, C2>,
        ShapeConstraint: SameNumberOfRows<R, R2> + SameNumberOfColumns<C, C2>,
+    {
+        self.copy_from_fn(other, T::clone)
+    }
+
+    /// Fills this matrix with the content of another one, after applying a function to
+    /// the references of the entries of the other matrix. Both must have the same shape.
+    #[inline]
+    pub fn copy_from_fn<U, R2: Dim, C2: Dim, SB, F>(&mut self, other: &Matrix<U, R2, C2, SB>, f: F)
+    where
+        SB: Storage<U, R2, C2>,
+        ShapeConstraint: SameNumberOfRows<R, R2> + SameNumberOfColumns<C, C2>,
+        F: FnMut(&U) -> T,
    {
        assert!(
            self.shape() == other.shape(),
@ -1217,19 +1246,68 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {
        for j in 0..self.ncols() {
            for i in 0..self.nrows() {
                unsafe {
-                    *self.get_unchecked_mut((i, j)) = other.get_unchecked((i, j)).clone();
+                    *self.get_unchecked_mut((i, j)) = f(other.get_unchecked((i, j)));
                }
            }
        }
    }

-    /// Fills this matrix with the content of the transpose another one.
+    /// Fills this matrix with the content of another one, after applying a function to
+    /// the entries of the other matrix. Both must have the same shape.
+    #[inline]
+    pub fn move_from<R2: Dim, C2: Dim, SB>(&mut self, other: Matrix<T, R2, C2, SB>)
+    where
+        SB: Storage<T, R2, C2>,
+        ShapeConstraint: SameNumberOfRows<R, R2> + SameNumberOfColumns<C, C2>,
+    {
+        self.move_from_fn(other, |e| e)
+    }
+
+    /// Fills this matrix with the content of another one via moves. Both must have the same shape.
+    #[inline]
+    pub fn move_from_fn<U, R2: Dim, C2: Dim, SB, F>(&mut self, other: Matrix<U, R2, C2, SB>, f: F)
+    where
+        SB: Storage<U, R2, C2>,
+        ShapeConstraint: SameNumberOfRows<R, R2> + SameNumberOfColumns<C, C2>,
+        F: FnMut(U) -> T,
+    {
+        assert!(
+            self.shape() == other.shape(),
+            "Unable to move from a matrix with a different shape."
+        );
+
+        for j in 0..self.ncols() {
+            for i in 0..self.nrows() {
+                unsafe {
+                    *self.get_unchecked_mut((i, j)) = f(*other.get_unchecked((i, j)));
+                }
+            }
+        }
+    }
+
+    /// Fills this matrix with the content of the transpose another one via clones.
    #[inline]
    pub fn tr_copy_from<R2: Dim, C2: Dim, SB>(&mut self, other: &Matrix<T, R2, C2, SB>)
    where
        T: Clone,
        SB: Storage<T, R2, C2>,
        ShapeConstraint: DimEq<R, C2> + SameNumberOfColumns<C, R2>,
+    {
+        self.tr_copy_from_fn(other, T::clone)
+    }
+
+    /// Fills this matrix with the content of the transpose of another one, after applying
+    /// a function to the references of the entries of the other matrix. Both must have the
+    /// same shape.
+    #[inline]
+    pub fn tr_copy_from_fn<U, R2: Dim, C2: Dim, SB, F>(
+        &mut self,
+        other: &Matrix<U, R2, C2, SB>,
+        f: F,
+    ) where
+        SB: Storage<U, R2, C2>,
+        ShapeConstraint: DimEq<R, C2> + SameNumberOfColumns<C, R2>,
+        F: FnMut(&U) -> T,
    {
        let (nrows, ncols) = self.shape();
        assert!(
@ -1240,7 +1318,44 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {
        for j in 0..ncols {
            for i in 0..nrows {
                unsafe {
-                    *self.get_unchecked_mut((i, j)) = other.get_unchecked((j, i)).clone();
+                    *self.get_unchecked_mut((i, j)) = f(other.get_unchecked((j, i)));
+                }
+            }
+        }
+    }
+
+    /// Fills this matrix with the content of the transpose another one via moves.
+    #[inline]
+    pub fn tr_move_from<R2: Dim, C2: Dim, SB>(&mut self, other: Matrix<T, R2, C2, SB>)
+    where
+        SB: Storage<T, R2, C2>,
+        ShapeConstraint: DimEq<R, C2> + SameNumberOfColumns<C, R2>,
+    {
+        self.tr_move_from_fn(other, |e| e)
+    }
+
+    /// Fills this matrix with the content of the transpose of another one, after applying
+    /// a function to the entries of the other matrix. Both must have the same shape.
+    #[inline]
+    pub fn tr_move_from_fn<U, R2: Dim, C2: Dim, SB, F>(
+        &mut self,
+        other: Matrix<U, R2, C2, SB>,
+        f: F,
+    ) where
+        SB: Storage<U, R2, C2>,
+        ShapeConstraint: DimEq<R, C2> + SameNumberOfColumns<C, R2>,
+        F: FnMut(U) -> T,
+    {
+        let (nrows, ncols) = self.shape();
+        assert!(
+            (ncols, nrows) == other.shape(),
+            "Unable to move from a matrix with incompatible shape."
+        );
+
+        for j in 0..ncols {
+            for i in 0..nrows {
+                unsafe {
+                    *self.get_unchecked_mut((i, j)) = f(*other.get_unchecked((j, i)));
                }
            }
        }
@ -1316,11 +1431,9 @@ impl<T, D: Dim, S: StorageMut<T, D, D>> Matrix<T, D, D, S> {
 impl<T: SimdComplexField, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
    /// Takes the adjoint (aka. conjugate-transpose) of `self` and store the result into `out`.
    #[inline]
-    pub fn adjoint_to<R2, C2, SB>(&self, out: &mut Matrix<MaybeUninit<T>, R2, C2, SB>)
+    pub fn adjoint_to<R2: Dim, C2: Dim, SB>(&self, out: &mut Matrix<MaybeUninit<T>, R2, C2, SB>)
    where
-        R2: Dim,
-        C2: Dim,
-        SB: StorageMut<T, R2, C2>,
+        SB: StorageMut<MaybeUninit<T>, R2, C2>,
        ShapeConstraint: SameNumberOfRows<R, C2> + SameNumberOfColumns<C, R2>,
    {
        let (nrows, ncols) = self.shape();
@ -1348,23 +1461,20 @@ impl<T: SimdComplexField, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S
        DefaultAllocator: Allocator<T, C, R>,
    {
        let (nrows, ncols) = self.data.shape();
+        let mut res = OMatrix::new_uninitialized_generic(ncols, nrows);
+        self.adjoint_to(&mut res);

-        unsafe {
-            let mut res = OMatrix::new_uninitialized_generic(ncols, nrows);
-            self.adjoint_to(&mut res);
-
-            res
-        }
+        unsafe { res.assume_init() }
    }

    /// Takes the conjugate and transposes `self` and store the result into `out`.
    #[deprecated(note = "Renamed `self.adjoint_to(out)`.")]
    #[inline]
-    pub fn conjugate_transpose_to<R2, C2, SB>(&self, out: &mut Matrix<T, R2, C2, SB>)
-    where
-        R2: Dim,
-        C2: Dim,
-        SB: StorageMut<T, R2, C2>,
+    pub fn conjugate_transpose_to<R2: Dim, C2: Dim, SB>(
+        &self,
+        out: &mut Matrix<MaybeUninit<T>, R2, C2, SB>,
+    ) where
+        SB: StorageMut<MaybeUninit<T>, R2, C2>,
        ShapeConstraint: SameNumberOfRows<R, C2> + SameNumberOfColumns<C, R2>,
    {
        self.adjoint_to(out)
@ -1495,7 +1605,7 @@ impl<T, D: Dim, S: Storage<T, D, D>> SquareMatrix<T, D, S> {
        );

        let dim = self.data.shape().0;
-        let mut res = OVector::<T2, D>::new_uninitialized_generic(dim, Const::<1>);
+        let mut res = OVector::new_uninitialized_generic(dim, Const::<1>);

        for i in 0..dim.value() {
            unsafe {
@ -1505,7 +1615,7 @@ impl<T, D: Dim, S: Storage<T, D, D>> SquareMatrix<T, D, S> {
        }

        // Safety: we have initialized all entries.
-        unsafe { Matrix::assume_init(res) }
+        unsafe { res.assume_init() }
    }

    /// Computes a trace of a square matrix, i.e., the sum of its diagonal elements.
@ -1630,13 +1740,12 @@ impl<T: Clone + Zero, D: DimAdd<U1>, S: Storage<T, D>> Vector<T, D, S> {
    {
        let len = self.len();
        let hnrows = DimSum::<D, U1>::from_usize(len + 1);
-        let mut res: OVector<T, _> =
-            unsafe { crate::unimplemented_or_uninitialized_generic!(hnrows, Const::<1>) };
+        let mut res = OVector::new_uninitialized_generic(hnrows, Const::<1>);
        res.generic_slice_mut((0, 0), self.data.shape())
-            .copy_from(self);
-        res[(len, 0)] = element;
+            .copy_from_fn(self, |e| MaybeUninit::new(e.clone()));
+        res[(len, 0)] = MaybeUninit::new(element);

-        res
+        unsafe { res.assume_init() }
    }
 }

@ -1953,10 +2062,11 @@ impl<T: Scalar + ClosedAdd + ClosedSub + ClosedMul, R: Dim, C: Dim, S: Storage<T
    /// dynamically-sized matrices and statically-sized 3D matrices.
    #[inline]
    #[must_use]
-    pub fn cross<R2, C2, SB>(&self, b: &Matrix<T, R2, C2, SB>) -> MatrixCross<T, R, C, R2, C2>
+    pub fn cross<R2: Dim, C2: Dim, SB>(
+        &self,
+        b: &Matrix<T, R2, C2, SB>,
+    ) -> MatrixCross<T, R, C, R2, C2>
    where
-        R2: Dim,
-        C2: Dim,
        SB: Storage<T, R2, C2>,
        DefaultAllocator: SameShapeAllocator<T, R, C, R2, C2>,
        ShapeConstraint: SameNumberOfRows<R, R2> + SameNumberOfColumns<C, C2>,
@ -1974,8 +2084,7 @@ impl<T: Scalar + ClosedAdd + ClosedSub + ClosedMul, R: Dim, C: Dim, S: Storage<T
                // TODO: soooo ugly!
                let nrows = SameShapeR::<R, R2>::from_usize(3);
                let ncols = SameShapeC::<C, C2>::from_usize(1);
-                let mut res: MatrixCross<T, R, C, R2, C2> =
-                    crate::unimplemented_or_uninitialized_generic!(nrows, ncols);
+                let mut res = Matrix::new_uninitialized_generic(nrows, ncols);

                let ax = self.get_unchecked((0, 0));
                let ay = self.get_unchecked((1, 0));
@ -1985,22 +2094,27 @@ impl<T: Scalar + ClosedAdd + ClosedSub + ClosedMul, R: Dim, C: Dim, S: Storage<T
                let by = b.get_unchecked((1, 0));
                let bz = b.get_unchecked((2, 0));

-                *res.get_unchecked_mut((0, 0)) = ay.inlined_clone() * bz.inlined_clone()
-                    - az.inlined_clone() * by.inlined_clone();
-                *res.get_unchecked_mut((1, 0)) = az.inlined_clone() * bx.inlined_clone()
-                    - ax.inlined_clone() * bz.inlined_clone();
-                *res.get_unchecked_mut((2, 0)) = ax.inlined_clone() * by.inlined_clone()
-                    - ay.inlined_clone() * bx.inlined_clone();
+                *res.get_unchecked_mut((0, 0)) = MaybeUninit::new(
+                    ay.inlined_clone() * bz.inlined_clone()
+                        - az.inlined_clone() * by.inlined_clone(),
+                );
+                *res.get_unchecked_mut((1, 0)) = MaybeUninit::new(
+                    az.inlined_clone() * bx.inlined_clone()
+                        - ax.inlined_clone() * bz.inlined_clone(),
+                );
+                *res.get_unchecked_mut((2, 0)) = MaybeUninit::new(
+                    ax.inlined_clone() * by.inlined_clone()
+                        - ay.inlined_clone() * bx.inlined_clone(),
+                );

-                res
+                res.assume_init()
            }
        } else {
            unsafe {
                // TODO: ugly!
                let nrows = SameShapeR::<R, R2>::from_usize(1);
                let ncols = SameShapeC::<C, C2>::from_usize(3);
-                let mut res: MatrixCross<T, R, C, R2, C2> =
-                    crate::unimplemented_or_uninitialized_generic!(nrows, ncols);
+                let mut res = Matrix::new_uninitialized_generic(nrows, ncols);

                let ax = self.get_unchecked((0, 0));
                let ay = self.get_unchecked((0, 1));
@ -2010,14 +2124,20 @@ impl<T: Scalar + ClosedAdd + ClosedSub + ClosedMul, R: Dim, C: Dim, S: Storage<T
                let by = b.get_unchecked((0, 1));
                let bz = b.get_unchecked((0, 2));

-                *res.get_unchecked_mut((0, 0)) = ay.inlined_clone() * bz.inlined_clone()
-                    - az.inlined_clone() * by.inlined_clone();
-                *res.get_unchecked_mut((0, 1)) = az.inlined_clone() * bx.inlined_clone()
-                    - ax.inlined_clone() * bz.inlined_clone();
-                *res.get_unchecked_mut((0, 2)) = ax.inlined_clone() * by.inlined_clone()
-                    - ay.inlined_clone() * bx.inlined_clone();
+                *res.get_unchecked_mut((0, 0)) = MaybeUninit::new(
+                    ay.inlined_clone() * bz.inlined_clone()
+                        - az.inlined_clone() * by.inlined_clone(),
+                );
+                *res.get_unchecked_mut((0, 1)) = MaybeUninit::new(
+                    az.inlined_clone() * bx.inlined_clone()
+                        - ax.inlined_clone() * bz.inlined_clone(),
+                );
+                *res.get_unchecked_mut((0, 2)) = MaybeUninit::new(
+                    ax.inlined_clone() * by.inlined_clone()
+                        - ay.inlined_clone() * bx.inlined_clone(),
+                );

-                res
+                res.assume_init()
            }
        }
    }
--- a/src/base/matrix_slice.rs
+++ b/src/base/matrix_slice.rs
@ -1,4 +1,5 @@
 use std::marker::PhantomData;
+use std::mem::MaybeUninit;
 use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo};
 use std::slice;

@ -218,6 +219,22 @@ macro_rules! storage_impl(

 storage_impl!(SliceStorage, SliceStorageMut);

+impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim>
+    SliceStorage<'a, MaybeUninit<T>, R, C, RStride, CStride>
+{
+    pub unsafe fn assume_init(self) -> SliceStorage<'a, T, R, C, RStride, CStride> {
+        Self::from_raw_parts(self.ptr as *const T, self.shape, self.strides)
+    }
+}
+
+impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim>
+    SliceStorageMut<'a, MaybeUninit<T>, R, C, RStride, CStride>
+{
+    pub unsafe fn assume_init(self) -> SliceStorageMut<'a, T, R, C, RStride, CStride> {
+        Self::from_raw_parts(self.ptr as *mut T, self.shape, self.strides)
+    }
+}
+
 unsafe impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim> StorageMut<T, R, C>
    for SliceStorageMut<'a, T, R, C, RStride, CStride>
 {
@ -242,10 +259,12 @@ unsafe impl<'a, T, R: Dim, CStride: Dim> ContiguousStorage<T, R, U1>
    for SliceStorage<'a, T, R, U1, U1, CStride>
 {
 }
+
 unsafe impl<'a, T, R: Dim, CStride: Dim> ContiguousStorage<T, R, U1>
    for SliceStorageMut<'a, T, R, U1, U1, CStride>
 {
 }
+
 unsafe impl<'a, T, R: Dim, CStride: Dim> ContiguousStorageMut<T, R, U1>
    for SliceStorageMut<'a, T, R, U1, U1, CStride>
 {
@ -255,10 +274,12 @@ unsafe impl<'a, T, R: DimName, C: Dim + IsNotStaticOne> ContiguousStorage<T, R,
    for SliceStorage<'a, T, R, C, U1, R>
 {
 }
+
 unsafe impl<'a, T, R: DimName, C: Dim + IsNotStaticOne> ContiguousStorage<T, R, C>
    for SliceStorageMut<'a, T, R, C, U1, R>
 {
 }
+
 unsafe impl<'a, T, R: DimName, C: Dim + IsNotStaticOne> ContiguousStorageMut<T, R, C>
    for SliceStorageMut<'a, T, R, C, U1, R>
 {
@ -312,6 +333,7 @@ macro_rules! matrix_slice_impl(
     $fixed_slice_with_steps: ident,
     $generic_slice: ident,
     $generic_slice_with_steps: ident,
+     $full_slice: ident,
     $rows_range_pair: ident,
     $columns_range_pair: ident) => {
        /*
@ -370,7 +392,7 @@ macro_rules! matrix_slice_impl(
        pub fn $rows_generic<RSlice: Dim>($me: $Me, row_start: usize, nrows: RSlice)
            -> $MatrixSlice<T, RSlice, C, S::RStride, S::CStride> {

-            let my_shape   = $me.data.shape();
+            let my_shape  = $me.data.shape();
            $me.assert_slice_index((row_start, 0), (nrows.value(), my_shape.1.value()), (0, 0));

            let shape = (nrows, my_shape.1);
@ -388,12 +410,12 @@ macro_rules! matrix_slice_impl(
            -> $MatrixSlice<T, RSlice, C, Dynamic, S::CStride>
            where RSlice: Dim {

-            let my_shape   = $me.data.shape();
+            let my_shape = $me.data.shape();
            let my_strides = $me.data.strides();
            $me.assert_slice_index((row_start, 0), (nrows.value(), my_shape.1.value()), (step, 0));

            let strides = (Dynamic::new((step + 1) * my_strides.0.value()), my_strides.1);
-            let shape   = (nrows, my_shape.1);
+            let shape = (nrows, my_shape.1);

            unsafe {
                let data = $SliceStorage::new_with_strides_unchecked($data, (row_start, 0), shape, strides);
@ -468,20 +490,19 @@ macro_rules! matrix_slice_impl(
            }
        }

-
        /// Extracts from this matrix `ncols` columns skipping `step` columns. Both argument may
        /// or may not be values known at compile-time.
        #[inline]
        pub fn $columns_generic_with_step<CSlice: Dim>($me: $Me, first_col: usize, ncols: CSlice, step: usize)
            -> $MatrixSlice<T, R, CSlice, S::RStride, Dynamic> {

-            let my_shape   = $me.data.shape();
+            let my_shape = $me.data.shape();
            let my_strides = $me.data.strides();

            $me.assert_slice_index((0, first_col), (my_shape.0.value(), ncols.value()), (0, step));

            let strides = (my_strides.0, Dynamic::new((step + 1) * my_strides.1.value()));
-            let shape   = (my_shape.0, ncols);
+            let shape = (my_shape.0, ncols);

            unsafe {
                let data = $SliceStorage::new_with_strides_unchecked($data, (0, first_col), shape, strides);
@ -509,7 +530,6 @@ macro_rules! matrix_slice_impl(
            }
        }

-
        /// Slices this matrix starting at its component `(start.0, start.1)` and with
        /// `(shape.0, shape.1)` components. Each row (resp. column) of the sliced matrix is
        /// separated by `steps.0` (resp. `steps.1`) ignored rows (resp. columns) of the
@ -550,11 +570,9 @@ macro_rules! matrix_slice_impl(

        /// Creates a slice that may or may not have a fixed size and stride.
        #[inline]
-        pub fn $generic_slice<RSlice, CSlice>($me: $Me, start: (usize, usize), shape: (RSlice, CSlice))
+        pub fn $generic_slice<RSlice: Dim, CSlice: Dim>($me: $Me, start: (usize, usize), shape: (RSlice, CSlice))
            -> $MatrixSlice<T, RSlice, CSlice, S::RStride, S::CStride>
-            where RSlice: Dim,
-                  CSlice: Dim {
-
+        {
            $me.assert_slice_index(start, (shape.0.value(), shape.1.value()), (0, 0));

            unsafe {
@ -585,6 +603,12 @@ macro_rules! matrix_slice_impl(
            }
        }

+        /// Returns a slice containing the entire matrix.
+        pub fn $full_slice($me: $Me) -> $MatrixSlice<T, R, C, S::RStride, S::CStride> {
+            let (nrows, ncols) = $me.shape();
+            $me.generic_slice((0, 0), (R::from_usize(nrows), C::from_usize(ncols)))
+        }
+
        /*
         *
         * Splitting.
@ -697,6 +721,7 @@ impl<T, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
     fixed_slice_with_steps,
     generic_slice,
     generic_slice_with_steps,
+     full_slice,
     rows_range_pair,
     columns_range_pair);
 }
@ -727,10 +752,27 @@ impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> Matrix<T, R, C, S> {
     fixed_slice_with_steps_mut,
     generic_slice_mut,
     generic_slice_with_steps_mut,
+     full_slice_mut,
     rows_range_pair_mut,
     columns_range_pair_mut);
 }

+impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim>
+    MatrixSlice<'a, MaybeUninit<T>, R, C, RStride, CStride>
+{
+    pub unsafe fn slice_assume_init(self) -> MatrixSlice<'a, T, R, C, RStride, CStride> {
+        Matrix::from_data(self.data.assume_init())
+    }
+}
+
+impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim>
+    MatrixSliceMut<'a, MaybeUninit<T>, R, C, RStride, CStride>
+{
+    pub unsafe fn slice_assume_init(self) -> MatrixSliceMut<'a, T, R, C, RStride, CStride> {
+        Matrix::from_data(self.data.assume_init())
+    }
+}
+
 /// A range with a size that may be known at compile-time.
 ///
 /// This may be:
--- a/src/base/ops.rs
+++ b/src/base/ops.rs
@ -24,7 +24,7 @@ use crate::SimdComplexField;
 * Indexing.
 *
 */
-impl<T: Scalar, R: Dim, C: Dim, S: Storage<T, R, C>> Index<usize> for Matrix<T, R, C, S> {
+impl<T, R: Dim, C: Dim, S: Storage<T, R, C>> Index<usize> for Matrix<T, R, C, S> {
    type Output = T;

    #[inline]
@ -36,7 +36,6 @@ impl<T: Scalar, R: Dim, C: Dim, S: Storage<T, R, C>> Index<usize> for Matrix<T,

 impl<T, R: Dim, C: Dim, S> Index<(usize, usize)> for Matrix<T, R, C, S>
 where
-    T: Scalar,
    S: Storage<T, R, C>,
 {
    type Output = T;
@ -54,7 +53,7 @@ where
 }

 // Mutable versions.
-impl<T: Scalar, R: Dim, C: Dim, S: StorageMut<T, R, C>> IndexMut<usize> for Matrix<T, R, C, S> {
+impl<T, R: Dim, C: Dim, S: StorageMut<T, R, C>> IndexMut<usize> for Matrix<T, R, C, S> {
    #[inline]
    fn index_mut(&mut self, i: usize) -> &mut T {
        let ij = self.vector_to_matrix_index(i);
@ -64,7 +63,6 @@ impl<T: Scalar, R: Dim, C: Dim, S: StorageMut<T, R, C>> IndexMut<usize> for Matr

 impl<T, R: Dim, C: Dim, S> IndexMut<(usize, usize)> for Matrix<T, R, C, S>
 where
-    T: Scalar,
    S: StorageMut<T, R, C>,
 {
    #[inline]
@ -139,15 +137,15 @@ macro_rules! componentwise_binop_impl(
     $TraitAssign: ident, $method_assign: ident, $method_assign_statically_unchecked: ident,
     $method_assign_statically_unchecked_rhs: ident;
     $method_to: ident, $method_to_statically_unchecked: ident) => {
-
        impl<T, R1: Dim, C1: Dim, SA: Storage<T, R1, C1>> Matrix<T, R1, C1, SA>
-            where T: Scalar + $bound {
-
+        where
+            T: Scalar + $bound
+        {
            /*
             *
             * Methods without dimension checking at compile-time.
-             * This is useful for code reuse because the sum representative system does not plays
-             * easily with static checks.
+             * This is useful for code reuse because the sum representative system does not play
+             * nicely with static checks.
             *
             */
            #[inline]
@ -155,7 +153,7 @@ macro_rules! componentwise_binop_impl(
                &self, rhs: &Matrix<T, R2, C2, SB>, out: &mut Matrix<MaybeUninit<T>, R3, C3, SC>
            ) where
                SB: Storage<T, R2, C2>,
-                SC: StorageMut<T, R3, C3> + StorageMut<MaybeUninit<T>, R3, C3>
+                SC: StorageMut<MaybeUninit<T>, R3, C3>
            {
                assert_eq!(self.shape(), rhs.shape(), "Matrix addition/subtraction dimensions mismatch.");
                assert_eq!(self.shape(), out.shape(), "Matrix addition/subtraction output dimensions mismatch.");
@ -184,13 +182,13 @@ macro_rules! componentwise_binop_impl(
                }
            }

-
            #[inline]
-            fn $method_assign_statically_unchecked<R2, C2, SB>(&mut self, rhs: &Matrix<T, R2, C2, SB>)
-                where R2: Dim,
-                      C2: Dim,
-                      SA: StorageMut<T, R1, C1>,
-                      SB: Storage<T, R2, C2> {
+            fn $method_assign_statically_unchecked<R2: Dim, C2: Dim, SB>(
+                &mut self, rhs: &Matrix<T, R2, C2, SB>
+            ) where
+                SA: StorageMut<T, R1, C1>,
+                SB: Storage<T, R2, C2>
+            {
                assert_eq!(self.shape(), rhs.shape(), "Matrix addition/subtraction dimensions mismatch.");

                // This is the most common case and should be deduced at compile-time.
@ -213,12 +211,12 @@ macro_rules! componentwise_binop_impl(
                }
            }

-
            #[inline]
-            fn $method_assign_statically_unchecked_rhs<R2, C2, SB>(&self, rhs: &mut Matrix<T, R2, C2, SB>)
-                where R2: Dim,
-                      C2: Dim,
-                      SB: StorageMut<T, R2, C2> {
+            fn $method_assign_statically_unchecked_rhs<R2: Dim, C2: Dim, SB>(
+                &self, rhs: &mut Matrix<T, R2, C2, SB>
+            ) where
+                SB: StorageMut<T, R2, C2>
+            {
                assert_eq!(self.shape(), rhs.shape(), "Matrix addition/subtraction dimensions mismatch.");

                // This is the most common case and should be deduced at compile-time.
@ -253,14 +251,19 @@ macro_rules! componentwise_binop_impl(
             */
            /// Equivalent to `self + rhs` but stores the result into `out` to avoid allocations.
            #[inline]
-            pub fn $method_to<R2: Dim, C2: Dim, SB,
-                              R3: Dim, C3: Dim, SC>(&self,
-                                                    rhs: &Matrix<T, R2, C2, SB>,
-                                                    out: &mut Matrix<T, R3, C3, SC>)
-                where SB: Storage<T, R2, C2>,
-                      SC: StorageMut<T, R3, C3>,
-                      ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2> +
-                                       SameNumberOfRows<R1, R3> + SameNumberOfColumns<C1, C3> {
+            pub fn $method_to<R2: Dim, C2: Dim, SB, R3: Dim, C3: Dim, SC>(
+                &self,
+                rhs: &Matrix<T, R2, C2, SB>,
+                out: &mut Matrix<MaybeUninit<T>, R3, C3, SC>
+            ) where
+                SB: Storage<T, R2, C2>,
+                SC: StorageMut<MaybeUninit<T>, R3, C3>,
+                ShapeConstraint:
+                    SameNumberOfRows<R1, R2> +
+                    SameNumberOfColumns<C1, C2> +
+                    SameNumberOfRows<R1, R3> +
+                    SameNumberOfColumns<C1, C3>
+            {
                self.$method_to_statically_unchecked(rhs, out)
            }
        }
@ -283,13 +286,14 @@ macro_rules! componentwise_binop_impl(
            }
        }

-        impl<'a, T, R1, C1, R2, C2, SA, SB> $Trait<Matrix<T, R2, C2, SB>> for &'a Matrix<T, R1, C1, SA>
-            where R1: Dim, C1: Dim, R2: Dim, C2: Dim,
-                  T: Scalar + $bound,
-                  SA: Storage<T, R1, C1>,
-                  SB: Storage<T, R2, C2>,
-                  DefaultAllocator: SameShapeAllocator<T, R2, C2, R1, C1>,
-                  ShapeConstraint:  SameNumberOfRows<R2, R1> + SameNumberOfColumns<C2, C1> {
+        impl<'a, T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $Trait<Matrix<T, R2, C2, SB>> for &'a Matrix<T, R1, C1, SA>
+        where
+            T: Scalar + $bound,
+            SA: Storage<T, R1, C1>,
+            SB: Storage<T, R2, C2>,
+            DefaultAllocator: SameShapeAllocator<T, R2, C2, R1, C1>,
+            ShapeConstraint:  SameNumberOfRows<R2, R1> + SameNumberOfColumns<C2, C1>
+        {
            type Output = MatrixSum<T, R2, C2, R1, C1>;

            #[inline]
@ -301,13 +305,14 @@ macro_rules! componentwise_binop_impl(
            }
        }

-        impl<T, R1, C1, R2, C2, SA, SB> $Trait<Matrix<T, R2, C2, SB>> for Matrix<T, R1, C1, SA>
-            where R1: Dim, C1: Dim, R2: Dim, C2: Dim,
-                  T: Scalar + $bound,
-                  SA: Storage<T, R1, C1>,
-                  SB: Storage<T, R2, C2>,
-                  DefaultAllocator: SameShapeAllocator<T, R1, C1, R2, C2>,
-                  ShapeConstraint:  SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2> {
+        impl<T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $Trait<Matrix<T, R2, C2, SB>> for Matrix<T, R1, C1, SA>
+        where
+            T: Scalar + $bound,
+            SA: Storage<T, R1, C1>,
+            SB: Storage<T, R2, C2>,
+            DefaultAllocator: SameShapeAllocator<T, R1, C1, R2, C2>,
+            ShapeConstraint:  SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>
+        {
            type Output = MatrixSum<T, R1, C1, R2, C2>;

            #[inline]
@ -316,49 +321,48 @@ macro_rules! componentwise_binop_impl(
            }
        }

-        impl<'a, 'b, T, R1, C1, R2, C2, SA, SB> $Trait<&'b Matrix<T, R2, C2, SB>> for &'a Matrix<T, R1, C1, SA>
-            where R1: Dim, C1: Dim, R2: Dim, C2: Dim,
-                  T: Scalar + $bound,
-                  SA: Storage<T, R1, C1>,
-                  SB: Storage<T, R2, C2>,
-                  DefaultAllocator: SameShapeAllocator<T, R1, C1, R2, C2>,
-                  ShapeConstraint:  SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2> {
+        impl<'a, 'b, T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $Trait<&'b Matrix<T, R2, C2, SB>> for &'a Matrix<T, R1, C1, SA>
+        where
+            T: Scalar + $bound,
+            SA: Storage<T, R1, C1>,
+            SB: Storage<T, R2, C2>,
+            DefaultAllocator: SameShapeAllocator<T, R1, C1, R2, C2>,
+            ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>
+        {
            type Output = MatrixSum<T, R1, C1, R2, C2>;

            #[inline]
            fn $method(self, rhs: &'b Matrix<T, R2, C2, SB>) -> Self::Output {
-                let mut res = unsafe {
-                    let (nrows, ncols) = self.shape();
-                    let nrows: SameShapeR<R1, R2> = Dim::from_usize(nrows);
-                    let ncols: SameShapeC<C1, C2> = Dim::from_usize(ncols);
-                    crate::unimplemented_or_uninitialized_generic!(nrows, ncols)
-                };
+                let (nrows, ncols) = self.shape();
+                let nrows: SameShapeR<R1, R2> = Dim::from_usize(nrows);
+                let ncols: SameShapeC<C1, C2> = Dim::from_usize(ncols);
+                let mut res = Matrix::new_uninitialized_generic(nrows, ncols);

                self.$method_to_statically_unchecked(rhs, &mut res);
-                res
+                unsafe { res.assume_init() }
            }
        }

-        impl<'b, T, R1, C1, R2, C2, SA, SB> $TraitAssign<&'b Matrix<T, R2, C2, SB>> for Matrix<T, R1, C1, SA>
-            where R1: Dim, C1: Dim, R2: Dim, C2: Dim,
-                  T: Scalar + $bound,
-                  SA: StorageMut<T, R1, C1>,
-                  SB: Storage<T, R2, C2>,
-                  ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2> {
-
+        impl<'b, T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $TraitAssign<&'b Matrix<T, R2, C2, SB>> for Matrix<T, R1, C1, SA>
+        where
+            T: Scalar + $bound,
+            SA: StorageMut<T, R1, C1>,
+            SB: Storage<T, R2, C2>,
+            ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>
+        {
            #[inline]
            fn $method_assign(&mut self, rhs: &'b Matrix<T, R2, C2, SB>) {
                self.$method_assign_statically_unchecked(rhs)
            }
        }

-        impl<T, R1, C1, R2, C2, SA, SB> $TraitAssign<Matrix<T, R2, C2, SB>> for Matrix<T, R1, C1, SA>
-            where R1: Dim, C1: Dim, R2: Dim, C2: Dim,
-                  T: Scalar + $bound,
-                  SA: StorageMut<T, R1, C1>,
-                  SB: Storage<T, R2, C2>,
-                  ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2> {
-
+        impl<T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $TraitAssign<Matrix<T, R2, C2, SB>> for Matrix<T, R1, C1, SA>
+        where
+            T: Scalar + $bound,
+            SA: StorageMut<T, R1, C1>,
+            SB: Storage<T, R2, C2>,
+            ShapeConstraint: SameNumberOfRows<R1, R2> + SameNumberOfColumns<C1, C2>
+        {
            #[inline]
            fn $method_assign(&mut self, rhs: Matrix<T, R2, C2, SB>) {
                self.$method_assign(&rhs)
@ -576,9 +580,9 @@ where

    #[inline]
    fn mul(self, rhs: &'b Matrix<T, R2, C2, SB>) -> Self::Output {
-        let mut res =Matrix::new_uninitialized_generic(self.data.shape().0, rhs.data.shape().1);
-                self.mul_to(rhs, &mut res);
-      unsafe{  res.assume_init()}
+        let mut res = Matrix::new_uninitialized_generic(self.data.shape().0, rhs.data.shape().1);
+        self.mul_to(rhs, &mut res);
+        unsafe { res.assume_init() }
    }
 }

@ -636,11 +640,8 @@ where
 // TODO: this is too restrictive:
 //    − we can't use `a *= b` when `a` is a mutable slice.
 //    − we can't use `a *= b` when C2 is not equal to C1.
-impl<T, R1, C1, R2, SA, SB> MulAssign<Matrix<T, R2, C1, SB>> for Matrix<T, R1, C1, SA>
+impl<T, R1: Dim, C1: Dim, R2: Dim, SA, SB> MulAssign<Matrix<T, R2, C1, SB>> for Matrix<T, R1, C1, SA>
 where
-    R1: Dim,
-    C1: Dim,
-    R2: Dim,
    T: Scalar + Zero + One + ClosedAdd + ClosedMul,
    SB: Storage<T, R2, C1>,
    SA: ContiguousStorageMut<T, R1, C1> + Clone,
@ -653,11 +654,8 @@ where
    }
 }

-impl<'b, T, R1, C1, R2, SA, SB> MulAssign<&'b Matrix<T, R2, C1, SB>> for Matrix<T, R1, C1, SA>
+impl<'b, T, R1: Dim, C1: Dim, R2: Dim, SA, SB> MulAssign<&'b Matrix<T, R2, C1, SB>> for Matrix<T, R1, C1, SA>
 where
-    R1: Dim,
-    C1: Dim,
-    R2: Dim,
    T: Scalar + Zero + One + ClosedAdd + ClosedMul,
    SB: Storage<T, R2, C1>,
    SA: ContiguousStorageMut<T, R1, C1> + Clone,
@ -697,7 +695,7 @@ where
    pub fn ad_mul<R2: Dim, C2: Dim, SB>(&self, rhs: &Matrix<T, R2, C2, SB>) -> OMatrix<T, C1, C2>
    where
        T: SimdComplexField,
-        SB: Storage<MaybeUninit<T>, R2, C2>,
+        SB: Storage<T, R2, C2>,
        DefaultAllocator: Allocator<T, C1, C2>,
        ShapeConstraint: SameNumberOfRows<R1, R2>,
    {
@ -746,7 +744,9 @@ where
        for i in 0..ncols1 {
            for j in 0..ncols2 {
                let dot = dot(&self.column(i), &rhs.column(j));
-                unsafe { *out.get_unchecked_mut((i, j)) = MaybeUninit::new(dot) ;}
+                unsafe {
+                    *out.get_unchecked_mut((i, j)) = MaybeUninit::new(dot);
+                }
            }
        }
    }
@ -786,16 +786,16 @@ where
    #[inline]
    pub fn mul_to<R2: Dim, C2: Dim, SB, R3: Dim, C3: Dim, SC>(
        &self,
-        rhs: &Matrix<MaybeUninit<T>, R2, C2, SB>,
-        out: &mut Matrix<T, R3, C3, SC>,
+        rhs: &Matrix<T, R2, C2, SB>,
+        out: &mut Matrix<MaybeUninit<T>, R3, C3, SC>,
    ) where
        SB: Storage<T, R2, C2>,
-        SC: StorageMut<T, R3, C3>,
+        SC: StorageMut<MaybeUninit<T>, R3, C3>,
        ShapeConstraint: SameNumberOfRows<R3, R1>
            + SameNumberOfColumns<C3, C2>
            + AreMultipliable<R1, C1, R2, C2>,
    {
-        out.gemm(T::one(), self, rhs, T::zero());
+        out.gemm_z(T::one(), self, rhs);
    }

    /// The kronecker product of two matrices (aka. tensor product of the corresponding linear
--- a/src/base/statistics.rs
+++ b/src/base/statistics.rs
@ -1,3 +1,5 @@
+use std::mem::MaybeUninit;
+
 use crate::allocator::Allocator;
 use crate::storage::Storage;
 use crate::{Const, DefaultAllocator, Dim, Matrix, OVector, RowOVector, Scalar, VectorSlice, U1};
@ -18,13 +20,12 @@ impl<T: Scalar, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
        DefaultAllocator: Allocator<T, U1, C>,
    {
        let ncols = self.data.shape().1;
-        let mut res: RowOVector<T, C> =
-            unsafe { crate::unimplemented_or_uninitialized_generic!(Const::<1>, ncols) };
+        let mut res = RowOVector::new_uninitialized_generic(Const::<1>, ncols);

        for i in 0..ncols.value() {
            // TODO: avoid bound checking of column.
            unsafe {
-                *res.get_unchecked_mut((0, i)) = f(self.column(i));
+                *res.get_unchecked_mut((0, i)) =MaybeUninit::new( f(self.column(i)));
            }
        }

@ -45,17 +46,16 @@ impl<T: Scalar, R: Dim, C: Dim, S: Storage<T, R, C>> Matrix<T, R, C, S> {
        DefaultAllocator: Allocator<T, C>,
    {
        let ncols = self.data.shape().1;
-        let mut res: OVector<T, C> =
-            unsafe { crate::unimplemented_or_uninitialized_generic!(ncols, Const::<1>) };
+        let mut res = Matrix::new_uninitialized_generic(ncols, Const::<1>);

        for i in 0..ncols.value() {
            // TODO: avoid bound checking of column.
            unsafe {
-                *res.vget_unchecked_mut(i) = f(self.column(i));
+                *res.vget_unchecked_mut(i) = MaybeUninit::new(f(self.column(i)));
            }
        }

-        res
+        unsafe { res.assume_init() }
    }

    /// Returns a column vector resulting from the folding of `f` on each column of this matrix.
--- a/src/linalg/permutation_sequence.rs
+++ b/src/linalg/permutation_sequence.rs
@ -1,3 +1,5 @@
+use std::mem::MaybeUninit;
+
 #[cfg(feature = "serde-serialize-no-std")]
 use serde::{Deserialize, Serialize};

@ -8,7 +10,7 @@ use crate::allocator::Allocator;
 use crate::base::{DefaultAllocator, Matrix, OVector, Scalar};
 #[cfg(any(feature = "std", feature = "alloc"))]
 use crate::dimension::Dynamic;
-use crate::dimension::{Const, Dim, DimName};
+use crate::dimension::{ Dim, DimName};
 use crate::storage::StorageMut;

 /// A sequence of row or column permutations.
@ -29,13 +31,13 @@ where
    DefaultAllocator: Allocator<(usize, usize), D>,
 {
    len: usize,
-    ipiv: OVector<(usize, usize), D>,
+    ipiv: OVector<MaybeUninit<(usize, usize)>, D>,
 }

 impl<D: Dim> Copy for PermutationSequence<D>
 where
    DefaultAllocator: Allocator<(usize, usize), D>,
-    OVector<(usize, usize), D>: Copy,
+    OVector<MaybeUninit<(usize, usize)>, D>: Copy,
 {
 }

@ -72,7 +74,7 @@ where
        unsafe {
            Self {
                len: 0,
-                ipiv: crate::unimplemented_or_uninitialized_generic!(dim, Const::<1>),
+                ipiv: OVector::new_uninitialized(dim),
            }
        }
    }
@ -97,7 +99,7 @@ where
    where
        S2: StorageMut<T, R2, C2>,
    {
-        for i in self.ipiv.rows_range(..self.len).iter() {
+        for i in self.ipiv.rows_range(..self.len).iter().map(MaybeUninit::assume_init) {
            rhs.swap_rows(i.0, i.1)
        }
    }