From df9b6f5f646e90eb6300e5b08a39e253e5474e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Violeta=20Hern=C3=A1ndez?= Date: Thu, 15 Jul 2021 23:56:58 -0500 Subject: [PATCH] blas.rs works now! --- src/base/allocator.rs | 12 +- src/base/blas.rs | 407 ++++++++++++++++------------- src/base/construction.rs | 97 +++++-- src/base/conversion.rs | 14 +- src/base/default_allocator.rs | 12 +- src/base/edition.rs | 70 +++-- src/base/matrix.rs | 232 ++++++++++++---- src/base/matrix_slice.rs | 64 ++++- src/base/ops.rs | 176 ++++++------- src/base/statistics.rs | 14 +- src/linalg/permutation_sequence.rs | 12 +- 11 files changed, 695 insertions(+), 415 deletions(-) diff --git a/src/base/allocator.rs b/src/base/allocator.rs index 77c9b528..92a38300 100644 --- a/src/base/allocator.rs +++ b/src/base/allocator.rs @@ -49,7 +49,7 @@ pub trait Allocator: /// A matrix reallocator. Changes the size of the memory buffer that initially contains (RFrom × /// CFrom) elements to a smaller or larger size (RTo, CTo). pub trait Reallocator: - InnerAllocator + InnerAllocator + Allocator + Allocator { /// Reallocates a buffer of shape `(RTo, CTo)`, possibly reusing a previously allocated buffer /// `buf`. Data stored by `buf` are linearly copied to the output: @@ -75,7 +75,7 @@ pub type SameShapeC = >:: // TODO: Bad name. /// Restricts the given number of rows and columns to be respectively the same. pub trait SameShapeAllocator: - InnerAllocator + InnerAllocator, SameShapeC> + Allocator + Allocator, SameShapeC> where ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, { @@ -85,7 +85,7 @@ impl SameShapeAllocator + InnerAllocator, SameShapeC>, + Allocator + Allocator, SameShapeC>, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, { } @@ -93,8 +93,8 @@ where // XXX: Bad name. /// Restricts the given number of rows to be equal. pub trait SameShapeVectorAllocator: - InnerAllocator - + InnerAllocator> + Allocator + + Allocator> + SameShapeAllocator where ShapeConstraint: SameNumberOfRows, @@ -103,7 +103,7 @@ where impl SameShapeVectorAllocator for DefaultAllocator where - DefaultAllocator: InnerAllocator + InnerAllocator>, + DefaultAllocator: Allocator + Allocator>, ShapeConstraint: SameNumberOfRows, { } diff --git a/src/base/blas.rs b/src/base/blas.rs index 3b8ac951..2ef0dff7 100644 --- a/src/base/blas.rs +++ b/src/base/blas.rs @@ -1,4 +1,12 @@ -use crate::{OVector, SimdComplexField}; +//! Implements a subset of the Basic Linear Algebra Subprograms (BLAS), a +//! standard and highly optimized set of basic vector and matrix operations. +//! +//! To avoid unsoundness due to mishandling of uninitialized data, we divide our +//! methods into two groups: those that take in a `&mut` to a matrix, and those +//! that return an owned matrix that would otherwise result from setting a +//! parameter to zero in the other methods. 
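As a sketch of the in-place/owned split this module doc describes (a minimal example; `gemv` is the existing in-place API, while the `_z` variants are the owned-result counterparts introduced in this patch and may still change):

```rust
use nalgebra::{Matrix2, Vector2};

fn main() {
    let a = Matrix2::new(1.0, 2.0,
                         3.0, 4.0);
    let x = Vector2::new(1.0, 1.0);

    // First group: `self` is both an input (scaled by `beta`) and the output,
    // so it must already be initialized.
    let mut y = Vector2::new(1.0, 0.0);
    y.gemv(2.0, &a, &x, 0.5); // y = 2 * (a * x) + 0.5 * y
    assert_eq!(y, Vector2::new(6.5, 14.0));

    // Second group (`gemv_z`, `gemm_z`, ... below): `beta` is implicitly zero,
    // so the output buffer is never read and may start out uninitialized.
}
```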
+ +use crate::{OMatrix, OVector, SimdComplexField}; #[cfg(feature = "std")] use matrixmultiply; use num::{One, Zero}; @@ -279,72 +287,16 @@ where } } -#[allow(clippy::too_many_arguments)] -fn array_axcpy( - y: &mut [T], - a: T, - x: &[T], - c: T, - beta: T, - stride1: usize, - stride2: usize, - len: usize, -) where - T: Scalar + Zero + ClosedAdd + ClosedMul, -{ - for i in 0..len { - unsafe { - let y = y.get_unchecked_mut(i * stride1); - *y = a.inlined_clone() - * x.get_unchecked(i * stride2).inlined_clone() - * c.inlined_clone() - + beta.inlined_clone() * y.inlined_clone(); - } - } -} - -fn array_axc(y: &mut [T], a: T, x: &[T], c: T, stride1: usize, stride2: usize, len: usize) -where - T: Scalar + Zero + ClosedAdd + ClosedMul, -{ - for i in 0..len { - unsafe { - *y.get_unchecked_mut(i * stride1) = a.inlined_clone() - * x.get_unchecked(i * stride2).inlined_clone() - * c.inlined_clone(); - } - } -} - -fn array_axc_uninit( - y: &mut [MaybeUninit], - a: T, - x: &[T], - c: T, - stride1: usize, - stride2: usize, - len: usize, -) where - T: Scalar + Zero + ClosedAdd + ClosedMul, -{ - for i in 0..len { - unsafe { - *y.get_unchecked_mut(i * stride1) = MaybeUninit::new( - a.inlined_clone() - * x.get_unchecked(i * stride2).inlined_clone() - * c.inlined_clone(), - ); - } - } -} - /// # BLAS functions impl Vector where T: Scalar + Zero + ClosedAdd + ClosedMul, S: StorageMut, { - /// Computes `self = a * x * c + b * self`. + /// Computes `self = a * x * c + b * self`, where `a`, `b`, `c` are scalars, + /// and `x` is a vector of the same size as `self`. + /// + /// For commutative scalars, this is equivalent to an [`axpy`] call. /// /// If `b` is zero, `self` is never read from. /// @@ -376,9 +328,24 @@ where let x = x.data.as_slice_unchecked(); if !b.is_zero() { - array_axcpy(y, a, x, c, b, rstride1, rstride2, x.len()); + for i in 0..x.len() { + unsafe { + let y = y.get_unchecked_mut(i * rstride1); + *y = a.inlined_clone() + * x.get_unchecked(i * rstride2).inlined_clone() + * c.inlined_clone() + + b.inlined_clone() * y.inlined_clone(); + } + } } else { - array_axc(y, a, x, c, rstride1, rstride2, x.len()); + for i in 0..x.len() { + unsafe { + let y = y.get_unchecked_mut(i * rstride1); + *y = a.inlined_clone() + * x.get_unchecked(i * rstride2).inlined_clone() + * c.inlined_clone(); + } + } } } } @@ -746,49 +713,55 @@ where } } -impl OVector, D> +impl Vector, D, S> where T: Scalar + Zero + ClosedAdd + ClosedMul, - DefaultAllocator: Allocator, + S: StorageMut, D>, { - pub fn axc(&mut self, a: T, x: &Vector, c: T) -> OVector + /// Computes `alpha * a * x`, where `a` is a matrix, `x` a vector, and + /// `alpha` is a scalar. + /// + /// # Safety + /// `self` must be completely uninitialized, or data leaks will occur. After + /// this method is called, all entries in `self` will be initialized. + pub fn axc(&mut self, a: T, x: &Vector, c: T) where - SB: Storage, + S2: Storage, ShapeConstraint: DimEq, { - assert_eq!(self.nrows(), x.nrows(), "Axcpy: mismatched vector shapes."); - let rstride1 = self.strides().0; let rstride2 = x.strides().0; unsafe { - // SAFETY: the conversion to slices is OK because we access the - // elements taking the strides into account. 
let y = self.data.as_mut_slice_unchecked(); let x = x.data.as_slice_unchecked(); - array_axc_uninit(y, a, x, c, rstride1, rstride2, x.len()); - self.assume_init() + for i in 0..y.len() { + *y.get_unchecked_mut(i * rstride1) = MaybeUninit::new( + a.inlined_clone() + * x.get_unchecked(i * rstride2).inlined_clone() + * c.inlined_clone(), + ); + } } } - /// Computes `self = alpha * a * x, where `a` is a matrix, `x` a vector, and + /// Computes `alpha * a * x`, where `a` is a matrix, `x` a vector, and /// `alpha` is a scalar. /// - /// By the time this method returns, `self` will have been initialized. + /// Initializes `self`. #[inline] - pub fn gemv_uninit( - mut self, + pub fn gemv_z( + &mut self, alpha: T, a: &Matrix, x: &Vector, - beta: T, - ) -> OVector - where + ) where T: One, SB: Storage, SC: Storage, ShapeConstraint: DimEq + AreMultipliable, + // DefaultAllocator: Allocator, { let dim1 = self.nrows(); let (nrows2, ncols2) = a.shape(); @@ -801,22 +774,169 @@ where if ncols2 == 0 { self.fill_fn(|| MaybeUninit::new(T::zero())); - return self.assume_init(); + return; } // TODO: avoid bound checks. let col2 = a.column(0); let val = unsafe { x.vget_unchecked(0).inlined_clone() }; - let res = self.axc(alpha.inlined_clone(), &col2, val); + self.axc(alpha.inlined_clone(), &col2, val); - for j in 1..ncols2 { - let col2 = a.column(j); - let val = unsafe { x.vget_unchecked(j).inlined_clone() }; + // Safety: axc initializes self. + unsafe { + let mut init = self.assume_init_mut(); - res.axcpy(alpha.inlined_clone(), &col2, val, T::one()); + for j in 1..ncols2 { + let col2 = a.column(j); + let val = unsafe { x.vget_unchecked(j).inlined_clone() }; + init.axcpy(alpha.inlined_clone(), &col2, val, T::one()); + } + } + } +} + +impl OMatrix +where + T: Scalar + Zero + One + ClosedAdd + ClosedMul, + DefaultAllocator: Allocator, +{ + /// Computes `alpha * a * b`, where `a` and `b` are matrices, and `alpha` is + /// a scalar. + /// + /// # Examples: + /// + /// ``` + /// # #[macro_use] extern crate approx; + /// # use nalgebra::{Matrix2x3, Matrix3x4, Matrix2x4}; + /// let mut mat1 = Matrix2x4::identity(); + /// let mat2 = Matrix2x3::new(1.0, 2.0, 3.0, + /// 4.0, 5.0, 6.0); + /// let mat3 = Matrix3x4::new(0.1, 0.2, 0.3, 0.4, + /// 0.5, 0.6, 0.7, 0.8, + /// 0.9, 1.0, 1.1, 1.2); + /// let expected = mat2 * mat3 * 10.0 + mat1 * 5.0; + /// + /// mat1.gemm(10.0, &mat2, &mat3, 5.0); + /// assert_relative_eq!(mat1, expected); + /// ``` + #[inline] + pub fn gemm_z( + alpha: T, + a: &Matrix, + b: &Matrix, + ) -> Self + where + SB: Storage, + SC: Storage, + ShapeConstraint: SameNumberOfRows + + SameNumberOfColumns + + AreMultipliable, + { + let (nrows1, ncols1) = a.shape(); + let (nrows2, ncols2) = b.shape(); + + assert_eq!( + ncols1, nrows2, + "gemm: dimensions mismatch for multiplication." + ); + + let mut res = + Matrix::new_uninitialized_generic(R1::from_usize(nrows1), C1::from_usize(ncols2)); + + #[cfg(feature = "std")] + { + // We assume large matrices will be Dynamic but small matrices static. + // We could use matrixmultiply for large statically-sized matrices but the performance + // threshold to activate it would be different from SMALL_DIM because our code optimizes + // better for statically-sized matrices. + if R1::is::() + || C1::is::() + || R2::is::() + || C2::is::() + || R3::is::() + || C3::is::() + { + // matrixmultiply can be used only if the std feature is available. + + // Threshold determined empirically. 
+ const SMALL_DIM: usize = 5; + + if nrows1 > SMALL_DIM + && ncols1 > SMALL_DIM + && nrows2 > SMALL_DIM + && ncols2 > SMALL_DIM + { + // NOTE: this case should never happen because we enter this + // codepath only when ncols2 > SMALL_DIM. Though we keep this + // here just in case if in the future we change the conditions to + // enter this codepath. + if ncols1 == 0 { + // NOTE: we can't just always multiply by beta + // because we documented the guaranty that `self` is + // never read if `beta` is zero. + + // Safety: this buffer is empty. + return res.assume_init(); + } + + let (rsa, csa) = a.strides(); + let (rsb, csb) = b.strides(); + let (rsc, csc) = res.strides(); + + if T::is::() { + unsafe { + matrixmultiply::sgemm( + nrows1, + ncols1, + ncols2, + mem::transmute_copy(&alpha), + a.data.ptr() as *const f32, + rsa as isize, + csa as isize, + b.data.ptr() as *const f32, + rsb as isize, + csb as isize, + 0.0, + res.data.ptr_mut() as *mut f32, + rsc as isize, + csc as isize, + ); + + return res.assume_init(); + } + } else if T::is::() { + unsafe { + matrixmultiply::dgemm( + nrows1, + ncols1, + ncols2, + mem::transmute_copy(&alpha), + a.data.ptr() as *const f64, + rsa as isize, + csa as isize, + b.data.ptr() as *const f64, + rsb as isize, + csb as isize, + 0.0, + res.data.ptr_mut() as *mut f64, + rsc as isize, + csc as isize, + ); + + return res.assume_init(); + } + } + } + } } - res + for j1 in 0..ncols1 { + // TODO: avoid bound checks. + res.column_mut(j1) + .gemv_z(alpha.inlined_clone(), a, &b.column(j1)); + } + + unsafe { res.assume_init() } } } @@ -1372,49 +1492,6 @@ where /// /// mat.quadform_tr_with_workspace(&mut workspace, 10.0, &lhs, &mid, 5.0); /// assert_relative_eq!(mat, expected); - pub fn quadform_tr_with_workspace( - &mut self, - work: &mut OVector, D2>, - alpha: T, - lhs: &Matrix, - mid: &SquareMatrix, - beta: T, - ) where - S3: Storage, - S4: Storage, - ShapeConstraint: DimEq + DimEq + DimEq + DimEq, - DefaultAllocator: Allocator, - { - let work = work.gemv_uninit(T::one(), lhs, &mid.column(0), T::zero()); - self.ger(alpha.inlined_clone(), &work, &lhs.column(0), beta); - - for j in 1..mid.ncols() { - work.gemv(T::one(), lhs, &mid.column(j), T::zero()); - self.ger(alpha.inlined_clone(), &work, &lhs.column(j), T::one()); - } - } - - /// Computes the quadratic form `self = alpha * lhs * mid * lhs.transpose() + beta * self`. - /// - /// This allocates a workspace vector of dimension D1 for intermediate results. - /// If `D1` is a type-level integer, then the allocation is performed on the stack. - /// Use `.quadform_tr_with_workspace(...)` instead to avoid allocations. 
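Aside from the `matrixmultiply` fast path above, `gemm_z` reduces the product to one matrix-vector product per output column. A safe, simplified sketch of that fallback loop, using a zero-initialized buffer instead of the uninitialized one:

```rust
use nalgebra::DMatrix;

/// Naive column-by-column `alpha * a * b`, mirroring the fallback loop of `gemm_z`.
fn gemm_fallback(alpha: f64, a: &DMatrix<f64>, b: &DMatrix<f64>) -> DMatrix<f64> {
    assert_eq!(a.ncols(), b.nrows(), "gemm: dimensions mismatch for multiplication.");
    let mut res = DMatrix::zeros(a.nrows(), b.ncols());
    for j in 0..b.ncols() {
        // res[:, j] = alpha * a * b[:, j]; with `beta == 0` the old contents
        // of the column are never read.
        res.column_mut(j).gemv(alpha, a, &b.column(j), 0.0);
    }
    res
}
```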
- /// - /// # Examples: - /// - /// ``` - /// # #[macro_use] extern crate approx; - /// # use nalgebra::{Matrix2, Matrix3, Matrix2x3, Vector2}; - /// let mut mat = Matrix2::identity(); - /// let lhs = Matrix2x3::new(1.0, 2.0, 3.0, - /// 4.0, 5.0, 6.0); - /// let mid = Matrix3::new(0.1, 0.2, 0.3, - /// 0.5, 0.6, 0.7, - /// 0.9, 1.0, 1.1); - /// let expected = lhs * mid * lhs.transpose() * 10.0 + mat * 5.0; - /// - /// mat.quadform_tr(10.0, &lhs, &mid, 5.0); - /// assert_relative_eq!(mat, expected); pub fn quadform_tr( &mut self, alpha: T, @@ -1424,11 +1501,19 @@ where ) where S3: Storage, S4: Storage, - ShapeConstraint: DimEq + DimEq + DimEq, - DefaultAllocator: Allocator, + ShapeConstraint: DimEq + DimEq, + DefaultAllocator: Allocator, { - let mut work = Matrix::new_uninitialized_generic(self.data.shape().0, Const::<1>); - self.quadform_tr_with_workspace(&mut work, alpha, lhs, mid, beta) + let work = Matrix::new_uninitialized_generic(R3::from_usize(self.shape().0), Const::<1>); + work.gemv_z(T::one(), lhs, &mid.column(0)); + let work = unsafe { work.assume_init() }; + + self.ger(alpha.inlined_clone(), &work, &lhs.column(0), beta); + + for j in 1..mid.ncols() { + work.gemv(T::one(), lhs, &mid.column(j), T::zero()); + self.ger(alpha.inlined_clone(), &work, &lhs.column(j), T::one()); + } } /// Computes the quadratic form `self = alpha * rhs.transpose() * mid * rhs + beta * self`. @@ -1454,11 +1539,10 @@ where /// let mut workspace = DVector::new_random(3); /// let expected = rhs.transpose() * &mid * &rhs * 10.0 + &mat * 5.0; /// - /// mat.quadform_with_workspace(&mut workspace, 10.0, &mid, &rhs, 5.0); + /// mat.quadform(&mut workspace, 10.0, &mid, &rhs, 5.0); /// assert_relative_eq!(mat, expected); - pub fn quadform_with_workspace( + pub fn quadform( &mut self, - work: &mut OVector, D2>, alpha: T, mid: &SquareMatrix, rhs: &Matrix, @@ -1466,54 +1550,21 @@ where ) where S3: Storage, S4: Storage, - ShapeConstraint: - DimEq + DimEq + DimEq + AreMultipliable, - DefaultAllocator: Allocator, + ShapeConstraint: DimEq + DimEq + DimEq, + DefaultAllocator: Allocator, { - let work = work.gemv_uninit(T::one(), mid, &rhs.column(0), T::zero()); + // TODO: figure out why type inference wasn't doing its job. + let work = Matrix::new_uninitialized_generic(D3::from_usize(self.shape().0), Const::<1>); + work.gemv_z::(T::one(), mid, &rhs.column(0)); + let work = unsafe { work.assume_init() }; + self.column_mut(0) .gemv_tr(alpha.inlined_clone(), rhs, &work, beta.inlined_clone()); for j in 1..rhs.ncols() { - work.gemv(T::one(), mid, &rhs.column(j), T::zero()); + work.gemv::(T::one(), mid, &rhs.column(j), T::zero()); self.column_mut(j) .gemv_tr(alpha.inlined_clone(), rhs, &work, beta.inlined_clone()); } } - - /// Computes the quadratic form `self = alpha * rhs.transpose() * mid * rhs + beta * self`. - /// - /// This allocates a workspace vector of dimension D2 for intermediate results. - /// If `D2` is a type-level integer, then the allocation is performed on the stack. - /// Use `.quadform_with_workspace(...)` instead to avoid allocations. 
- /// - /// ``` - /// # #[macro_use] extern crate approx; - /// # use nalgebra::{Matrix2, Matrix3x2, Matrix3}; - /// let mut mat = Matrix2::identity(); - /// let rhs = Matrix3x2::new(1.0, 2.0, - /// 3.0, 4.0, - /// 5.0, 6.0); - /// let mid = Matrix3::new(0.1, 0.2, 0.3, - /// 0.5, 0.6, 0.7, - /// 0.9, 1.0, 1.1); - /// let expected = rhs.transpose() * mid * rhs * 10.0 + mat * 5.0; - /// - /// mat.quadform(10.0, &mid, &rhs, 5.0); - /// assert_relative_eq!(mat, expected); - pub fn quadform( - &mut self, - alpha: T, - mid: &SquareMatrix, - rhs: &Matrix, - beta: T, - ) where - S2: Storage, - S3: Storage, - ShapeConstraint: DimEq + DimEq + AreMultipliable, - DefaultAllocator: Allocator, - { - let mut work = Matrix::new_uninitialized_generic(mid.data.shape().0, Const::<1>); - self.quadform_with_workspace(&mut work, alpha, mid, rhs, beta) - } } diff --git a/src/base/construction.rs b/src/base/construction.rs index c040a9dc..f0709917 100644 --- a/src/base/construction.rs +++ b/src/base/construction.rs @@ -18,15 +18,14 @@ use typenum::{self, Cmp, Greater}; use simba::scalar::{ClosedAdd, ClosedMul}; -use crate::{base::allocator::Allocator}; +use crate::base::allocator::{Allocator, InnerAllocator}; use crate::base::dimension::{Dim, DimName, Dynamic, ToTypenum}; use crate::base::storage::Storage; use crate::base::{ ArrayStorage, Const, DefaultAllocator, Matrix, OMatrix, OVector, Scalar, Unit, Vector, }; -/// When "no_unsound_assume_init" is enabled, expands to `unimplemented!()` instead of `new_uninitialized_generic().assume_init()`. -/// Intended as a placeholder, each callsite should be refactored to use uninitialized memory soundly +/// OBJECTIVE: GET RID OF THIS! #[macro_export] macro_rules! unimplemented_or_uninitialized_generic { ($nrows:expr, $ncols:expr) => {{ @@ -99,7 +98,7 @@ where "Matrix init. error: the slice did not contain the right number of elements." ); - let mut res = Matrix::new_uninitialized_generic(nrows, ncols); + let mut res = Self::new_uninitialized_generic(nrows, ncols); let mut iter = slice.iter(); for i in 0..nrows.value() { @@ -117,7 +116,10 @@ where /// Creates a matrix with its elements filled with the components provided by a slice. The /// components must have the same layout as the matrix data storage (i.e. column-major). #[inline] - pub fn from_column_slice_generic(nrows: R, ncols: C, slice: &[T]) -> Self where T:Clone{ + pub fn from_column_slice_generic(nrows: R, ncols: C, slice: &[T]) -> Self + where + T: Clone, + { Self::from_iterator_generic(nrows, ncols, slice.iter().cloned()) } @@ -128,7 +130,7 @@ where where F: FnMut(usize, usize) -> T, { - let mut res = Matrix::new_uninitialized_generic(nrows, ncols); + let mut res = Self::new_uninitialized_generic(nrows, ncols); for j in 0..ncols.value() { for i in 0..nrows.value() { @@ -139,7 +141,7 @@ where } // Safety: all entries have been initialized. - unsafe { res.assume_init()} + unsafe { res.assume_init() } } /// Creates a new identity matrix. @@ -352,7 +354,7 @@ where #[inline] pub fn from_diagonal>(diag: &Vector) -> Self where - T: Zero+Scalar, + T: Zero + Scalar, { let (dim, _) = diag.data.shape(); let mut res = Self::zeros_generic(dim, dim); @@ -374,12 +376,6 @@ where */ macro_rules! impl_constructors( ($($Dims: ty),*; $(=> $DimIdent: ident: $DimBound: ident),*; $($gargs: expr),*; $($args: ident),*) => { - /// Creates a new uninitialized matrix or vector. 
- #[inline] - pub unsafe fn new_uninitialized($($args: usize),*) -> MaybeUninit { - Self::new_uninitialized_generic($($gargs),*) - } - /// Creates a matrix or vector with all its elements set to `elem`. /// /// # Example @@ -518,8 +514,7 @@ macro_rules! impl_constructors( /// dm[(1, 0)] == 3 && dm[(1, 1)] == 4 && dm[(1, 2)] == 5); /// ``` #[inline] - pub fn from_fn($($args: usize,)* f: F) -> Self - where F: FnMut(usize, usize) -> T { + pub fn from_fn T>($($args: usize,)* f: F) -> Self { Self::from_fn_generic($($gargs, )* f) } @@ -543,7 +538,9 @@ macro_rules! impl_constructors( /// ``` #[inline] pub fn identity($($args: usize,)*) -> Self - where T: Zero + One { + where + T: Zero + One + Scalar + { Self::identity_generic($($gargs),* ) } @@ -566,7 +563,9 @@ macro_rules! impl_constructors( /// ``` #[inline] pub fn from_diagonal_element($($args: usize,)* elt: T) -> Self - where T: Zero + One { + where + T: Zero + One + Scalar + { Self::from_diagonal_element_generic($($gargs, )* elt) } @@ -593,7 +592,9 @@ macro_rules! impl_constructors( /// ``` #[inline] pub fn from_partial_diagonal($($args: usize,)* elts: &[T]) -> Self - where T: Zero { + where + T: Zero + Scalar + { Self::from_partial_diagonal_generic($($gargs, )* elts) } @@ -612,7 +613,9 @@ macro_rules! impl_constructors( #[inline] #[cfg(feature = "rand")] pub fn new_random($($args: usize),*) -> Self - where Standard: Distribution { + where + Standard: Distribution + { Self::new_random_generic($($gargs),*) } } @@ -630,6 +633,17 @@ where ); // Arguments for non-generic constructors. } +impl OMatrix, R, C> +where + DefaultAllocator: Allocator, +{ + /// Creates a new uninitialized matrix or vector. + #[inline] + pub fn new_uninitialized() -> Self { + Self::new_uninitialized_generic(R::name(), C::name()) + } +} + /// # Constructors of matrices with a dynamic number of columns impl OMatrix where @@ -641,6 +655,17 @@ where ncols); } +impl OMatrix, R, Dynamic> +where + DefaultAllocator: Allocator, +{ + /// Creates a new uninitialized matrix or vector. + #[inline] + pub fn new_uninitialized(ncols: usize) -> Self { + Self::new_uninitialized_generic(R::name(), Dynamic::new(ncols)) + } +} + /// # Constructors of dynamic vectors and matrices with a dynamic number of rows impl OMatrix where @@ -652,6 +677,17 @@ where nrows); } +impl OMatrix, Dynamic, C> +where + DefaultAllocator: Allocator, +{ + /// Creates a new uninitialized matrix or vector. + #[inline] + pub fn new_uninitialized(nrows: usize) -> Self { + Self::new_uninitialized_generic(Dynamic::new(nrows), C::name()) + } +} + /// # Constructors of fully dynamic matrices impl OMatrix where @@ -663,6 +699,17 @@ where nrows, ncols); } +impl OMatrix, Dynamic, Dynamic> +where + DefaultAllocator: Allocator, +{ + /// Creates a new uninitialized matrix or vector. + #[inline] + pub fn new_uninitialized(nrows: usize, ncols: usize) -> Self { + Self::new_uninitialized_generic(Dynamic::new(nrows), Dynamic::new(ncols)) + } +} + /* * * Constructors that don't necessarily require all dimensions @@ -701,7 +748,10 @@ macro_rules! impl_constructors_from_data( /// dm[(1, 0)] == 3 && dm[(1, 1)] == 4 && dm[(1, 2)] == 5); /// ``` #[inline] - pub fn from_row_slice($($args: usize,)* $data: &[T]) -> Self { + pub fn from_row_slice($($args: usize,)* $data: &[T]) -> Self + where + T: Clone + { Self::from_row_slice_generic($($gargs, )* $data) } @@ -728,7 +778,10 @@ macro_rules! 
impl_constructors_from_data( /// dm[(1, 0)] == 1 && dm[(1, 1)] == 3 && dm[(1, 2)] == 5); /// ``` #[inline] - pub fn from_column_slice($($args: usize,)* $data: &[T]) -> Self { + pub fn from_column_slice($($args: usize,)* $data: &[T]) -> Self + where + T: Clone + { Self::from_column_slice_generic($($gargs, )* $data) } diff --git a/src/base/conversion.rs b/src/base/conversion.rs index 97194a13..1efb9a91 100644 --- a/src/base/conversion.rs +++ b/src/base/conversion.rs @@ -27,14 +27,10 @@ use crate::constraint::DimEq; use crate::{IsNotStaticOne, RowSVector, SMatrix, SVector}; // TODO: too bad this won't work for slice conversions. -impl SubsetOf> for OMatrix +impl SubsetOf> + for OMatrix where - R1: Dim, - C1: Dim, - R2: Dim, - C2: Dim, - T1: Scalar, - T2: Scalar + SupersetOf, + T2: SupersetOf, DefaultAllocator: Allocator + Allocator + SameShapeAllocator, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, @@ -45,7 +41,7 @@ where let nrows2 = R2::from_usize(nrows); let ncols2 = C2::from_usize(ncols); - let mut res = OMatrix::::new_uninitialized_generic(nrows2, ncols2); + let mut res = Matrix::new_uninitialized_generic(nrows2, ncols2); for i in 0..nrows { for j in 0..ncols { @@ -57,7 +53,7 @@ where } // Safety: all entries have been initialized. - unsafe { Matrix::assume_init(res) } + unsafe { res.assume_init() } } #[inline] diff --git a/src/base/default_allocator.rs b/src/base/default_allocator.rs index 7ee425ff..b9cb793c 100644 --- a/src/base/default_allocator.rs +++ b/src/base/default_allocator.rs @@ -77,9 +77,13 @@ impl Allocator, Const> for Def unsafe fn assume_init( uninit: , Const, Const>>::Buffer, ) -> Owned, Const> { - // Safety: MaybeUninit has the same alignment and layout as T, and by - // extension so do arrays based on these. - mem::transmute(uninit) + // SAFETY: + // * The caller guarantees that all elements of the array are initialized + // * `MaybeUninit` and T are guaranteed to have the same layout + // * MaybeUnint does not drop, so there are no double-frees + // * `ArrayStorage` is transparent. + // And thus the conversion is safe + ArrayStorage((&uninit as *const _ as *const [_; C]).read()) } } @@ -205,7 +209,7 @@ where ); // Safety: TODO - >::assume_init(res) + , Const>>::assume_init(res) } } diff --git a/src/base/edition.rs b/src/base/edition.rs index 81e10b48..f013ffd3 100644 --- a/src/base/edition.rs +++ b/src/base/edition.rs @@ -4,6 +4,7 @@ use std::cmp; use std::iter::ExactSizeIterator; #[cfg(any(feature = "std", feature = "alloc"))] use std::mem; +use std::mem::MaybeUninit; use std::ptr; use crate::base::allocator::{Allocator, Reallocator}; @@ -49,13 +50,10 @@ impl> Matrix { where I: IntoIterator, I::IntoIter: ExactSizeIterator + Clone, - DefaultAllocator: Allocator, { let irows = irows.into_iter(); let ncols = self.data.shape().1; - let mut res = unsafe { - crate::unimplemented_or_uninitialized_generic!(Dynamic::new(irows.len()), ncols) - }; + let mut res = OMatrix::::new_uninitialized_generic(Dynamic::new(irows.len()), ncols); // First, check that all the indices from irows are valid. // This will allow us to use unchecked access in the inner loop. @@ -71,12 +69,12 @@ impl> Matrix { for (destination, source) in irows.clone().enumerate() { unsafe { *res.vget_unchecked_mut(destination) = - src.vget_unchecked(*source).inlined_clone() + MaybeUninit::new(src.vget_unchecked(*source).inlined_clone()); } } } - res + unsafe { res.assume_init() } } /// Creates a new matrix by extracting the given set of columns from `self`. 
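The observable behavior of `select_rows`/`select_columns` is unchanged by the switch to `MaybeUninit` above; a quick usage sketch against the public API:

```rust
use nalgebra::Matrix3;

fn main() {
    let m = Matrix3::new(11, 12, 13,
                         21, 22, 23,
                         31, 32, 33);

    // Rows 0 and 2, in that order, as a new matrix with a dynamic row count.
    let rows = m.select_rows(&[0, 2]);
    assert_eq!(rows.nrows(), 2);
    assert_eq!(rows[(1, 0)], 31);

    // Same idea for columns.
    let cols = m.select_columns(&[1]);
    assert_eq!(cols[(0, 0)], 12);
}
```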
@@ -90,15 +88,19 @@ impl> Matrix { { let icols = icols.into_iter(); let nrows = self.data.shape().0; - let mut res = unsafe { - crate::unimplemented_or_uninitialized_generic!(nrows, Dynamic::new(icols.len())) - }; + let mut res = Matrix::new_uninitialized_generic(nrows, Dynamic::new(icols.len())); for (destination, source) in icols.enumerate() { - res.column_mut(destination).copy_from(&self.column(*source)) + for (d, s) in res + .column_mut(destination) + .iter_mut() + .zip(self.column(*source).iter()) + { + *d = MaybeUninit::new(s.clone()); + } } - res + unsafe { res.assume_init() } } } @@ -190,7 +192,10 @@ impl> Matrix { /// Sets all the diagonal elements of this matrix to `val`. #[inline] - pub fn fill_diagonal(&mut self, val: T) { + pub fn fill_diagonal(&mut self, val: T) + where + T: Clone, + { let (nrows, ncols) = self.shape(); let n = cmp::min(nrows, ncols); @@ -201,19 +206,25 @@ impl> Matrix { /// Sets all the elements of the selected row to `val`. #[inline] - pub fn fill_row(&mut self, i: usize, val: T) { + pub fn fill_row(&mut self, i: usize, val: T) + where + T: Clone, + { assert!(i < self.nrows(), "Row index out of bounds."); for j in 0..self.ncols() { - unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() } + unsafe { *self.get_unchecked_mut((i, j)) = val.clone() } } } /// Sets all the elements of the selected column to `val`. #[inline] - pub fn fill_column(&mut self, j: usize, val: T) { + pub fn fill_column(&mut self, j: usize, val: T) + where + T: Clone, + { assert!(j < self.ncols(), "Row index out of bounds."); for i in 0..self.nrows() { - unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() } + unsafe { *self.get_unchecked_mut((i, j)) = val.clone() } } } @@ -225,10 +236,13 @@ impl> Matrix { /// * If `shift > 1`, then the diagonal and the first `shift - 1` subdiagonals are left /// untouched. #[inline] - pub fn fill_lower_triangle(&mut self, val: T, shift: usize) { + pub fn fill_lower_triangle(&mut self, val: T, shift: usize) + where + T: Clone, + { for j in 0..self.ncols() { for i in (j + shift)..self.nrows() { - unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() } + unsafe { *self.get_unchecked_mut((i, j)) = val.clone() } } } } @@ -241,12 +255,15 @@ impl> Matrix { /// * If `shift > 1`, then the diagonal and the first `shift - 1` superdiagonals are left /// untouched. #[inline] - pub fn fill_upper_triangle(&mut self, val: T, shift: usize) { + pub fn fill_upper_triangle(&mut self, val: T, shift: usize) + where + T: Clone, + { for j in shift..self.ncols() { // TODO: is there a more efficient way to avoid the min ? 
// (necessary for rectangular matrices) for i in 0..cmp::min(j + 1 - shift, self.nrows()) { - unsafe { *self.get_unchecked_mut((i, j)) = val.inlined_clone() } + unsafe { *self.get_unchecked_mut((i, j)) = val.clone() } } } } @@ -921,9 +938,8 @@ impl OMatrix { where DefaultAllocator: Reallocator, { - let placeholder = unsafe { - crate::unimplemented_or_uninitialized_generic!(Dynamic::new(0), Dynamic::new(0)) - }; + let placeholder = + Matrix::new_uninitialized_generic(Dynamic::new(0), Dynamic::new(0)).assume_init(); let old = mem::replace(self, placeholder); let new = old.resize(new_nrows, new_ncols, val); let _ = mem::replace(self, new); @@ -946,9 +962,7 @@ where where DefaultAllocator: Reallocator, { - let placeholder = unsafe { - crate::unimplemented_or_uninitialized_generic!(Dynamic::new(0), self.data.shape().1) - }; + let placeholder = Matrix::from_fn_generic(Dynamic::new(0), self.data.shape().1, |_, _| val); let old = mem::replace(self, placeholder); let new = old.resize_vertically(new_nrows, val); let _ = mem::replace(self, new); @@ -971,9 +985,7 @@ where where DefaultAllocator: Reallocator, { - let placeholder = unsafe { - crate::unimplemented_or_uninitialized_generic!(self.data.shape().0, Dynamic::new(0)) - }; + let placeholder = Matrix::from_fn_generic(self.data.shape().0, Dynamic::new(0), |_, _| val); let old = mem::replace(self, placeholder); let new = old.resize_horizontally(new_ncols, val); let _ = mem::replace(self, new); diff --git a/src/base/matrix.rs b/src/base/matrix.rs index 7e8f79cc..51c8b945 100644 --- a/src/base/matrix.rs +++ b/src/base/matrix.rs @@ -29,7 +29,7 @@ use crate::base::storage::{ ContiguousStorage, ContiguousStorageMut, Owned, SameShapeStorage, Storage, StorageMut, }; use crate::base::{Const, DefaultAllocator, OMatrix, OVector, Scalar, Unit}; -use crate::{ArrayStorage, SMatrix, SimdComplexField}; +use crate::{ArrayStorage, MatrixSlice, MatrixSliceMut, SMatrix, SimdComplexField}; #[cfg(any(feature = "std", feature = "alloc"))] use crate::{DMatrix, DVector, Dynamic, VecStorage}; @@ -347,16 +347,13 @@ impl Matrix { } } -impl OMatrix, R, C> +impl OMatrix where DefaultAllocator: Allocator, { /// Allocates a matrix with the given number of rows and columns without initializing its content. - /// - /// Note: calling `Self::new_uninitialized_generic` is often **not** what you want to do. Consider - /// calling `Matrix::new_uninitialized_generic` instead. - pub fn new_uninitialized_generic(nrows: R, ncols: C) -> Self { - Self { + pub fn new_uninitialized_generic(nrows: R, ncols: C) -> OMatrix, R, C> { + OMatrix { data: >::allocate_uninitialized(nrows, ncols), _phantoms: PhantomData, } @@ -376,6 +373,24 @@ where } } +impl Matrix, R, C, S> { + /// Creates a full slice from `self` and assumes it to be initialized. + pub unsafe fn assume_init_ref(&self) -> MatrixSlice + where + S: Storage, R, C>, + { + self.full_slice().slice_assume_init() + } + + /// Creates a full mutable slice from `self` and assumes it to be initialized. + pub unsafe fn assume_init_mut(&mut self) -> MatrixSliceMut + where + S: StorageMut, R, C>, + { + self.full_slice_mut().slice_assume_init() + } +} + impl SMatrix { /// Creates a new statically-allocated matrix from the given [ArrayStorage]. 
/// @@ -428,6 +443,7 @@ impl> Matrix { /// Creates a new uninitialized matrix with the given uninitialized data pub unsafe fn from_uninitialized_data(data: MaybeUninit) -> MaybeUninit { + // BEEP BEEP this doesn't seem good let res: Matrix> = Matrix { data, _phantoms: PhantomData, @@ -493,6 +509,7 @@ impl> Matrix { /// let slice = mat.slice_with_steps((0, 0), (5, 3), (1, 2)); /// // The column strides is the number of steps (here 2) multiplied by the corresponding dimension. /// assert_eq!(mat.strides(), (1, 10)); + /// ``` #[inline] #[must_use] pub fn strides(&self) -> (usize, usize) { @@ -657,7 +674,7 @@ impl> Matrix { } } - unsafe { res.assume_init()} + unsafe { res.assume_init() } } /// Transposes `self` and store the result into `out`, which will become @@ -815,7 +832,7 @@ impl> Matrix { { let (nrows, ncols) = self.data.shape(); - let mut res = OMatrix::::new_uninitialized_generic(nrows, ncols); + let mut res = OMatrix::new_uninitialized_generic(nrows, ncols); assert_eq!( (nrows.value(), ncols.value()), @@ -1201,13 +1218,25 @@ impl> Matrix { } } - /// Fills this matrix with the content of another one. Both must have the same shape. + /// Fills this matrix with the content of another one via clones. Both must have the same shape. #[inline] pub fn copy_from(&mut self, other: &Matrix) where T: Clone, SB: Storage, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { + self.copy_from_fn(other, T::clone) + } + + /// Fills this matrix with the content of another one, after applying a function to + /// the references of the entries of the other matrix. Both must have the same shape. + #[inline] + pub fn copy_from_fn(&mut self, other: &Matrix, f: F) + where + SB: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + F: FnMut(&U) -> T, { assert!( self.shape() == other.shape(), @@ -1217,19 +1246,68 @@ impl> Matrix { for j in 0..self.ncols() { for i in 0..self.nrows() { unsafe { - *self.get_unchecked_mut((i, j)) = other.get_unchecked((i, j)).clone(); + *self.get_unchecked_mut((i, j)) = f(other.get_unchecked((i, j))); } } } } - /// Fills this matrix with the content of the transpose another one. + /// Fills this matrix with the content of another one, after applying a function to + /// the entries of the other matrix. Both must have the same shape. + #[inline] + pub fn move_from(&mut self, other: Matrix) + where + SB: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + { + self.move_from_fn(other, |e| e) + } + + /// Fills this matrix with the content of another one via moves. Both must have the same shape. + #[inline] + pub fn move_from_fn(&mut self, other: Matrix, f: F) + where + SB: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, + F: FnMut(U) -> T, + { + assert!( + self.shape() == other.shape(), + "Unable to move from a matrix with a different shape." + ); + + for j in 0..self.ncols() { + for i in 0..self.nrows() { + unsafe { + *self.get_unchecked_mut((i, j)) = f(*other.get_unchecked((i, j))); + } + } + } + } + + /// Fills this matrix with the content of the transpose another one via clones. #[inline] pub fn tr_copy_from(&mut self, other: &Matrix) where T: Clone, SB: Storage, ShapeConstraint: DimEq + SameNumberOfColumns, + { + self.tr_copy_from_fn(other, T::clone) + } + + /// Fills this matrix with the content of the transpose of another one, after applying + /// a function to the references of the entries of the other matrix. Both must have the + /// same shape. 
+ #[inline] + pub fn tr_copy_from_fn( + &mut self, + other: &Matrix, + f: F, + ) where + SB: Storage, + ShapeConstraint: DimEq + SameNumberOfColumns, + F: FnMut(&U) -> T, { let (nrows, ncols) = self.shape(); assert!( @@ -1240,7 +1318,44 @@ impl> Matrix { for j in 0..ncols { for i in 0..nrows { unsafe { - *self.get_unchecked_mut((i, j)) = other.get_unchecked((j, i)).clone(); + *self.get_unchecked_mut((i, j)) = f(other.get_unchecked((j, i))); + } + } + } + } + + /// Fills this matrix with the content of the transpose another one via moves. + #[inline] + pub fn tr_move_from(&mut self, other: Matrix) + where + SB: Storage, + ShapeConstraint: DimEq + SameNumberOfColumns, + { + self.tr_move_from_fn(other, |e| e) + } + + /// Fills this matrix with the content of the transpose of another one, after applying + /// a function to the entries of the other matrix. Both must have the same shape. + #[inline] + pub fn tr_move_from_fn( + &mut self, + other: Matrix, + f: F, + ) where + SB: Storage, + ShapeConstraint: DimEq + SameNumberOfColumns, + F: FnMut(U) -> T, + { + let (nrows, ncols) = self.shape(); + assert!( + (ncols, nrows) == other.shape(), + "Unable to move from a matrix with incompatible shape." + ); + + for j in 0..ncols { + for i in 0..nrows { + unsafe { + *self.get_unchecked_mut((i, j)) = f(*other.get_unchecked((j, i))); } } } @@ -1316,11 +1431,9 @@ impl> Matrix { impl> Matrix { /// Takes the adjoint (aka. conjugate-transpose) of `self` and store the result into `out`. #[inline] - pub fn adjoint_to(&self, out: &mut Matrix, R2, C2, SB>) + pub fn adjoint_to(&self, out: &mut Matrix, R2, C2, SB>) where - R2: Dim, - C2: Dim, - SB: StorageMut, + SB: StorageMut, R2, C2>, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, { let (nrows, ncols) = self.shape(); @@ -1348,23 +1461,20 @@ impl> Matrix, { let (nrows, ncols) = self.data.shape(); + let mut res = OMatrix::new_uninitialized_generic(ncols, nrows); + self.adjoint_to(&mut res); - unsafe { - let mut res = OMatrix::new_uninitialized_generic(ncols, nrows); - self.adjoint_to(&mut res); - - res - } + unsafe { res.assume_init() } } /// Takes the conjugate and transposes `self` and store the result into `out`. #[deprecated(note = "Renamed `self.adjoint_to(out)`.")] #[inline] - pub fn conjugate_transpose_to(&self, out: &mut Matrix) - where - R2: Dim, - C2: Dim, - SB: StorageMut, + pub fn conjugate_transpose_to( + &self, + out: &mut Matrix, R2, C2, SB>, + ) where + SB: StorageMut, R2, C2>, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, { self.adjoint_to(out) @@ -1495,7 +1605,7 @@ impl> SquareMatrix { ); let dim = self.data.shape().0; - let mut res = OVector::::new_uninitialized_generic(dim, Const::<1>); + let mut res = OVector::new_uninitialized_generic(dim, Const::<1>); for i in 0..dim.value() { unsafe { @@ -1505,7 +1615,7 @@ impl> SquareMatrix { } // Safety: we have initialized all entries. - unsafe { Matrix::assume_init(res) } + unsafe { res.assume_init() } } /// Computes a trace of a square matrix, i.e., the sum of its diagonal elements. 
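A minimal sketch of how the `*_from_fn` family above composes (`copy_from_fn` is introduced by this patch and not part of a released API, so the exact signature may change):

```rust
use nalgebra::Matrix2;

fn main() {
    let src = Matrix2::new(1.0_f64, 2.0,
                           3.0, 4.0);
    let mut dst = Matrix2::zeros();

    // `copy_from` is now `copy_from_fn` specialized with `T::clone`; any
    // `FnMut(&U) -> T` can convert while copying.
    dst.copy_from_fn(&src, |e| e * 10.0);
    assert_eq!(dst, Matrix2::new(10.0, 20.0,
                                 30.0, 40.0));
}
```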
@@ -1630,13 +1740,12 @@ impl, S: Storage> Vector { { let len = self.len(); let hnrows = DimSum::::from_usize(len + 1); - let mut res: OVector = - unsafe { crate::unimplemented_or_uninitialized_generic!(hnrows, Const::<1>) }; + let mut res = OVector::new_uninitialized_generic(hnrows, Const::<1>); res.generic_slice_mut((0, 0), self.data.shape()) - .copy_from(self); - res[(len, 0)] = element; + .copy_from_fn(self, |e| MaybeUninit::new(e.clone())); + res[(len, 0)] = MaybeUninit::new(element); - res + unsafe { res.assume_init() } } } @@ -1953,10 +2062,11 @@ impl(&self, b: &Matrix) -> MatrixCross + pub fn cross( + &self, + b: &Matrix, + ) -> MatrixCross where - R2: Dim, - C2: Dim, SB: Storage, DefaultAllocator: SameShapeAllocator, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns, @@ -1974,8 +2084,7 @@ impl::from_usize(3); let ncols = SameShapeC::::from_usize(1); - let mut res: MatrixCross = - crate::unimplemented_or_uninitialized_generic!(nrows, ncols); + let mut res = Matrix::new_uninitialized_generic(nrows, ncols); let ax = self.get_unchecked((0, 0)); let ay = self.get_unchecked((1, 0)); @@ -1985,22 +2094,27 @@ impl::from_usize(1); let ncols = SameShapeC::::from_usize(3); - let mut res: MatrixCross = - crate::unimplemented_or_uninitialized_generic!(nrows, ncols); + let mut res = Matrix::new_uninitialized_generic(nrows, ncols); let ax = self.get_unchecked((0, 0)); let ay = self.get_unchecked((0, 1)); @@ -2010,14 +2124,20 @@ impl + SliceStorage<'a, MaybeUninit, R, C, RStride, CStride> +{ + pub unsafe fn assume_init(self) -> SliceStorage<'a, T, R, C, RStride, CStride> { + Self::from_raw_parts(self.ptr as *const T, self.shape, self.strides) + } +} + +impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim> + SliceStorageMut<'a, MaybeUninit, R, C, RStride, CStride> +{ + pub unsafe fn assume_init(self) -> SliceStorageMut<'a, T, R, C, RStride, CStride> { + Self::from_raw_parts(self.ptr as *mut T, self.shape, self.strides) + } +} + unsafe impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim> StorageMut for SliceStorageMut<'a, T, R, C, RStride, CStride> { @@ -242,10 +259,12 @@ unsafe impl<'a, T, R: Dim, CStride: Dim> ContiguousStorage for SliceStorage<'a, T, R, U1, U1, CStride> { } + unsafe impl<'a, T, R: Dim, CStride: Dim> ContiguousStorage for SliceStorageMut<'a, T, R, U1, U1, CStride> { } + unsafe impl<'a, T, R: Dim, CStride: Dim> ContiguousStorageMut for SliceStorageMut<'a, T, R, U1, U1, CStride> { @@ -255,10 +274,12 @@ unsafe impl<'a, T, R: DimName, C: Dim + IsNotStaticOne> ContiguousStorage { } + unsafe impl<'a, T, R: DimName, C: Dim + IsNotStaticOne> ContiguousStorage for SliceStorageMut<'a, T, R, C, U1, R> { } + unsafe impl<'a, T, R: DimName, C: Dim + IsNotStaticOne> ContiguousStorageMut for SliceStorageMut<'a, T, R, C, U1, R> { @@ -312,6 +333,7 @@ macro_rules! matrix_slice_impl( $fixed_slice_with_steps: ident, $generic_slice: ident, $generic_slice_with_steps: ident, + $full_slice: ident, $rows_range_pair: ident, $columns_range_pair: ident) => { /* @@ -370,7 +392,7 @@ macro_rules! matrix_slice_impl( pub fn $rows_generic($me: $Me, row_start: usize, nrows: RSlice) -> $MatrixSlice { - let my_shape = $me.data.shape(); + let my_shape = $me.data.shape(); $me.assert_slice_index((row_start, 0), (nrows.value(), my_shape.1.value()), (0, 0)); let shape = (nrows, my_shape.1); @@ -388,12 +410,12 @@ macro_rules! 
matrix_slice_impl( -> $MatrixSlice where RSlice: Dim { - let my_shape = $me.data.shape(); + let my_shape = $me.data.shape(); let my_strides = $me.data.strides(); $me.assert_slice_index((row_start, 0), (nrows.value(), my_shape.1.value()), (step, 0)); let strides = (Dynamic::new((step + 1) * my_strides.0.value()), my_strides.1); - let shape = (nrows, my_shape.1); + let shape = (nrows, my_shape.1); unsafe { let data = $SliceStorage::new_with_strides_unchecked($data, (row_start, 0), shape, strides); @@ -468,20 +490,19 @@ macro_rules! matrix_slice_impl( } } - /// Extracts from this matrix `ncols` columns skipping `step` columns. Both argument may /// or may not be values known at compile-time. #[inline] pub fn $columns_generic_with_step($me: $Me, first_col: usize, ncols: CSlice, step: usize) -> $MatrixSlice { - let my_shape = $me.data.shape(); + let my_shape = $me.data.shape(); let my_strides = $me.data.strides(); $me.assert_slice_index((0, first_col), (my_shape.0.value(), ncols.value()), (0, step)); let strides = (my_strides.0, Dynamic::new((step + 1) * my_strides.1.value())); - let shape = (my_shape.0, ncols); + let shape = (my_shape.0, ncols); unsafe { let data = $SliceStorage::new_with_strides_unchecked($data, (0, first_col), shape, strides); @@ -509,7 +530,6 @@ macro_rules! matrix_slice_impl( } } - /// Slices this matrix starting at its component `(start.0, start.1)` and with /// `(shape.0, shape.1)` components. Each row (resp. column) of the sliced matrix is /// separated by `steps.0` (resp. `steps.1`) ignored rows (resp. columns) of the @@ -550,11 +570,9 @@ macro_rules! matrix_slice_impl( /// Creates a slice that may or may not have a fixed size and stride. #[inline] - pub fn $generic_slice($me: $Me, start: (usize, usize), shape: (RSlice, CSlice)) + pub fn $generic_slice($me: $Me, start: (usize, usize), shape: (RSlice, CSlice)) -> $MatrixSlice - where RSlice: Dim, - CSlice: Dim { - + { $me.assert_slice_index(start, (shape.0.value(), shape.1.value()), (0, 0)); unsafe { @@ -585,6 +603,12 @@ macro_rules! matrix_slice_impl( } } + /// Returns a slice containing the entire matrix. + pub fn $full_slice($me: $Me) -> $MatrixSlice { + let (nrows, ncols) = $me.shape(); + $me.generic_slice((0, 0), (R::from_usize(nrows), C::from_usize(ncols))) + } + /* * * Splitting. @@ -697,6 +721,7 @@ impl> Matrix { fixed_slice_with_steps, generic_slice, generic_slice_with_steps, + full_slice, rows_range_pair, columns_range_pair); } @@ -727,10 +752,27 @@ impl> Matrix { fixed_slice_with_steps_mut, generic_slice_mut, generic_slice_with_steps_mut, + full_slice_mut, rows_range_pair_mut, columns_range_pair_mut); } +impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim> + MatrixSlice<'a, MaybeUninit, R, C, RStride, CStride> +{ + pub unsafe fn slice_assume_init(self) -> MatrixSlice<'a, T, R, C, RStride, CStride> { + Matrix::from_data(self.data.assume_init()) + } +} + +impl<'a, T, R: Dim, C: Dim, RStride: Dim, CStride: Dim> + MatrixSliceMut<'a, MaybeUninit, R, C, RStride, CStride> +{ + pub unsafe fn slice_assume_init(self) -> MatrixSliceMut<'a, T, R, C, RStride, CStride> { + Matrix::from_data(self.data.assume_init()) + } +} + /// A range with a size that may be known at compile-time. /// /// This may be: diff --git a/src/base/ops.rs b/src/base/ops.rs index 8da0249f..44b1c7c5 100644 --- a/src/base/ops.rs +++ b/src/base/ops.rs @@ -24,7 +24,7 @@ use crate::SimdComplexField; * Indexing. 
* */ -impl> Index for Matrix { +impl> Index for Matrix { type Output = T; #[inline] @@ -36,7 +36,6 @@ impl> Index for Matrix Index<(usize, usize)> for Matrix where - T: Scalar, S: Storage, { type Output = T; @@ -54,7 +53,7 @@ where } // Mutable versions. -impl> IndexMut for Matrix { +impl> IndexMut for Matrix { #[inline] fn index_mut(&mut self, i: usize) -> &mut T { let ij = self.vector_to_matrix_index(i); @@ -64,7 +63,6 @@ impl> IndexMut for Matr impl IndexMut<(usize, usize)> for Matrix where - T: Scalar, S: StorageMut, { #[inline] @@ -139,15 +137,15 @@ macro_rules! componentwise_binop_impl( $TraitAssign: ident, $method_assign: ident, $method_assign_statically_unchecked: ident, $method_assign_statically_unchecked_rhs: ident; $method_to: ident, $method_to_statically_unchecked: ident) => { - impl> Matrix - where T: Scalar + $bound { - + where + T: Scalar + $bound + { /* * * Methods without dimension checking at compile-time. - * This is useful for code reuse because the sum representative system does not plays - * easily with static checks. + * This is useful for code reuse because the sum representative system does not play + * nicely with static checks. * */ #[inline] @@ -155,7 +153,7 @@ macro_rules! componentwise_binop_impl( &self, rhs: &Matrix, out: &mut Matrix, R3, C3, SC> ) where SB: Storage, - SC: StorageMut + StorageMut, R3, C3> + SC: StorageMut, R3, C3> { assert_eq!(self.shape(), rhs.shape(), "Matrix addition/subtraction dimensions mismatch."); assert_eq!(self.shape(), out.shape(), "Matrix addition/subtraction output dimensions mismatch."); @@ -184,13 +182,13 @@ macro_rules! componentwise_binop_impl( } } - #[inline] - fn $method_assign_statically_unchecked(&mut self, rhs: &Matrix) - where R2: Dim, - C2: Dim, - SA: StorageMut, - SB: Storage { + fn $method_assign_statically_unchecked( + &mut self, rhs: &Matrix + ) where + SA: StorageMut, + SB: Storage + { assert_eq!(self.shape(), rhs.shape(), "Matrix addition/subtraction dimensions mismatch."); // This is the most common case and should be deduced at compile-time. @@ -213,12 +211,12 @@ macro_rules! componentwise_binop_impl( } } - #[inline] - fn $method_assign_statically_unchecked_rhs(&self, rhs: &mut Matrix) - where R2: Dim, - C2: Dim, - SB: StorageMut { + fn $method_assign_statically_unchecked_rhs( + &self, rhs: &mut Matrix + ) where + SB: StorageMut + { assert_eq!(self.shape(), rhs.shape(), "Matrix addition/subtraction dimensions mismatch."); // This is the most common case and should be deduced at compile-time. @@ -253,14 +251,19 @@ macro_rules! componentwise_binop_impl( */ /// Equivalent to `self + rhs` but stores the result into `out` to avoid allocations. #[inline] - pub fn $method_to(&self, - rhs: &Matrix, - out: &mut Matrix) - where SB: Storage, - SC: StorageMut, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + - SameNumberOfRows + SameNumberOfColumns { + pub fn $method_to( + &self, + rhs: &Matrix, + out: &mut Matrix, R3, C3, SC> + ) where + SB: Storage, + SC: StorageMut, R3, C3>, + ShapeConstraint: + SameNumberOfRows + + SameNumberOfColumns + + SameNumberOfRows + + SameNumberOfColumns + { self.$method_to_statically_unchecked(rhs, out) } } @@ -283,13 +286,14 @@ macro_rules! 
componentwise_binop_impl( } } - impl<'a, T, R1, C1, R2, C2, SA, SB> $Trait> for &'a Matrix - where R1: Dim, C1: Dim, R2: Dim, C2: Dim, - T: Scalar + $bound, - SA: Storage, - SB: Storage, - DefaultAllocator: SameShapeAllocator, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + impl<'a, T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $Trait> for &'a Matrix + where + T: Scalar + $bound, + SA: Storage, + SB: Storage, + DefaultAllocator: SameShapeAllocator, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + { type Output = MatrixSum; #[inline] @@ -301,13 +305,14 @@ macro_rules! componentwise_binop_impl( } } - impl $Trait> for Matrix - where R1: Dim, C1: Dim, R2: Dim, C2: Dim, - T: Scalar + $bound, - SA: Storage, - SB: Storage, - DefaultAllocator: SameShapeAllocator, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + impl $Trait> for Matrix + where + T: Scalar + $bound, + SA: Storage, + SB: Storage, + DefaultAllocator: SameShapeAllocator, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + { type Output = MatrixSum; #[inline] @@ -316,49 +321,48 @@ macro_rules! componentwise_binop_impl( } } - impl<'a, 'b, T, R1, C1, R2, C2, SA, SB> $Trait<&'b Matrix> for &'a Matrix - where R1: Dim, C1: Dim, R2: Dim, C2: Dim, - T: Scalar + $bound, - SA: Storage, - SB: Storage, - DefaultAllocator: SameShapeAllocator, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { + impl<'a, 'b, T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $Trait<&'b Matrix> for &'a Matrix + where + T: Scalar + $bound, + SA: Storage, + SB: Storage, + DefaultAllocator: SameShapeAllocator, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + { type Output = MatrixSum; #[inline] fn $method(self, rhs: &'b Matrix) -> Self::Output { - let mut res = unsafe { - let (nrows, ncols) = self.shape(); - let nrows: SameShapeR = Dim::from_usize(nrows); - let ncols: SameShapeC = Dim::from_usize(ncols); - crate::unimplemented_or_uninitialized_generic!(nrows, ncols) - }; + let (nrows, ncols) = self.shape(); + let nrows: SameShapeR = Dim::from_usize(nrows); + let ncols: SameShapeC = Dim::from_usize(ncols); + let mut res = Matrix::new_uninitialized_generic(nrows, ncols); self.$method_to_statically_unchecked(rhs, &mut res); - res + unsafe { res.assume_init() } } } - impl<'b, T, R1, C1, R2, C2, SA, SB> $TraitAssign<&'b Matrix> for Matrix - where R1: Dim, C1: Dim, R2: Dim, C2: Dim, - T: Scalar + $bound, - SA: StorageMut, - SB: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { - + impl<'b, T, R1: Dim, C1: Dim, R2: Dim, C2: Dim, SA, SB> $TraitAssign<&'b Matrix> for Matrix + where + T: Scalar + $bound, + SA: StorageMut, + SB: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + { #[inline] fn $method_assign(&mut self, rhs: &'b Matrix) { self.$method_assign_statically_unchecked(rhs) } } - impl $TraitAssign> for Matrix - where R1: Dim, C1: Dim, R2: Dim, C2: Dim, - T: Scalar + $bound, - SA: StorageMut, - SB: Storage, - ShapeConstraint: SameNumberOfRows + SameNumberOfColumns { - + impl $TraitAssign> for Matrix + where + T: Scalar + $bound, + SA: StorageMut, + SB: Storage, + ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + { #[inline] fn $method_assign(&mut self, rhs: Matrix) { self.$method_assign(&rhs) @@ -576,9 +580,9 @@ where #[inline] fn mul(self, rhs: &'b Matrix) -> Self::Output { - let mut res =Matrix::new_uninitialized_generic(self.data.shape().0, rhs.data.shape().1); - self.mul_to(rhs, &mut res); - unsafe{ res.assume_init()} + let mut res = 
Matrix::new_uninitialized_generic(self.data.shape().0, rhs.data.shape().1); + self.mul_to(rhs, &mut res); + unsafe { res.assume_init() } } } @@ -636,11 +640,8 @@ where // TODO: this is too restrictive: // − we can't use `a *= b` when `a` is a mutable slice. // − we can't use `a *= b` when C2 is not equal to C1. -impl MulAssign> for Matrix +impl MulAssign> for Matrix where - R1: Dim, - C1: Dim, - R2: Dim, T: Scalar + Zero + One + ClosedAdd + ClosedMul, SB: Storage, SA: ContiguousStorageMut + Clone, @@ -653,11 +654,8 @@ where } } -impl<'b, T, R1, C1, R2, SA, SB> MulAssign<&'b Matrix> for Matrix +impl<'b, T, R1: Dim, C1: Dim, R2: Dim, SA, SB> MulAssign<&'b Matrix> for Matrix where - R1: Dim, - C1: Dim, - R2: Dim, T: Scalar + Zero + One + ClosedAdd + ClosedMul, SB: Storage, SA: ContiguousStorageMut + Clone, @@ -697,7 +695,7 @@ where pub fn ad_mul(&self, rhs: &Matrix) -> OMatrix where T: SimdComplexField, - SB: Storage, R2, C2>, + SB: Storage, DefaultAllocator: Allocator, ShapeConstraint: SameNumberOfRows, { @@ -746,7 +744,9 @@ where for i in 0..ncols1 { for j in 0..ncols2 { let dot = dot(&self.column(i), &rhs.column(j)); - unsafe { *out.get_unchecked_mut((i, j)) = MaybeUninit::new(dot) ;} + unsafe { + *out.get_unchecked_mut((i, j)) = MaybeUninit::new(dot); + } } } } @@ -786,16 +786,16 @@ where #[inline] pub fn mul_to( &self, - rhs: &Matrix, R2, C2, SB>, - out: &mut Matrix, + rhs: &Matrix, + out: &mut Matrix, R3, C3, SC>, ) where SB: Storage, - SC: StorageMut, + SC: StorageMut, R3, C3>, ShapeConstraint: SameNumberOfRows + SameNumberOfColumns + AreMultipliable, { - out.gemm(T::one(), self, rhs, T::zero()); + out.gemm_z(T::one(), self, rhs); } /// The kronecker product of two matrices (aka. tensor product of the corresponding linear diff --git a/src/base/statistics.rs b/src/base/statistics.rs index 59d78482..23ab524e 100644 --- a/src/base/statistics.rs +++ b/src/base/statistics.rs @@ -1,3 +1,5 @@ +use std::mem::MaybeUninit; + use crate::allocator::Allocator; use crate::storage::Storage; use crate::{Const, DefaultAllocator, Dim, Matrix, OVector, RowOVector, Scalar, VectorSlice, U1}; @@ -18,13 +20,12 @@ impl> Matrix { DefaultAllocator: Allocator, { let ncols = self.data.shape().1; - let mut res: RowOVector = - unsafe { crate::unimplemented_or_uninitialized_generic!(Const::<1>, ncols) }; + let mut res = RowOVector::new_uninitialized_generic(Const::<1>, ncols); for i in 0..ncols.value() { // TODO: avoid bound checking of column. unsafe { - *res.get_unchecked_mut((0, i)) = f(self.column(i)); + *res.get_unchecked_mut((0, i)) =MaybeUninit::new( f(self.column(i))); } } @@ -45,17 +46,16 @@ impl> Matrix { DefaultAllocator: Allocator, { let ncols = self.data.shape().1; - let mut res: OVector = - unsafe { crate::unimplemented_or_uninitialized_generic!(ncols, Const::<1>) }; + let mut res = Matrix::new_uninitialized_generic(ncols, Const::<1>); for i in 0..ncols.value() { // TODO: avoid bound checking of column. unsafe { - *res.vget_unchecked_mut(i) = f(self.column(i)); + *res.vget_unchecked_mut(i) = MaybeUninit::new(f(self.column(i))); } } - res + unsafe { res.assume_init() } } /// Returns a column vector resulting from the folding of `f` on each column of this matrix. 
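These per-column folds back the public statistics API, whose behavior is unchanged, e.g.:

```rust
use nalgebra::{Matrix2x3, RowVector3, Vector2};

fn main() {
    let m = Matrix2x3::new(1.0, 2.0, 3.0,
                           4.0, 5.0, 6.0);

    // `row_sum` folds each column down to a scalar, yielding a row vector...
    assert_eq!(m.row_sum(), RowVector3::new(5.0, 7.0, 9.0));
    // ...while `column_mean` folds across the columns of each row.
    assert_eq!(m.column_mean(), Vector2::new(2.0, 5.0));
}
```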
diff --git a/src/linalg/permutation_sequence.rs b/src/linalg/permutation_sequence.rs
index ea868b5a..a088c458 100644
--- a/src/linalg/permutation_sequence.rs
+++ b/src/linalg/permutation_sequence.rs
@@ -1,3 +1,5 @@
+use std::mem::MaybeUninit;
+
 #[cfg(feature = "serde-serialize-no-std")]
 use serde::{Deserialize, Serialize};
 
@@ -8,7 +10,7 @@ use crate::allocator::Allocator;
 use crate::base::{DefaultAllocator, Matrix, OVector, Scalar};
 #[cfg(any(feature = "std", feature = "alloc"))]
 use crate::dimension::Dynamic;
-use crate::dimension::{Const, Dim, DimName};
+use crate::dimension::{Dim, DimName};
 use crate::storage::StorageMut;
 
 /// A sequence of row or column permutations.
@@ -29,13 +31,13 @@ where
     DefaultAllocator: Allocator<(usize, usize), D>,
 {
     len: usize,
-    ipiv: OVector<(usize, usize), D>,
+    ipiv: OVector<MaybeUninit<(usize, usize)>, D>,
 }
 
 impl<D: Dim> Copy for PermutationSequence<D>
 where
     DefaultAllocator: Allocator<(usize, usize), D>,
-    OVector<(usize, usize), D>: Copy,
+    OVector<MaybeUninit<(usize, usize)>, D>: Copy,
 {
 }
 
@@ -72,7 +74,7 @@ where
         unsafe {
             Self {
                 len: 0,
-                ipiv: crate::unimplemented_or_uninitialized_generic!(dim, Const::<1>),
+                ipiv: OVector::new_uninitialized(dim),
             }
         }
     }
@@ -97,7 +99,7 @@ where
     where
         S2: StorageMut<T, R2, C2>,
     {
-        for i in self.ipiv.rows_range(..self.len).iter() {
+        for i in self.ipiv.rows_range(..self.len).iter().map(|i| unsafe { i.assume_init() }) {
             rhs.swap_rows(i.0, i.1)
         }
     }
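For completeness, applying a permutation sequence is just a series of row (or column) swaps; a standalone equivalent of `permute_rows` (a hypothetical helper for illustration, not the library's API):

```rust
use nalgebra::DMatrix;

/// Applies recorded swaps in order, mirroring `PermutationSequence::permute_rows`.
fn apply_row_swaps(m: &mut DMatrix<f64>, swaps: &[(usize, usize)]) {
    for &(i, j) in swaps {
        m.swap_rows(i, j);
    }
}

fn main() {
    let mut m = DMatrix::from_row_slice(3, 2, &[1.0, 2.0,
                                                3.0, 4.0,
                                                5.0, 6.0]);
    apply_row_swaps(&mut m, &[(0, 2)]);
    assert_eq!(m[(0, 0)], 5.0);
    assert_eq!(m[(2, 1)], 2.0);
}
```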