From cc2a70664da7523f2f529272f6831ce6399a33e4 Mon Sep 17 00:00:00 2001 From: sebcrozet Date: Sun, 9 Dec 2018 11:21:15 +0100 Subject: [PATCH] Add statistics functions: sum, variance, mean. --- src/base/statistics.rs | 163 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 src/base/statistics.rs diff --git a/src/base/statistics.rs b/src/base/statistics.rs new file mode 100644 index 00000000..8f95e447 --- /dev/null +++ b/src/base/statistics.rs @@ -0,0 +1,163 @@ +use ::{Real, Dim, Matrix, VectorN, RowVectorN, DefaultAllocator, U1, VectorSliceN}; +use storage::Storage; +use allocator::Allocator; + +impl> Matrix { + #[inline] + pub fn compress_rows(&self, f: impl Fn(VectorSliceN) -> N) -> RowVectorN + where DefaultAllocator: Allocator { + + let ncols = self.data.shape().1; + let mut res = unsafe { RowVectorN::new_uninitialized_generic(U1, ncols) }; + + for i in 0..ncols.value() { + // FIXME: avoid bound checking of column. + unsafe { *res.get_unchecked_mut(0, i) = f(self.column(i)); } + } + + res + } + + #[inline] + pub fn compress_rows_tr(&self, f: impl Fn(VectorSliceN) -> N) -> VectorN + where DefaultAllocator: Allocator { + + let ncols = self.data.shape().1; + let mut res = unsafe { VectorN::new_uninitialized_generic(ncols, U1) }; + + for i in 0..ncols.value() { + // FIXME: avoid bound checking of column. + unsafe { *res.vget_unchecked_mut(i) = f(self.column(i)); } + } + + res + } + + #[inline] + pub fn compress_columns(&self, init: VectorN, f: impl Fn(&mut VectorN, VectorSliceN)) -> VectorN + where DefaultAllocator: Allocator { + let mut res = init; + + for i in 0..self.ncols() { + f(&mut res, self.column(i)) + } + + res + } +} + +impl> Matrix { + /* + * + * Sum computation. + * + */ + #[inline] + pub fn sum(&self) -> N { + self.iter().cloned().fold(N::zero(), |a, b| a + b) + } + + #[inline] + pub fn row_sum(&self) -> RowVectorN + where DefaultAllocator: Allocator { + self.compress_rows(|col| col.sum()) + } + + #[inline] + pub fn row_sum_tr(&self) -> VectorN + where DefaultAllocator: Allocator { + self.compress_rows_tr(|col| col.sum()) + } + + #[inline] + pub fn column_sum(&self) -> VectorN + where DefaultAllocator: Allocator { + let nrows = self.data.shape().0; + self.compress_columns(VectorN::zeros_generic(nrows, U1), |out, col| { + out.axpy(N::one(), &col, N::one()) + }) + } + + /* + * + * Variance computation. + * + */ + #[inline] + pub fn variance(&self) -> N { + if self.len() == 0 { + N::zero() + } else { + let val = self.iter().cloned().fold((N::zero(), N::zero()), |a, b| (a.0 + b * b, a.1 + b)); + let denom = N::one() / ::convert::<_, N>(self.len() as f64); + val.0 * denom - (val.1 * denom) * (val.1 * denom) + } + } + + #[inline] + pub fn row_variance(&self) -> RowVectorN + where DefaultAllocator: Allocator { + self.compress_rows(|col| col.variance()) + } + + #[inline] + pub fn row_variance_tr(&self) -> VectorN + where DefaultAllocator: Allocator { + self.compress_rows_tr(|col| col.variance()) + } + + #[inline] + pub fn column_variance(&self) -> VectorN + where DefaultAllocator: Allocator { + let (nrows, ncols) = self.data.shape(); + + let mut mean = self.column_mean(); + mean.apply(|e| -(e * e)); + + let denom = N::one() / ::convert::<_, N>(ncols.value() as f64); + self.compress_columns(mean, |out, col| { + for i in 0..nrows.value() { + unsafe { + let val = col.vget_unchecked(i); + *out.vget_unchecked_mut(i) += denom * *val * *val + } + } + }) + } + + /* + * + * Mean computation. + * + */ + #[inline] + pub fn mean(&self) -> N { + if self.len() == 0 { + N::zero() + } else { + self.sum() / ::convert(self.len() as f64) + } + } + + #[inline] + pub fn row_mean(&self) -> RowVectorN + where DefaultAllocator: Allocator { + self.compress_rows(|col| col.mean()) + } + + #[inline] + pub fn row_mean_tr(&self) -> VectorN + where DefaultAllocator: Allocator { + self.compress_rows_tr(|col| col.mean()) + } + + #[inline] + pub fn column_mean(&self) -> VectorN + where DefaultAllocator: Allocator { + let (nrows, ncols) = self.data.shape(); + let denom = N::one() / ::convert::<_, N>(ncols.value() as f64); + self.compress_columns(VectorN::zeros_generic(nrows, U1), |out, col| { + out.axpy(denom, &col, N::one()) + }) + } +} \ No newline at end of file