From 39f6cde39a11dadf5386d6a316d6669c0ad951d4 Mon Sep 17 00:00:00 2001 From: L <457124+liborty@users.noreply.github.com> Date: Thu, 25 Apr 2024 09:53:51 +1000 Subject: [PATCH] 2.1.0 --- Cargo.toml | 2 +- README.md | 49 +++++++++++-------- src/lib.rs | 44 ++++++++--------- src/vecvec.rs | 62 ++++++++++++++++++++++++ src/vecvecg.rs | 126 +++++++++++++------------------------------------ tests/tests.rs | 6 +-- 6 files changed, 150 insertions(+), 139 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 78e7c52..7749ff9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rstats" -version = "2.0.12" +version = "2.1.0" authors = ["Libor Spacek"] edition = "2021" description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.." diff --git a/README.md b/README.md index d808262..84bd8cd 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ use Rstats::{Stats,Vecg,Vecu8,MutVecg,VecVec,VecVecg}; and any of the following auxiliary functions: ```rust -use Rstats::{noop,fromop,sumn,t_stat,unit_matrix,re_error}; +use Rstats::{asop,fromop,sumn,t_stat,unit_matrix,re_error}; ``` The latest (nightly) version is always available in the github repository [Rstats](https://github.com/liborty/Rstats). Sometimes it may be (only in some details) a little ahead of the `crates.io` release versions. @@ -148,13 +148,13 @@ The main constituent parts of Rstats are its traits. The different traits are de * `Vecu8`: some methods specialized for end-type `u8`, * `MutVecg`: some of the above methods, mutating self, * `VecVec`: methods operating on n vectors (rows of numbers), -* `VecVecg`: methods for n vectors, plus another generic argument, e.g. a vector of weights. +* `VecVecg`: methods for n vectors, plus another generic argument, typically a vector of n weights, expressing the relative significance of the vectors. The traits and their methods operate on arguments of their required categories. In classical statistical parlance, the main categories correspond to the number of 'random variables'. **`Vec>`** type is used for rectangular matrices (could also have irregular rows). -**`struct TriangMat`** is used for symmetric / antisymmetric / transposed / triangular matrices and wedge and geometric products. All instances of `TriangMat` store only `n*(n+1)/2` items in a single flat vector, instead of `n*n`, thus almost halving the memory requirements. Their transposed versions only set up a flag `kind >=3` that is interpreted by software, instead of unnecessarily rewriting the whole matrix. Thus saving some processing as well. All this is put to a good use in our implementation of the matrix decomposition methods. +**`struct TriangMat`** is used for symmetric / antisymmetric / transposed / triangular matrices and wedge and geometric products. All instances of `TriangMat` store only `n*(n+1)/2` items in a single flat vector, instead of `n*n`, thus almost halving the memory requirements. Their transposed versions only set up a flag `kind >=3` that is interpreted by software, instead of unnecessarily rewriting the whole matrix. Thus saving processing of all transposes (a common operation). All this is put to a good use in our implementation of the matrix decomposition methods. The vectors' end types (of the actual data) are mostly generic: usually some numeric type. `Copy` trait bounds on these generic input types have been relaxed to `Clone`, to allow cloning user's own end data types in any way desired. There is no difference for primitive types. @@ -193,7 +193,7 @@ if dif <= 0_f64 { pub type RE = RError; ``` -Convenience function `re_error` is used to simplify constructing and returning these errors. Its message argument can be either a literal `&str`, or a `String`, e.g. constructed by `format!`. It returns a Result, thus it needs `?` operator to unwrap its `Err` variant. +Convenience function `re_error` can be used to simplify constructing and returning these errors. Its message argument can be either literal `&str`, or `String` (e.g. constructed by `format!`). It returns a Result, thus it needs `?` operator to unwrap its `Err` variant. ```rust if dif <= 0_f64 { @@ -211,41 +211,48 @@ holds the central tendency of `1d` data, e.g. some kind of mean or median, and i holds triangular matrices of all kinds, as described in Implementation section above. Beyond the usual conversion to full matrix form, a number of (the best) Linear Algebra methods are implemented directly on `TriangMat`, in module `triangmat.rs`, such as: -* **Cholesky-Banachiewicz** matrix decomposition: `S = LL'` (where ' denotes the transpose). This decomposition is used by `mahalanobis`, `variances`, etc. -* **Mahalanobis Distance** -* **Householder UR** (M = QR) matrix decomposition +* **Cholesky-Banachiewicz** matrix decomposition: `S = LL'` (where ' denotes the transpose). This decomposition is used by `mahalanobis`, `eigenvalues`, etc. +* **Eigenvectors, eigenvalues, determinant** +* **Mahalanobis Distance** for ML recognition tasks +* **Householder UR** (M = QR) more general matrix decomposition -Some methods implemented for `VecVecg` also produce `TriangMat` matrices, specifically the covariance/comedience calculations: `covar` and `wcovar`. Their results are positive definite, which makes the most efficient Cholesky-Banachiewicz decomposition applicable. +Some methods, specifically the covariance/comedience calculations in `VecVec` and `VecVecg` return `TriangMat` matrices. These are positive definite, which makes the most efficient Cholesky-Banachiewicz decomposition applicable to them. ## Quantify Functions (Dependency Injection) -Most methods in `medians` and some in `indxvec` crates, e.g. `hashort`, `find_any` and `find_all`, require explicit closure passed to them, usually to tell them how to quantify input data of any type T, into f64. Variety of different quantifying methods can then be dynamically employed. +Most methods in `medians` and some in `indxvec` crates, e.g. `find_any` and `find_all`, require explicit closure passed to them, usually to tell them how to quantify input data of any type T into f64. Variety of different quantifying methods can then be dynamically employed. -For example, in text analysis (`&str` type), it can be the word length, or the numerical value of its first few bytes, or the numerical value of its consonants, etc. Then we can sort them or find their means / medians / spreads under these different measures. We do not necessarily want to explicitly store all such quantifications, as data can be voluminous. Rather, we want to be able to compute any of them on custom demand. +For example, in text analysis (`&str` end type), it can be the word length, or the numerical value of its first few letters, or the numerical value of its consonants, etc. Then we can sort them or find their means / medians / spreads under all these different measures. We do not necessarily want to explicitly store all such different quantifications values, as input data can be voluminous. It is preferable to be able to compute any of them on demand, using these closure arguments. -When the data is already of the required end-type, just use the 'dummy' closure `|f| *f` +When data is already of the required end-type, use the 'dummy' closure: -### `asop` +```rust +|&f| f +``` -When T is a wide primitive type, such as i64, u64, usize, that can only be converted to f64 by explicit truncation, we can use (with some loss of accuracy): +When T is a primitive type, such as i64, u64, usize, that can be converted to f64, possibly with some loss of accuracy, use: ```rust -|f:&T| *f as f64 +|&f| f as f64 ``` ### `fromop` -When T is a narrow numeric type, or T is convertible by an existing `From` implementation, and `f64:From, T:Clone` have been duly added everywhere as trait bounds, then we can pass in: +When T is convertible by an existing custom `From` implementation (and `f64:From, T:Clone` have been duly added everywhere as trait bounds), then simply pass in `fromop`, defined as: ```rust -fromop -|f:&T| (*f).clone().into() +/// Convenience From quantification invocation +pub fn fromop>(f: &T) -> f64 { + f.clone().into() +}| ``` -The last case previously required manual implementations written for the (global) `From` trait for each type and each different quantification method, whereby the different implementations of `From` would conflict with each other. Now the user can simply implement all custom quantifications within the closures. This generality is obtained at the price of a small inconvenience: using the above signature closures even in simple cases. +The remaining general cases previously required new manual implementations to be written for the (global) `From` trait for each new type and for each different quantification method, and adding the trait bounds everywhere. Even then, the different implementations of `From` would conflict with each other. Now we can simply implement all the custom quantifications within the closures. This generality is obtained at the price of a small inconvenience: having to supply one of the above closures argument for the primitive types as well. ## Auxiliary Functions +* `fromop`: see above. + * `sumn`: the sum of the sequence `1..n = n*(n+1)/2`. It is also the size of a lower/upper triangular matrix. * `t_stat`: of a value x: (x-centre)/spread. In one dimension. @@ -272,7 +279,7 @@ Included in this trait are: * linear transformation to [0,1], * other measures and basic vector algebra operators -Note that a fast implementation of 1d 'classic' medians is, as of version 1.1.0, provided in a separate crate `medians`. +Note that fast implementations of 1d 'classic' medians are, as of version 1.1.0, provided in a separate crate `medians`. ## Trait Vecg @@ -326,7 +333,9 @@ Methods which take an additional generic vector argument, such as a vector of we ## Appendix: Recent Releases -* **Version 2.0.12** - added `depth_ratio`. +* **Version 2.1.0** - Changed the type of `mid` argument to covariance methods from U -> f64, making the normal expectation for the type of precise geometric medians explicit. Accordingly, moved `covar` and `serial_covar` from trait `VecVecg` to `VecVec`. This might potentially require changing some `use` declarations in your code. + +* **Version 2.0.12** - added `depth_ratio` * **Version 2.0.11** - removed not so useful `variances`. Tidied up error processing in `vecvecg.rs`. Added to it `serial_covar` and `serial_wcovar` for when heavy loading of all the cores may not be wanted. diff --git a/src/lib.rs b/src/lib.rs index 8be0c24..589bebc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ pub use medians::{MedError, Median, Medianf64}; /// Convenience From quantification invocation pub fn fromop>(f: &T) -> f64 { - (*f).clone().into() + f.clone().into() } /// tm_statistic in 1d: (value-centre)/dispersion @@ -194,7 +194,7 @@ pub trait Vecg { /// Vector similarity S in the interval `[0,1]: S = (1+cos(theta))/2` fn vsim>(self, v: &[U]) -> f64; /// Median correlation similarity of vectors, range [0,1] - fn vcorrsim(self, v:Self) -> Result; + fn vcorrsim(self, v: Self) -> Result; /// Positive dotp [0,2|a||b|] fn pdotp>(self, v: &[U]) -> f64; /// We define vector dissimilarity D in the interval `[0,1]: D = 1-S = (1-cos(theta))/2` @@ -224,13 +224,13 @@ pub trait Vecg { /// Spearman rho correlation. fn spearmancorr>(self, v: &[U]) -> f64; /// Change to gm that adding point self will cause - fn contribvec_newpt(self, gm: &[f64], recips: f64) -> Result,RE>; + fn contribvec_newpt(self, gm: &[f64], recips: f64) -> Result, RE>; /// Normalized magnitude of change to gm that adding point self will cause - fn contrib_newpt(self, gm: &[f64], recips: f64, nf: f64) -> Result; + fn contrib_newpt(self, gm: &[f64], recips: f64, nf: f64) -> Result; /// Contribution of removing point self - fn contribvec_oldpt(self, gm: &[f64], recips: f64) -> Result,RE>; + fn contribvec_oldpt(self, gm: &[f64], recips: f64) -> Result, RE>; /// Normalized contribution of removing point self (as negative scalar) - fn contrib_oldpt(self, gm: &[f64], recips: f64, nf: f64) -> Result; + fn contrib_oldpt(self, gm: &[f64], recips: f64, nf: f64) -> Result; /// Householder reflect fn house_reflect>(self, x: &[U]) -> Vec; } @@ -278,7 +278,7 @@ pub trait Vecu8 { /// Operations on a whole set of multidimensional vectors. pub trait VecVec { /// Linearly weighted approximate time series derivative at the last point (present time). - fn dvdt(self, centre: &[f64]) -> Result, RE>; + fn dvdt(self, centre: &[f64]) -> Result, RE>; /// Maps a scalar valued closure onto all vectors in self fn scalar_fn(self, f: impl Fn(&[T]) -> Result) -> Result, RE>; /// Maps vector valued closure onto all vectors in self and collects @@ -315,7 +315,7 @@ pub trait VecVec { fn distsums(self) -> Vec; /// Medoid distance, its index, outlier distance, its index fn medout(self, gm: &[f64]) -> Result, RE>; - /// Radius of a point specified by its subscript. + /// Radius of a point specified by its subscript. fn radius(self, i: usize, gm: &[f64]) -> Result; /// Arith mean and std (in Params struct), Median and mad, Medoid and Outlier (in MinMax struct) fn eccinfo(self, gm: &[f64]) -> Result<(Params, Params, MinMax), RE> @@ -329,13 +329,13 @@ pub trait VecVec { fn madgm(self, gm: &[f64]) -> Result; /// stdgm mean of radii from gm: nd data spread estimator fn stdgm(self, gm: &[f64]) -> Result; - /// Outer hull subscripts from their square radii and their sort index. - fn outer_hull(self, sqrads: &[f64], sindex: &[usize]) -> Vec; + /// Outer hull subscripts from their square radii and their sort index. + fn outer_hull(self, sqrads: &[f64], sindex: &[usize]) -> Vec; /// Inner hull subscripts from their square radii and their sort index. - fn inner_hull(self, sqrads: &[f64], sindex: &[usize]) -> Vec; + fn inner_hull(self, sqrads: &[f64], sindex: &[usize]) -> Vec; /// Measure of likelihood of zero median point **p** belonging to zero median data cloud `self`. - fn depth(self, descending_index: &[usize], p: &[f64]) -> Result; - /// The proportion of points outside of the normal plane through **p** + fn depth(self, descending_index: &[usize], p: &[f64]) -> Result; + /// The proportion of points outside of the normal plane through **p** fn depth_ratio(self, descending_index: &[usize], p: &[f64]) -> f64; /// Collects indices of outer and inner hull points, from zero median data fn hulls(self) -> (Vec, Vec); @@ -347,6 +347,10 @@ pub trait VecVec { fn par_gmedian(self, eps: f64) -> Vec; /// Like `gmedian` but returns also the sum of reciprocals of distances fn gmparts(self, eps: f64) -> (Vec, f64); + /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors. + fn covar(self, mid: &[f64]) -> Result; + /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors. + fn serial_covar(self, mid: &[f64]) -> Result; } /// Methods applicable to slice of vectors of generic end type, plus one other argument @@ -371,9 +375,9 @@ pub trait VecVecg { /// weighted 1.0-dotproduct of **v**, with all in self fn wdivs(self, ws: &[U], v: &[f64]) -> Result<(Vec, f64), RE>; /// median of weighted cos deviations from **v** - fn wdivsmed(self, ws: &[U], v: &[f64]) -> Result; + fn wdivsmed(self, ws: &[U], v: &[f64]) -> Result; /// weighted radii to all points in self - fn wradii(self, ws:&[U], gm: &[f64]) -> Result<(Vec,f64),RE>; + fn wradii(self, ws: &[U], gm: &[f64]) -> Result<(Vec, f64), RE>; /// wmadgm median of weighted radii from (weighted) gm: stable nd data spread estimator fn wmadgm(self, ws: &[U], wgm: &[f64]) -> Result; /// Leftmultiply (column) vector v by (rows of) matrix self @@ -393,7 +397,7 @@ pub trait VecVecg { /// Weighted sums of points in each hemisphere fn wsigvec(self, idx: &[usize], ws: &[U]) -> Result, RE>; /// Weighted likelihood of zero median point **p** belonging to zero median data cloud `self`. - fn wdepth(self, descending_index: &[usize], ws:&[U], p: &[f64]) -> Result; + fn wdepth(self, descending_index: &[usize], ws: &[U], p: &[f64]) -> Result; /// Dependencies of vector m on each vector in self fn dependencies(self, m: &[U]) -> Result, RE>; /// Sum of distances from arbitrary point (v) to all the points in self @@ -408,12 +412,8 @@ pub trait VecVecg { fn par_wgmedian(self, ws: &[U], eps: f64) -> Result, RE>; /// Like `wgmedian` but returns also the sum of reciprocals. fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec, f64), RE>; - /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors. - fn serial_covar(self, mid:&[U]) -> Result; - /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors. - fn covar(self, med: &[U]) -> Result; /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors. - fn serial_wcovar(self, ws: &[U], m: &[U]) -> Result; + fn wcovar(self, ws: &[U], mid: &[f64]) -> Result; /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors. - fn wcovar(self, ws: &[U], m: &[f64]) -> Result; + fn serial_wcovar(self, ws: &[U], mid: &[f64]) -> Result; } diff --git a/src/vecvec.rs b/src/vecvec.rs index b0e9379..30b8b42 100644 --- a/src/vecvec.rs +++ b/src/vecvec.rs @@ -549,4 +549,66 @@ where recsum = nextrecsum; } } + + /// Symmetric covariance matrix. Becomes comediance when argument `mid` + /// is the geometric median instead of the centroid. + /// The indexing is always in this order: (row,column) (left to right, top to bottom). + /// The items are flattened into a single vector in this order. + fn covar(self, mid:&[f64]) -> Result { + let d = self[0].len(); // dimension of the vector(s) + if d != mid.len() { + return re_error("data","covar self and mid dimensions mismatch")? }; + let mut covsum = self + .par_iter() + .fold( + || vec![0_f64; (d+1)*d/2], + | mut cov: Vec, p | { + let mut covsub = 0_usize; // subscript into the flattened array cov + let vm = p.vsub(mid); // zero mean vector + vm.iter().enumerate().for_each(|(i,thisc)| + // its products up to and including the diagonal (itself) + vm.iter().take(i+1).for_each(|vmi| { + cov[covsub] += thisc*vmi; + covsub += 1; + })); + cov + } + ) + .reduce( + || vec![0_f64; (d+1)*d/2], + | mut covout: Vec, covin: Vec | { + covout.mutvadd(&covin); + covout + } + ); + // now compute the means and return + let lf = self.len() as f64; + covsum.iter_mut().for_each(|c| *c /= lf); + Ok(TriangMat{ kind:2,data:covsum }) // symmetric, non transposed + } + + /// Symmetric covariance matrix. Becomes comediance when supplied argument `mid` + /// is the geometric median instead of the centroid. + /// Indexing is always in this order: (row,column) (left to right, top to bottom). + fn serial_covar(self, mid:&[f64]) -> Result { + let d = self[0].len(); // dimension of the vector(s) + if d != mid.len() { + return re_error("data","serial_covar self and mid dimensions mismatch")? }; + let mut covsums = vec![0_f64; (d+1)*d/2]; + for p in self { + let mut covsub = 0_usize; // subscript into the flattened array cov + let zp = p.vsub(mid); // zero mean/median vector + zp.iter().enumerate().for_each(|(i,thisc)| + // its products up to and including the diagonal + zp.iter().take(i+1).for_each(|otherc| { + covsums[covsub] += thisc*otherc; + covsub += 1; + }) ) + }; + // now compute the means and return + let lf = self.len() as f64; + for c in covsums.iter_mut() { *c /= lf }; + Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed + } + } diff --git a/src/vecvecg.rs b/src/vecvecg.rs index c18a1a4..90a3d60 100644 --- a/src/vecvecg.rs +++ b/src/vecvecg.rs @@ -376,96 +376,6 @@ impl VecVecg for &[Vec] } } - /// Symmetric covariance matrix. Becomes comediance when supplied argument `mid` - /// is the geometric median instead of the centroid. - /// Indexing is always in this order: (row,column) (left to right, top to bottom). - fn serial_covar(self, mid:&[U]) -> Result { - let d = self[0].len(); // dimension of the vector(s) - if d != mid.len() { - return re_error("data","serial_covar self and mid dimensions mismatch")? }; - let mut covsums = vec![0_f64; (d+1)*d/2]; - for p in self { - let mut covsub = 0_usize; // subscript into the flattened array cov - let zp = p.vsub(mid); // zero mean/median vector - zp.iter().enumerate().for_each(|(i,thisc)| - // its products up to and including the diagonal - zp.iter().take(i+1).for_each(|otherc| { - covsums[covsub] += thisc*otherc; - covsub += 1; - }) ) - }; - // now compute the means and return - let lf = self.len() as f64; - for c in covsums.iter_mut() { *c /= lf }; - Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed - } - - /// Symmetric covariance matrix for weighted vectors. - /// Becomes comediance when supplied argument `mid` - /// is the geometric median instead of the centroid. - /// Indexing is always in this order: (row,column) (left to right, top to bottom). - fn serial_wcovar(self, ws:&[U], mid:&[U]) -> Result { - let d = self[0].len(); // dimension of the vector(s) - if d != mid.len() { - return re_error("data","serial_wcovar self and mid dimensions mismatch")? }; - if self.len() != ws.len() { - return re_error("data","serial_wcovar self and ws lengths mismatch")? }; - let mut covsums = vec![0_f64; (d+1)*d/2]; - let mut wsum = 0_f64; - for (p,w) in self.iter().zip(ws) { - let mut covsub = 0_usize; // subscript into the flattened array cov - let zp = p.vsub(mid); // zero mean vector - let wf:f64 = w.clone().into(); - wsum += wf; - zp.iter().enumerate().for_each(|(i,thisc)| - // its products up to and including the diagonal - zp.iter().take(i+1).for_each(|otherc| { - covsums[covsub] += wf*thisc*otherc; - covsub += 1; - }) ) - }; - // now compute the means and return - for c in covsums.iter_mut() { *c /= wsum }; - Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed - } - - /// Symmetric covariance matrix. Becomes comediance when argument `mid` - /// is the geometric median instead of the centroid. - /// The indexing is always in this order: (row,column) (left to right, top to bottom). - /// The items are flattened into a single vector in this order. - fn covar(self, mid:&[U]) -> Result { - let d = self[0].len(); // dimension of the vector(s) - if d != mid.len() { - return re_error("data","covar self and mid dimensions mismatch")? }; - let mut covsum = self - .par_iter() - .fold( - || vec![0_f64; (d+1)*d/2], - | mut cov: Vec, p | { - let mut covsub = 0_usize; // subscript into the flattened array cov - let vm = p.vsub(mid); // zero mean vector - vm.iter().enumerate().for_each(|(i,thisc)| - // its products up to and including the diagonal (itself) - vm.iter().take(i+1).for_each(|vmi| { - cov[covsub] += thisc*vmi; - covsub += 1; - })); - cov - } - ) - .reduce( - || vec![0_f64; (d+1)*d/2], - | mut covout: Vec, covin: Vec | { - covout.mutvadd(&covin); - covout - } - ); - // now compute the means and return - let lf = self.len() as f64; - covsum.iter_mut().for_each(|c| *c /= lf); - Ok(TriangMat{ kind:2,data:covsum }) // symmetric, non transposed - } - /// Weighted covariance matrix for f64 vectors in self. Becomes comediance when /// argument m is the geometric median instead of the centroid. /// Since the matrix is symmetric, the missing upper triangular part can be trivially @@ -473,9 +383,9 @@ impl VecVecg for &[Vec] /// The indexing is always in this order: (row,column) (left to right, top to bottom). /// The items are flattened into a single vector in this order. /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`. - fn wcovar(self, ws:&[U], m:&[f64]) -> Result { + fn wcovar(self, ws:&[U], mid:&[f64]) -> Result { let n = self[0].len(); // dimension of the vector(s) - if n != m.len() { + if n != mid.len() { return re_error("data","wcovar self and m dimensions mismatch")? }; if self.len() != ws.len() { return re_error("data","wcovar self and ws lengths mismatch")? }; @@ -485,7 +395,7 @@ impl VecVecg for &[Vec] || (vec![0_f64; (n+1)*n/2], 0_f64), | mut pair: (Vec, f64), (p,w) | { let mut covsub = 0_usize; // subscript into the flattened array cov - let vm = p.vsub(m); // zero mean vector + let vm = p.vsub(mid); // zero mean vector let wf = w.clone().into(); // f64 weight for this point vm.iter().enumerate().for_each(|(i,component)| // its products up to and including the diagonal @@ -509,4 +419,34 @@ impl VecVecg for &[Vec] covsum.iter_mut().for_each(|c| *c /= wsum); Ok(TriangMat{ kind:2,data:covsum }) // symmetric, non transposed } + + /// Symmetric covariance matrix for weighted vectors. + /// Becomes comediance when supplied argument `mid` + /// is the geometric median instead of the centroid. + /// Indexing is always in this order: (row,column) (left to right, top to bottom). + fn serial_wcovar(self, ws:&[U], mid:&[f64]) -> Result { + let d = self[0].len(); // dimension of the vector(s) + if d != mid.len() { + return re_error("data","serial_wcovar self and mid dimensions mismatch")? }; + if self.len() != ws.len() { + return re_error("data","serial_wcovar self and ws lengths mismatch")? }; + let mut covsums = vec![0_f64; (d+1)*d/2]; + let mut wsum = 0_f64; + for (p,w) in self.iter().zip(ws) { + let mut covsub = 0_usize; // subscript into the flattened array cov + let zp = p.vsub(mid); // zero mean vector + let wf:f64 = w.clone().into(); + wsum += wf; + zp.iter().enumerate().for_each(|(i,thisc)| + // its products up to and including the diagonal + zp.iter().take(i+1).for_each(|otherc| { + covsums[covsub] += wf*thisc*otherc; + covsub += 1; + }) ) + }; + // now compute the means and return + for c in covsums.iter_mut() { *c /= wsum }; + Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed + } + } diff --git a/tests/tests.rs b/tests/tests.rs index 0db0ca3..554006a 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -149,7 +149,7 @@ fn ustats() -> Result<(), RE> { fn intstats() -> Result<(), RE> { let v = vec![1_i64, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; println!("\n{}", (&v).gr()); - let v1: Vec = v.iter().map(|i| *i as f64).collect(); // downcast to f64 here + let v1: Vec = v.iter().map(|&f| f as f64).collect(); // downcast to f64 here println!("Linear transform:\n{}", v1.lintrans()?.gr()); println!("Arithmetic\t{}", v1.ameanstd()?); println!("Median\t\t{}",v1.medmad()?); @@ -348,7 +348,7 @@ fn vecvec() -> Result<(), RE> { println!( "\nMedoid, outlier and radii summary:\n{eccecc}\nRadii centroid {eccstd}\nRadii median {eccmed}"); - let radsindex = pts.radii(&median)?.hashsort_indexed(|x| *x); + let radsindex = pts.radii(&median)?.hashsort_indexed(|&x| x); println!( "Radii ratio: {GR}{}{UN}", pts.radius(radsindex[0], &median)? / pts.radius(radsindex[radsindex.len() - 1], &median)? @@ -364,7 +364,7 @@ fn vecvec() -> Result<(), RE> { println!("Outlier's radius: {}", outlier.vdist(&median).gr()); println!("Outlier to Medoid: {}", outlier.vdist(medoid).gr()); - let seccs = pts.radii(&median)?.sorth(|f| *f, true); + let seccs = pts.radii(&median)?.sorth(|&f| f, true); // println!("\nSorted eccs: {}\n", seccs)); let lqcnt = seccs.binsearch(&(eccmed.centre - eccmed.spread)); println!(