Added support for a riemannian variant of clean_asr

chkothe · chkothe · commit 07abf9d7ff44 · 2025-05-09T12:01:54.000-07:00
- also added covariance utilities and is_debug function to streamline targeted testing
diff --git a/src/eegprep/clean_asr.py b/src/eegprep/clean_asr.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Any, Optional, Union, Tuple
+from typing import Dict, Any, Optional, Union, Tuple, Optional
 from copy import deepcopy
 
 import numpy as np
@@ -22,7 +22,7 @@ def clean_asr(
     ref_tolerances: Union[Tuple[float, float], str] = (-3.5, 5.5),
     ref_wndlen: Union[float, str] = 1.0,
     use_gpu: bool = False,
-    useriemannian: bool = False,
+    useriemannian: Optional[str] = None,
     maxmem: Optional[int] = 64
 ) -> Dict[str, Any]:
     """Run the Artifact Subspace Reconstruction (ASR) method on EEG data.
@@ -55,7 +55,9 @@ def clean_asr(
                                     for a channel to be considered 'bad' during calibration data selection. Default: (-3.5, 5.5). Use 'off' to disable.
         ref_wndlen (Union[float, str], optional): Window length in seconds for calibration data selection granularity. Default: 1.0. Use 'off' to disable.
         use_gpu (bool, optional): Whether to try using GPU (requires compatible hardware and libraries, currently ignored). Default: False.
-        useriemannian (bool, optional): Whether to use Riemannian ASR variant (NOT IMPLEMENTED). Default: False.
+        useriemannian (str, optional): Option to use a Riemannian ASR variant. Can be set to 'calib' to use a Riemannian estimate 
+            at calibration time; this makes somewhat different statistical tradeoffs than the default, resulting in a somewhat different 
+            baseline rejection threshold; as a result it is suggested to visually check results and adjust the cutoff as needed. Default: None (disabled).
         maxmem (Optional[int], optional): Maximum memory in MB (passed to asr_calibrate/process, but chunking based on it is not implemented in Python port). Default: 64.
 
     Returns:
@@ -66,8 +68,6 @@ def clean_asr(
         ImportError: If automatic calibration data selection is needed (`ref_maxbadchannels` is float) but `clean_windows` cannot be imported.
         ValueError: If input arguments are invalid or calibration fails critically.
     """
-    if useriemannian:
-        raise NotImplementedError("The Riemannian ASR variant is not implemented in this Python port.")
 
     if 'data' not in EEG or 'srate' not in EEG or 'nbchan' not in EEG:
         raise ValueError("EEG dictionary must contain 'data', 'srate', and 'nbchan'.")
@@ -128,14 +128,14 @@ def clean_asr(
     # The Python asr_calibrate uses its own defaults for blocksize, filters, etc.
     # We only pass the core parameters specified in the clean_asr call signature.
     try:
-        state = asr_calibrate(ref_section_data, srate, cutoff=cutoff, maxmem=maxmem)
+        state = asr_calibrate(ref_section_data, srate, cutoff=cutoff, maxmem=maxmem, useriemannian=useriemannian)
     except ValueError as e:
          # Catch specific errors like not enough calibration data
          raise ValueError(f"ASR calibration failed: {e}")
-    except Exception as e:
-         # Catch unexpected errors during calibration
-         logger.exception("An unexpected error occurred during ASR calibration.")
-         raise RuntimeError(f"ASR calibration failed unexpectedly: {e}")
+    # except Exception as e:
+    #      # Catch unexpected errors during calibration
+    #      logger.exception("An unexpected error occurred during ASR calibration.")
+    #      raise RuntimeError(f"ASR calibration failed unexpectedly: {e}")
 
     del ref_section_data # Free memory
 
diff --git a/src/eegprep/utils/asr.py b/src/eegprep/utils/asr.py
@@ -4,15 +4,17 @@
 import scipy.signal
 import scipy.linalg
 
-from .stats import block_geometric_median, fit_eeg_distribution
+from .stats import geometric_median, fit_eeg_distribution
 from .sigproc import moving_average
+from .covariance import cov_mean, cov_shrinkage
+
 
 logger = logging.getLogger(__name__)
 
 
 def asr_calibrate(X, srate, cutoff=None, blocksize=None, B=None, A=None,
                   window_len=None, window_overlap=None, max_dropout_fraction=None,
-                  min_clean_fraction=None, maxmem=None):
+                  min_clean_fraction=None, maxmem=None, useriemannian=None):
     """Calibration function for the Artifact Subspace Reconstruction (ASR) method.
 
     State = asr_calibrate(Data, SamplingRate, Cutoff, BlockSize, FilterB, FilterA, WindowLength, WindowOverlap, MaxDropoutFraction, MinCleanFraction, MaxMemory)
@@ -56,6 +58,10 @@ def asr_calibrate(X, srate, cutoff=None, blocksize=None, B=None, A=None,
       max_dropout_fraction (float, optional): Maximum fraction (0-1) of windows subject to dropouts. Default: 0.1.
       min_clean_fraction (float, optional): Minimum fraction (0-1) of windows that must be clean. Default: 0.25.
       maxmem (int, optional): Maximum memory in MB (for very large data/many channels). Default: 64.
+      useriemannian (str, optional): Option to use a Riemannian ASR variant. Can be set to 'calib' to use a Riemannian estimate 
+            at calibration time; this makes somewhat different statistical tradeoffs than the default, resulting in a potentially
+            different baseline rejection threshold; as a result it is suggested to visually check results and adjust 
+            the cutoff as needed. Default: None (disabled).
 
     Returns:
       dict: State dictionary containing calibration results ('M', 'T') and filter parameters ('B', 'A', 'sos', 'iir_state')
@@ -168,22 +174,26 @@ def asr_calibrate(X, srate, cutoff=None, blocksize=None, B=None, A=None,
     # Average the accumulated covariances
     U /= blocksize
 
-    # Reshape for geometric median calculation
-    U_reshaped = U.reshape(C * C, -1).T  # Shape: (num_blocks, C*C)
-
-    # Calculate the geometric median of covariance matrices
-    logger.info("Calculating robust geometric median covariance...")
-    med = block_geometric_median(U_reshaped)
-    
-    # Handle NaN cases (can happen with single observation or degenerate data)
+    # compute a robust average of the covariance matrices
+    med = None    
+    if useriemannian in ('calib', 'all', True):
+        logger.info("Calculating Riemannian geometric median covariance...")
+        # regularized (num_blocks, C, C) copy with a little shrinkage against
+        # singularities; U keeps its (C, C, num_blocks) layout for the fallbacks
+        covs = cov_shrinkage(U.transpose(2, 0, 1), 1e-4, target='scaled-eye')
+        med = cov_mean(covs, robust=True)
+    if med is None or np.any(np.isnan(med)):
+        if med is not None:
+            logger.warning("Riemannian geometric median calculation resulted in "
+                           "NaNs. Using standard geometric median as fallback.")
+        logger.info("Calculating robust geometric median covariance...")
+        med = geometric_median(U.reshape(C * C, -1).T)
     if np.any(np.isnan(med)):
-        if U_reshaped.shape[0] == 1:
-            med = np.median(U_reshaped, axis=0)
-        else:
-            logger.warning("Geometric median calculation resulted in NaNs. Using standard median as fallback.")
-            med = np.median(U_reshaped, axis=0)
+        logger.warning("Geometric median calculation resulted in NaNs. "
+                       "Using standard median as fallback.")
+        med = np.median(U, axis=-1)
 
-    # Reshape median back to matrix form
+    # make sure median is reshaped back to matrix form
     M_robust = np.reshape(med, (C, C))
 
     # Get the mixing matrix M (matrix square root of the robust covariance)
@@ -269,6 +279,7 @@ def asr_calibrate(X, srate, cutoff=None, blocksize=None, B=None, A=None,
         'carry': None,          # Initial carry buffer (will be set in process)
         'last_R': None,         # Initial reconstruction matrix (will be set in process)
         'last_trivial': True,   # Initial trivial flag
+        'useriemannian': useriemannian, # Riemannian ASR variant option
     }
     
     return state
diff --git a/src/eegprep/utils/covariance.py b/src/eegprep/utils/covariance.py
@@ -0,0 +1,205 @@
+"""Tools for working with covariance matrices or stacks thereof."""
+
+# Copyright (c) 2015-2025 Syntrogi Inc. dba Intheon.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+import logging
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+__all__ = ['cov_mean', 'cov_logm', 'cov_expm', 'cov_powm', 'cov_sqrtm', 'cov_rsqrtm', 'cov_sqrtm2', 'cov_shrinkage']
+
+
+def diag_nd(M):
+    """Like np.diag, but in case of a ...,N, returns a ...,N,N array of diag matrices."""
+    *dims, N = M.shape
+    if not dims:
+        return np.diag(M)
+    out = np.zeros((*dims, N, N), dtype=M.dtype)  # also valid for empty stacks
+    out[..., range(N), range(N)] = M  # write each trailing vector onto a diagonal
+    return out
+
+
+def cov_logm(C):
+    """Compute the matrix logarithm of an N,N covariance matrix or of each matrix in a ...,N,N stack."""
+    eigvals, eigvecs = np.linalg.eigh(C)
+    return eigvecs @ diag_nd(np.log(eigvals)) @ np.swapaxes(eigvecs, -2, -1)
+
+
+def cov_expm(C):
+    """Compute the matrix exponential of an N,N covariance matrix or of each matrix in a ...,N,N stack."""
+    eigvals, eigvecs = np.linalg.eigh(C)
+    return eigvecs @ diag_nd(np.exp(eigvals)) @ np.swapaxes(eigvecs, -2, -1)
+
+
+def cov_powm(C, exp):
+    """Raise an N,N covariance matrix, or each matrix in a ...,N,N stack, to the matrix power exp."""
+    eigvals, eigvecs = np.linalg.eigh(C)
+    return eigvecs @ diag_nd(eigvals**exp) @ np.swapaxes(eigvecs, -2, -1)
+
+
+def cov_sqrtm(C):
+    """Compute the matrix square root of an N,N covariance matrix or of each matrix in a ...,N,N stack."""
+    eigvals, eigvecs = np.linalg.eigh(C)
+    return eigvecs @ diag_nd(np.sqrt(eigvals)) @ np.swapaxes(eigvecs, -2, -1)
+
+
+def cov_rsqrtm(C):
+    """Compute the reciprocal matrix square root of an N,N covariance matrix or ...,N,N stack."""
+    eigvals, eigvecs = np.linalg.eigh(C)
+    return eigvecs @ diag_nd(1./np.sqrt(eigvals)) @ np.swapaxes(eigvecs, -2, -1)
+
+
+def cov_sqrtm2(C):
+    """Return both the matrix square root and its reciprocal for an N,N covariance matrix or ...,N,N stack."""
+    eigvals, eigvecs = np.linalg.eigh(C)
+    root, eigvecs_t = np.sqrt(eigvals), eigvecs.swapaxes(-2, -1)
+    return eigvecs @ diag_nd(root) @ eigvecs_t, eigvecs @ diag_nd(1./root) @ eigvecs_t
+
+
+def cov_mean(X, *, weights=None, robust=False, iters=50, tol=1e-5, huber=0,
+             nancheck=False, verbose=False):
+    """Calculate the (weighted) average of a set of covariance matrices on the
+    manifold of SPD matrices, optionally robustly using the geometric median or
+    Huber mean.
+
+    Args:
+        X: a M,N,N array of covariance matrices
+        weights: optionally a vector of sample weights (can be unnormalized)
+        robust: whether to use a robust estimator (as selected by huber)
+        iters: maximum number of iterations
+        huber: huber threshold (delta parameter), used only if robust; can be set to
+          * None: use regular least-squares solution
+          * 0: use geometric / l1 median
+          * >0: use a Huber mean with the given value as the threshold
+        tol: tolerance for convergence check
+        nancheck: check for NaNs (raises a RuntimeError if any occur in the estimate)
+        verbose: generate verbose output (will print deviations in huber=None mode)
+
+    Returns:
+        the N,N mean covariance matrix
+    """
+    # This algorithm is based on:
+    #  [1] Ostresh et al., 1978, "On the Convergence of a Class of Iterative Methods for Solving the Weber Location Problem"
+    #  [2] Fletcher et al., 2004, "Principal Geodesic Analysis on Symmetric Spaces: Statistics of Diffusion Tensors"
+    #  [3] Fletcher et al. 2010, "The geometric median on Riemannian manifolds with application to robust atlas estimation"
+    #  [4] Barachant et al., 2014, "Multiclass Brain-Computer Interface Classification by Riemannian Geometry"
+    weights = np.ones(len(X)) if weights is None else np.asarray(weights)
+    scales = weights
+    mu = np.sum(X * weights[:, None, None], axis=0)/np.sum(weights)
+    # step size and divergence check threshold
+    step, thresh = 1.0, 1e20
+    for _ in range(iters):
+        mu_sqrt, mu_rsqrt = cov_sqrtm2(mu)
+        # linearize around mu (this would be the tangent space, but we omit
+        # the pre/post-multiplied mu_sqrt terms since they cancel in both
+        # the scale calculation and the exponential map)
+        Xt = cov_logm(mu_rsqrt @ X @ mu_rsqrt)
+        # geometric-median correction (downweight each pt by its riemannian
+        # distance from mu, which we calc here after linearization)
+        if robust:
+            # deviations/errors per sample, floored at machine epsilon so that
+            # a sample coinciding with mu cannot cause a division by zero below
+            d = np.maximum(np.sqrt(np.sum(np.square(Xt), axis=(-2, -1))), np.finfo(float).eps)
+            # apply robust scale factor to provided sample weights
+            if huber is None:
+                scales = weights
+                if verbose:
+                    logger.info(f"median deviations: {np.median(d)}")
+            elif huber == 0:
+                scales = weights / d
+            else:
+                w = np.where(d <= huber, 1, huber / d)
+                scales = weights * w
+        # get update direction (weighted average of the linearized samples)
+        J = np.sum(Xt * scales[:, None, None], axis=0)/np.sum(scales)
+        # apply update on manifold
+        mu = mu_sqrt @ cov_expm(step * J) @ mu_sqrt
+        # convergence checks
+        Jnorm = np.sqrt(np.sum(np.square(J)))
+        if Jnorm < tol or step < tol:
+            break
+        h = step * Jnorm
+        if h < thresh:
+            # exponentially decaying learning rate
+            step *= 0.95
+            thresh = h
+        else:
+            # prevent blow-up
+            step /= 2
+        if nancheck and np.any(np.isnan(mu)):
+            raise RuntimeError("NaNs occurred in cov_mean()")
+    return mu
+
+
+def cov_shrinkage(cov, shrinkage=0, *, target='eye'):
+    """Shrink a covariance matrix, or every matrix in a stack, towards a simple target.
+
+    Args:
+        cov: a single covariance matrix (N,N) or a stack of them (...,N,N).
+        shrinkage: weight given to the target, between 0 and 1; if falsy
+          (e.g., 0), the input is returned as-is.
+        target: the matrix to shrink towards; one of:
+          'eye': the identity matrix (classic shrinkage; good when the
+            shrinkage weight is small)
+          'scaled-eye': the identity matrix times the average variance
+            (trace / N) of the respective matrix (can be practical for large
+            shrinkage weights, since otherwise whitening will not have unit
+            variance)
+          'diag': the diagonal of the respective covariance matrix (diagonal
+            shrinkage)
+
+    Returns:
+        the regularized covariance matrix or stack of matrices.
+
+    Raises:
+        ValueError: if target is not one of the supported names.
+    """
+    if not shrinkage:
+        # nothing to do
+        return cov
+
+    N = cov.shape[-1]
+    diag_idx = range(N)
+
+    if target == 'diag':
+        # stack of diagonal matrices holding each matrix's main diagonal
+        shrink_to = diag_nd(np.diagonal(cov, axis1=-2, axis2=-1))
+    elif target == 'scaled-eye':
+        # average variance per matrix; has shape cov.shape[:-2], or is a
+        # scalar if cov is 2D
+        mean_var = np.trace(cov, axis1=-2, axis2=-1) / N
+        if cov.ndim > 2:
+            # append two axes so the scale broadcasts over each N,N matrix
+            mean_var = mean_var[..., np.newaxis, np.newaxis]
+        identity = np.zeros_like(cov)
+        identity[..., diag_idx, diag_idx] = 1
+        shrink_to = identity * mean_var
+    elif target == 'eye':
+        # stack of identity matrices matching cov's shape
+        shrink_to = np.zeros_like(cov)
+        shrink_to[..., diag_idx, diag_idx] = 1
+    else:
+        raise ValueError(f'Unsupported shrinkage target: {target}')
+
+    return shrinkage * shrink_to + (1 - shrinkage) * cov
diff --git a/src/eegprep/utils/spatial.py b/src/eegprep/utils/spatial.py
@@ -4,7 +4,6 @@
 
 
 # Helper function (vectorized version of MATLAB's interpMx)
-# Using a leading underscore as is common for internal helper functions in Python
 def _interpMx(cosEE, order, tol):
     """
     Compute the interpolation matrix for a set of point pairs (vectorized).
diff --git a/src/eegprep/utils/testing.py b/src/eegprep/utils/testing.py
@@ -1,9 +1,11 @@
 """Testing utilities."""
 
-import numpy as np
+import sys
 import unittest
 
-__all__ = ['compare_eeg', 'DebuggableTestCase']
+import numpy as np
+
+__all__ = ['compare_eeg', 'DebuggableTestCase', 'is_debug']
 
 
 # default to True since the round-tripping through file can force data to
@@ -43,3 +45,7 @@ def debugTestCase(cls):
         loader = unittest.defaultTestLoader
         testSuite = loader.loadTestsFromTestCase(cls)
         testSuite.debug()
+
+def is_debug():
+    """Determine whether Python is running in debug mode (i.e., a trace function is installed)."""
+    return getattr(sys, 'gettrace', lambda: None)() is not None
diff --git a/tests/test_clean_rawdata.py b/tests/test_clean_rawdata.py