From 093574c86908e668498ac7b58f69885e5f14e415 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 12:35:03 -0500 Subject: [PATCH 1/7] BUG: Fix biased cov estimation --- doc/changes/latest.inc | 2 + mne/cov.py | 20 +-- mne/fixes.py | 270 ----------------------------------------- mne/tests/test_cov.py | 59 ++++++--- mne/utils/_testing.py | 5 +- 5 files changed, 59 insertions(+), 297 deletions(-) diff --git a/doc/changes/latest.inc b/doc/changes/latest.inc index b1356a37098..c1d5f0f6f82 100644 --- a/doc/changes/latest.inc +++ b/doc/changes/latest.inc @@ -145,6 +145,8 @@ Bug - Fix bug in ``method='eLORETA'`` for :func:`mne.minimum_norm.apply_inverse` (and variants) to allow restricting source estimation to a label by `Luke Bloy`_ +- Fix bug in :func:`mne.compute_covariance` and :func:`mne.compute_covariance_raw` where biased normalization (based on degrees of freedom) was used and ``cov.nfree`` was not set properly by `Eric Larson`_ + - Fix :func:`mne.VectorSourceEstimate.normal` to account for cortical patch statistics using ``use_cps=True`` by `Eric Larson`_ - Fix ``pick_ori='normal'`` for :func:`mne.minimum_norm.apply_inverse` when the inverse was computed with ``loose=1.`` and the forward solution was not in surface orientation, by `Eric Larson`_ diff --git a/mne/cov.py b/mne/cov.py index d05f884b302..9ce67ba0d05 100644 --- a/mne/cov.py +++ b/mne/cov.py @@ -39,7 +39,7 @@ _check_option, eigh) from . import viz -from .fixes import BaseEstimator, EmpiricalCovariance, _logdet +from .fixes import BaseEstimator, _logdet def _check_covs_algebra(cov1, cov2): @@ -463,9 +463,10 @@ def compute_raw_covariance(raw, tmin=0, tmax=None, tstep=0.2, reject=None, (instead of across epochs) for each channel. """ tmin = 0. if tmin is None else float(tmin) - tmax = raw.times[-1] if tmax is None else float(tmax) + dt = 1. / raw.info['sfreq'] + tmax = raw.times[-1] + dt if tmax is None else float(tmax) tstep = tmax - tmin if tstep is None else float(tstep) - tstep_m1 = tstep - 1. / raw.info['sfreq'] # inclusive! + tstep_m1 = tstep - dt # inclusive! events = make_fixed_length_events(raw, 1, tmin, tmax, tstep) logger.info('Using up to %s segment%s' % (len(events), _pl(events))) @@ -504,7 +505,7 @@ def compute_raw_covariance(raw, tmin=0, tmax=None, tstep=0.2, reject=None, ch_names = [raw.info['ch_names'][k] for k in picks] bads = [b for b in raw.info['bads'] if b in ch_names] return Covariance(data, ch_names, bads, raw.info['projs'], - nfree=n_samples) + nfree=n_samples - 1) del picks, pick_mask # This makes it equivalent to what we used to do (and do above for @@ -878,7 +879,7 @@ def _unpack_epochs(epochs): if keep_sample_mean is False: cov = cov_data['empirical']['data'] # undo scaling - cov *= n_samples_tot + cov *= (n_samples_tot - 1) # ... apply pre-computed class-wise normalization for mean_cov in data_mean: cov -= mean_cov @@ -887,7 +888,7 @@ def _unpack_epochs(epochs): covs = list() for this_method, data in cov_data.items(): cov = Covariance(data.pop('data'), ch_names, info['bads'], projs, - nfree=n_samples_tot) + nfree=n_samples_tot - 1) # add extra info cov.update(method=this_method, **data) @@ -944,6 +945,7 @@ def _compute_covariance_auto(data, method, info, method_params, cv, scalings, n_jobs, stop_early, picks_list, rank): """Compute covariance auto mode.""" # rescale to improve numerical stability + from sklearn.covariance import EmpiricalCovariance orig_rank = rank rank = compute_rank(RawArray(data.T, info, copy=None, verbose=False), rank, scalings, info) @@ -1073,6 +1075,8 @@ def _compute_covariance_auto(data, method, info, method_params, cv, loglik = None # project back cov = np.dot(eigvec.T, np.dot(cov, eigvec)) + # undo bias + cov *= data.shape[0] / (data.shape[0] - 1) # undo scaling _undo_scaling_cov(cov, picks_list, scalings) method_ = method[ei] @@ -1198,6 +1202,7 @@ def fit(self, X): self.covariance_ = self.estimator_.fit(X).covariance_ self.covariance_ = 0.5 * (self.covariance_ + self.covariance_.T) + self.covariance_ *= X.shape[0] / (X.shape[0] - 1) cov_ = Covariance( data=self.covariance_, names=self.info['ch_names'], bads=self.info['bads'], projs=self.info['projs'], @@ -1277,10 +1282,11 @@ def fit(self, X): def score(self, X_test, y=None): """Delegate to modified EmpiricalCovariance instance.""" - from sklearn.covariance import empirical_covariance, log_likelihood # compute empirical covariance of the test set + from sklearn.covariance import empirical_covariance, log_likelihood test_cov = empirical_covariance(X_test - self.estimator_.location_, assume_centered=True) + test_cov *= X_test.shape[0] / (X_test.shape[0] - 1) if np.any(self.zero_cross_cov_): test_cov[self.zero_cross_cov_] = 0. res = log_likelihood(test_cov, self.estimator_.get_precision()) diff --git a/mne/fixes.py b/mne/fixes.py index 436e35447d6..daaeac766be 100644 --- a/mne/fixes.py +++ b/mne/fixes.py @@ -687,276 +687,6 @@ def _check_fit_params(X, fit_params, indices=None): return fit_params_validated -############################################################################### -# Copied from sklearn to simplify code paths - -def empirical_covariance(X, assume_centered=False): - """Computes the Maximum likelihood covariance estimator - - - Parameters - ---------- - X : ndarray, shape (n_samples, n_features) - Data from which to compute the covariance estimate - - assume_centered : Boolean - If True, data are not centered before computation. - Useful when working with data whose mean is almost, but not exactly - zero. - If False, data are centered before computation. - - Returns - ------- - covariance : 2D ndarray, shape (n_features, n_features) - Empirical covariance (Maximum Likelihood Estimator). - - """ - X = np.asarray(X) - if X.ndim == 1: - X = np.reshape(X, (1, -1)) - - if X.shape[0] == 1: - warnings.warn("Only one sample available. " - "You may want to reshape your data array") - - if assume_centered: - covariance = np.dot(X.T, X) / X.shape[0] - else: - covariance = np.cov(X.T, bias=1) - - if covariance.ndim == 0: - covariance = np.array([[covariance]]) - return covariance - - -class EmpiricalCovariance(BaseEstimator): - """Maximum likelihood covariance estimator - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - store_precision : bool - Specifies if the estimated precision is stored. - - assume_centered : bool - If True, data are not centered before computation. - Useful when working with data whose mean is almost, but not exactly - zero. - If False (default), data are centered before computation. - - Attributes - ---------- - covariance_ : 2D ndarray, shape (n_features, n_features) - Estimated covariance matrix - - precision_ : 2D ndarray, shape (n_features, n_features) - Estimated pseudo-inverse matrix. - (stored only if store_precision is True) - - """ - def __init__(self, store_precision=True, assume_centered=False): - self.store_precision = store_precision - self.assume_centered = assume_centered - - def _set_covariance(self, covariance): - """Saves the covariance and precision estimates - - Storage is done accordingly to `self.store_precision`. - Precision stored only if invertible. - - Parameters - ---------- - covariance : 2D ndarray, shape (n_features, n_features) - Estimated covariance matrix to be stored, and from which precision - is computed. - - """ - # covariance = check_array(covariance) - # set covariance - self.covariance_ = covariance - # set precision - if self.store_precision: - self.precision_ = linalg.pinvh(covariance) - else: - self.precision_ = None - - def get_precision(self): - """Getter for the precision matrix. - - Returns - ------- - precision_ : array-like, - The precision matrix associated to the current covariance object. - - """ - if self.store_precision: - precision = self.precision_ - else: - precision = linalg.pinvh(self.covariance_) - return precision - - def fit(self, X, y=None): - """Fits the Maximum Likelihood Estimator covariance model - according to the given training data and parameters. - - Parameters - ---------- - X : array-like, shape = [n_samples, n_features] - Training data, where n_samples is the number of samples and - n_features is the number of features. - - y : not used, present for API consistence purpose. - - Returns - ------- - self : object - Returns self. - - """ - # X = check_array(X) - if self.assume_centered: - self.location_ = np.zeros(X.shape[1]) - else: - self.location_ = X.mean(0) - covariance = empirical_covariance( - X, assume_centered=self.assume_centered) - self._set_covariance(covariance) - - return self - - def score(self, X_test, y=None): - """Computes the log-likelihood of a Gaussian data set with - `self.covariance_` as an estimator of its covariance matrix. - - Parameters - ---------- - X_test : array-like, shape = [n_samples, n_features] - Test data of which we compute the likelihood, where n_samples is - the number of samples and n_features is the number of features. - X_test is assumed to be drawn from the same distribution than - the data used in fit (including centering). - - y : not used, present for API consistence purpose. - - Returns - ------- - res : float - The likelihood of the data set with `self.covariance_` as an - estimator of its covariance matrix. - - """ - # compute empirical covariance of the test set - test_cov = empirical_covariance( - X_test - self.location_, assume_centered=True) - # compute log likelihood - res = log_likelihood(test_cov, self.get_precision()) - - return res - - def error_norm(self, comp_cov, norm='frobenius', scaling=True, - squared=True): - """Computes the Mean Squared Error between two covariance estimators. - (In the sense of the Frobenius norm). - - Parameters - ---------- - comp_cov : array-like, shape = [n_features, n_features] - The covariance to compare with. - - norm : str - The type of norm used to compute the error. Available error types: - - 'frobenius' (default): sqrt(tr(A^t.A)) - - 'spectral': sqrt(max(eigenvalues(A^t.A)) - where A is the error ``(comp_cov - self.covariance_)``. - - scaling : bool - If True (default), the squared error norm is divided by n_features. - If False, the squared error norm is not rescaled. - - squared : bool - Whether to compute the squared error norm or the error norm. - If True (default), the squared error norm is returned. - If False, the error norm is returned. - - Returns - ------- - The Mean Squared Error (in the sense of the Frobenius norm) between - `self` and `comp_cov` covariance estimators. - - """ - # compute the error - error = comp_cov - self.covariance_ - # compute the error norm - if norm == "frobenius": - squared_norm = np.sum(error ** 2) - elif norm == "spectral": - squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error))) - else: - raise NotImplementedError( - "Only spectral and frobenius norms are implemented") - # optionally scale the error norm - if scaling: - squared_norm = squared_norm / error.shape[0] - # finally get either the squared norm or the norm - if squared: - result = squared_norm - else: - result = np.sqrt(squared_norm) - - return result - - def mahalanobis(self, observations): - """Computes the squared Mahalanobis distances of given observations. - - Parameters - ---------- - observations : array-like, shape = [n_observations, n_features] - The observations, the Mahalanobis distances of the which we - compute. Observations are assumed to be drawn from the same - distribution than the data used in fit. - - Returns - ------- - mahalanobis_distance : array, shape = [n_observations,] - Squared Mahalanobis distances of the observations. - - """ - precision = self.get_precision() - # compute mahalanobis distances - centered_obs = observations - self.location_ - mahalanobis_dist = np.sum( - np.dot(centered_obs, precision) * centered_obs, 1) - - return mahalanobis_dist - - -def log_likelihood(emp_cov, precision): - """Computes the sample mean of the log_likelihood under a covariance model - - computes the empirical expected log-likelihood (accounting for the - normalization terms and scaling), allowing for universal comparison (beyond - this software package) - - Parameters - ---------- - emp_cov : 2D ndarray (n_features, n_features) - Maximum Likelihood Estimator of covariance - - precision : 2D ndarray (n_features, n_features) - The precision matrix of the covariance model to be tested - - Returns - ------- - sample mean of the log-likelihood - """ - p = precision.shape[0] - log_likelihood_ = - np.sum(emp_cov * precision) + _logdet(precision) - log_likelihood_ -= p * np.log(2 * np.pi) - log_likelihood_ /= 2. - return log_likelihood_ - - # sklearn uses np.linalg for this, but ours is more robust to zero eigenvalues def _logdet(A): diff --git a/mne/tests/test_cov.py b/mne/tests/test_cov.py index 229e7e27933..dcf585ccfd6 100644 --- a/mne/tests/test_cov.py +++ b/mne/tests/test_cov.py @@ -235,27 +235,43 @@ def test_io_cov(tmpdir): read_cov(cov_badname) -@pytest.mark.parametrize('method', (None, ['empirical'])) +@pytest.mark.parametrize('method', (None, 'empirical', 'shrunk')) def test_cov_estimation_on_raw(method, tmpdir): """Test estimation from raw (typically empty room).""" raw = read_raw_fif(raw_fname, preload=True) cov_mne = read_cov(erm_cov_fname) + method_params = dict(shrunk=dict(shrinkage=[0])) # The pure-string uses the more efficient numpy-based method, the # the list gets triaged to compute_covariance (should be equivalent # but use more memory) with pytest.warns(None): # can warn about EEG ref - cov = compute_raw_covariance(raw, tstep=None, method=method, - rank='full') + cov = compute_raw_covariance( + raw, tstep=None, method=method, rank='full', + method_params=method_params) assert_equal(cov.ch_names, cov_mne.ch_names) assert_equal(cov.nfree, cov_mne.nfree) - assert_snr(cov.data, cov_mne.data, 1e4) + assert_snr(cov.data, cov_mne.data, 1e6) + + # test equivalence with np.cov + cov_np = np.cov(raw.copy().pick_channels(cov['names']).get_data(), ddof=1) + if method != 'shrunk': # can check all + off_diag = np.triu_indices(cov_np.shape[0]) + else: + # We explicitly zero out off-diag entries between channel types, + # so let's just check MEG off-diag entries + off_diag = np.triu_indices(len(pick_types(raw.info, exclude=()))) + for other in (cov_mne, cov): + assert_allclose(np.diag(cov_np), np.diag(other.data), rtol=5e-6) + assert_allclose(cov_np[off_diag], other.data[off_diag], rtol=4e-3) + assert_snr(cov.data, other.data, 1e6) # tstep=0.2 (default) with pytest.warns(None): # can warn about EEG ref - cov = compute_raw_covariance(raw, method=method, rank='full') - assert_equal(cov.nfree, cov_mne.nfree - 119) # cutoff some samples - assert_snr(cov.data, cov_mne.data, 1e2) + cov = compute_raw_covariance(raw, method=method, rank='full', + method_params=method_params) + assert_equal(cov.nfree, cov_mne.nfree - 120) # cutoff some samples + assert_snr(cov.data, cov_mne.data, 170) # test IO when computation done in Python cov.save(tmpdir.join('test-cov.fif')) # test saving @@ -268,23 +284,26 @@ def test_cov_estimation_on_raw(method, tmpdir): raw_pick = raw.copy().pick_channels(raw.ch_names[:5]) raw_pick.info.normalize_proj() cov = compute_raw_covariance(raw_pick, tstep=None, method=method, - rank='full') + rank='full', method_params=method_params) assert cov_mne.ch_names[:5] == cov.ch_names - assert_snr(cov.data, cov_mne.data[:5, :5], 1e4) - cov = compute_raw_covariance(raw_pick, method=method, rank='full') + assert_snr(cov.data, cov_mne.data[:5, :5], 5e6) + cov = compute_raw_covariance(raw_pick, method=method, rank='full', + method_params=method_params) assert_snr(cov.data, cov_mne.data[:5, :5], 90) # cutoff samps # make sure we get a warning with too short a segment raw_2 = read_raw_fif(raw_fname).crop(0, 1) with pytest.warns(RuntimeWarning, match='Too few samples'): - cov = compute_raw_covariance(raw_2, method=method) + cov = compute_raw_covariance(raw_2, method=method, + method_params=method_params) # no epochs found due to rejection pytest.raises(ValueError, compute_raw_covariance, raw, tstep=None, method='empirical', reject=dict(eog=200e-6)) # but this should work - cov = compute_raw_covariance(raw.copy().crop(0, 10.), - tstep=None, method=method, - reject=dict(eog=1000e-6), - verbose='error') + with pytest.warns(None): # sklearn + cov = compute_raw_covariance( + raw.copy().crop(0, 10.), tstep=None, method=method, + reject=dict(eog=1000e-6), method_params=method_params, + verbose='error') @pytest.mark.slowtest @@ -328,7 +347,10 @@ def test_cov_estimation_with_triggers(rank, tmpdir): reject=reject, preload=True) cov = compute_covariance(epochs, keep_sample_mean=True) - _assert_cov(cov, read_cov(cov_km_fname)) + cov_km = read_cov(cov_km_fname) + # adjust for nfree bug + cov_km['nfree'] -= 1 + _assert_cov(cov, cov_km) # Test with tmin and tmax (different but not too much) cov_tmin_tmax = compute_covariance(epochs, tmin=-0.19, tmax=-0.01) @@ -347,6 +369,7 @@ def test_cov_estimation_with_triggers(rank, tmpdir): # cov with keep_sample_mean=False using a list of epochs cov = compute_covariance(epochs, keep_sample_mean=False) + assert cov_km.nfree == cov.nfree _assert_cov(cov, read_cov(cov_fname), nfree=False) method_params = {'empirical': {'assume_centered': False}} @@ -530,8 +553,8 @@ def test_compute_covariance_auto_reg(rank): cov_b['data'][diag_mask]) # but the rest is the same - assert_array_equal(cov_a['data'][off_diag_mask], - cov_b['data'][off_diag_mask]) + assert_allclose(cov_a['data'][off_diag_mask], + cov_b['data'][off_diag_mask], rtol=1e-12) else: # and here we have shrinkage everywhere. diff --git a/mne/utils/_testing.py b/mne/utils/_testing.py index 5a0d803a286..f15c151a07e 100644 --- a/mne/utils/_testing.py +++ b/mne/utils/_testing.py @@ -452,8 +452,9 @@ def assert_meg_snr(actual, desired, min_tol, med_tol=500., chpi_med_tol=500., def assert_snr(actual, desired, tol): """Assert actual and desired arrays are within some SNR tolerance.""" - snr = (linalg.norm(desired, ord='fro') / - linalg.norm(desired - actual, ord='fro')) + with np.errstate(divide='ignore'): # allow infinite + snr = (linalg.norm(desired, ord='fro') / + linalg.norm(desired - actual, ord='fro')) assert snr >= tol, '%f < %f' % (snr, tol) From 8c3a55fdd8bf971cc11785af8539c99523595f43 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 14:55:39 -0500 Subject: [PATCH 2/7] FIX: Minor fix --- mne/cov.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mne/cov.py b/mne/cov.py index 9ce67ba0d05..c4348e96dcc 100644 --- a/mne/cov.py +++ b/mne/cov.py @@ -1202,7 +1202,6 @@ def fit(self, X): self.covariance_ = self.estimator_.fit(X).covariance_ self.covariance_ = 0.5 * (self.covariance_ + self.covariance_.T) - self.covariance_ *= X.shape[0] / (X.shape[0] - 1) cov_ = Covariance( data=self.covariance_, names=self.info['ch_names'], bads=self.info['bads'], projs=self.info['projs'], @@ -1282,11 +1281,10 @@ def fit(self, X): def score(self, X_test, y=None): """Delegate to modified EmpiricalCovariance instance.""" - # compute empirical covariance of the test set from sklearn.covariance import empirical_covariance, log_likelihood + # compute empirical covariance of the test set test_cov = empirical_covariance(X_test - self.estimator_.location_, assume_centered=True) - test_cov *= X_test.shape[0] / (X_test.shape[0] - 1) if np.any(self.zero_cross_cov_): test_cov[self.zero_cross_cov_] = 0. res = log_likelihood(test_cov, self.estimator_.get_precision()) From 7d77becca5e80e5d760874496adcc0dbd84293aa Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 15:19:42 -0500 Subject: [PATCH 3/7] FIX: Link --- doc/changes/latest.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changes/latest.inc b/doc/changes/latest.inc index c1d5f0f6f82..3a30c27f4be 100644 --- a/doc/changes/latest.inc +++ b/doc/changes/latest.inc @@ -145,7 +145,7 @@ Bug - Fix bug in ``method='eLORETA'`` for :func:`mne.minimum_norm.apply_inverse` (and variants) to allow restricting source estimation to a label by `Luke Bloy`_ -- Fix bug in :func:`mne.compute_covariance` and :func:`mne.compute_covariance_raw` where biased normalization (based on degrees of freedom) was used and ``cov.nfree`` was not set properly by `Eric Larson`_ +- Fix bug in :func:`mne.compute_covariance` and :func:`mne.compute_raw_covariance` where biased normalization (based on degrees of freedom) was used and ``cov.nfree`` was not set properly by `Eric Larson`_ - Fix :func:`mne.VectorSourceEstimate.normal` to account for cortical patch statistics using ``use_cps=True`` by `Eric Larson`_ From 69284ab2025d8dd659688be3f262e905f4599ffd Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 16:06:00 -0500 Subject: [PATCH 4/7] FIX: Dont require sklearn --- mne/cov.py | 7 +- mne/fixes.py | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+), 5 deletions(-) diff --git a/mne/cov.py b/mne/cov.py index c4348e96dcc..c3e5c2a5d71 100644 --- a/mne/cov.py +++ b/mne/cov.py @@ -39,7 +39,8 @@ _check_option, eigh) from . import viz -from .fixes import BaseEstimator, _logdet +from .fixes import (BaseEstimator, EmpiricalCovariance, _logdet, + empirical_covariance, log_likelihood) def _check_covs_algebra(cov1, cov2): @@ -945,7 +946,6 @@ def _compute_covariance_auto(data, method, info, method_params, cv, scalings, n_jobs, stop_early, picks_list, rank): """Compute covariance auto mode.""" # rescale to improve numerical stability - from sklearn.covariance import EmpiricalCovariance orig_rank = rank rank = compute_rank(RawArray(data.T, info, copy=None, verbose=False), rank, scalings, info) @@ -1195,7 +1195,6 @@ def __init__(self, info, grad=0.1, mag=0.1, eeg=0.1, seeg=0.1, ecog=0.1, def fit(self, X): """Fit covariance model with classical diagonal regularization.""" - from sklearn.covariance import EmpiricalCovariance self.estimator_ = EmpiricalCovariance( store_precision=self.store_precision, assume_centered=self.assume_centered) @@ -1236,7 +1235,6 @@ def __init__(self, store_precision, assume_centered, def fit(self, X): """Fit covariance model with oracle shrinkage regularization.""" from sklearn.covariance import shrunk_covariance - from sklearn.covariance import EmpiricalCovariance self.estimator_ = EmpiricalCovariance( store_precision=self.store_precision, assume_centered=self.assume_centered) @@ -1281,7 +1279,6 @@ def fit(self, X): def score(self, X_test, y=None): """Delegate to modified EmpiricalCovariance instance.""" - from sklearn.covariance import empirical_covariance, log_likelihood # compute empirical covariance of the test set test_cov = empirical_covariance(X_test - self.estimator_.location_, assume_centered=True) diff --git a/mne/fixes.py b/mne/fixes.py index daaeac766be..436e35447d6 100644 --- a/mne/fixes.py +++ b/mne/fixes.py @@ -687,6 +687,276 @@ def _check_fit_params(X, fit_params, indices=None): return fit_params_validated +############################################################################### +# Copied from sklearn to simplify code paths + +def empirical_covariance(X, assume_centered=False): + """Computes the Maximum likelihood covariance estimator + + + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Data from which to compute the covariance estimate + + assume_centered : Boolean + If True, data are not centered before computation. + Useful when working with data whose mean is almost, but not exactly + zero. + If False, data are centered before computation. + + Returns + ------- + covariance : 2D ndarray, shape (n_features, n_features) + Empirical covariance (Maximum Likelihood Estimator). + + """ + X = np.asarray(X) + if X.ndim == 1: + X = np.reshape(X, (1, -1)) + + if X.shape[0] == 1: + warnings.warn("Only one sample available. " + "You may want to reshape your data array") + + if assume_centered: + covariance = np.dot(X.T, X) / X.shape[0] + else: + covariance = np.cov(X.T, bias=1) + + if covariance.ndim == 0: + covariance = np.array([[covariance]]) + return covariance + + +class EmpiricalCovariance(BaseEstimator): + """Maximum likelihood covariance estimator + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + store_precision : bool + Specifies if the estimated precision is stored. + + assume_centered : bool + If True, data are not centered before computation. + Useful when working with data whose mean is almost, but not exactly + zero. + If False (default), data are centered before computation. + + Attributes + ---------- + covariance_ : 2D ndarray, shape (n_features, n_features) + Estimated covariance matrix + + precision_ : 2D ndarray, shape (n_features, n_features) + Estimated pseudo-inverse matrix. + (stored only if store_precision is True) + + """ + def __init__(self, store_precision=True, assume_centered=False): + self.store_precision = store_precision + self.assume_centered = assume_centered + + def _set_covariance(self, covariance): + """Saves the covariance and precision estimates + + Storage is done accordingly to `self.store_precision`. + Precision stored only if invertible. + + Parameters + ---------- + covariance : 2D ndarray, shape (n_features, n_features) + Estimated covariance matrix to be stored, and from which precision + is computed. + + """ + # covariance = check_array(covariance) + # set covariance + self.covariance_ = covariance + # set precision + if self.store_precision: + self.precision_ = linalg.pinvh(covariance) + else: + self.precision_ = None + + def get_precision(self): + """Getter for the precision matrix. + + Returns + ------- + precision_ : array-like, + The precision matrix associated to the current covariance object. + + """ + if self.store_precision: + precision = self.precision_ + else: + precision = linalg.pinvh(self.covariance_) + return precision + + def fit(self, X, y=None): + """Fits the Maximum Likelihood Estimator covariance model + according to the given training data and parameters. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training data, where n_samples is the number of samples and + n_features is the number of features. + + y : not used, present for API consistence purpose. + + Returns + ------- + self : object + Returns self. + + """ + # X = check_array(X) + if self.assume_centered: + self.location_ = np.zeros(X.shape[1]) + else: + self.location_ = X.mean(0) + covariance = empirical_covariance( + X, assume_centered=self.assume_centered) + self._set_covariance(covariance) + + return self + + def score(self, X_test, y=None): + """Computes the log-likelihood of a Gaussian data set with + `self.covariance_` as an estimator of its covariance matrix. + + Parameters + ---------- + X_test : array-like, shape = [n_samples, n_features] + Test data of which we compute the likelihood, where n_samples is + the number of samples and n_features is the number of features. + X_test is assumed to be drawn from the same distribution than + the data used in fit (including centering). + + y : not used, present for API consistence purpose. + + Returns + ------- + res : float + The likelihood of the data set with `self.covariance_` as an + estimator of its covariance matrix. + + """ + # compute empirical covariance of the test set + test_cov = empirical_covariance( + X_test - self.location_, assume_centered=True) + # compute log likelihood + res = log_likelihood(test_cov, self.get_precision()) + + return res + + def error_norm(self, comp_cov, norm='frobenius', scaling=True, + squared=True): + """Computes the Mean Squared Error between two covariance estimators. + (In the sense of the Frobenius norm). + + Parameters + ---------- + comp_cov : array-like, shape = [n_features, n_features] + The covariance to compare with. + + norm : str + The type of norm used to compute the error. Available error types: + - 'frobenius' (default): sqrt(tr(A^t.A)) + - 'spectral': sqrt(max(eigenvalues(A^t.A)) + where A is the error ``(comp_cov - self.covariance_)``. + + scaling : bool + If True (default), the squared error norm is divided by n_features. + If False, the squared error norm is not rescaled. + + squared : bool + Whether to compute the squared error norm or the error norm. + If True (default), the squared error norm is returned. + If False, the error norm is returned. + + Returns + ------- + The Mean Squared Error (in the sense of the Frobenius norm) between + `self` and `comp_cov` covariance estimators. + + """ + # compute the error + error = comp_cov - self.covariance_ + # compute the error norm + if norm == "frobenius": + squared_norm = np.sum(error ** 2) + elif norm == "spectral": + squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error))) + else: + raise NotImplementedError( + "Only spectral and frobenius norms are implemented") + # optionally scale the error norm + if scaling: + squared_norm = squared_norm / error.shape[0] + # finally get either the squared norm or the norm + if squared: + result = squared_norm + else: + result = np.sqrt(squared_norm) + + return result + + def mahalanobis(self, observations): + """Computes the squared Mahalanobis distances of given observations. + + Parameters + ---------- + observations : array-like, shape = [n_observations, n_features] + The observations, the Mahalanobis distances of the which we + compute. Observations are assumed to be drawn from the same + distribution than the data used in fit. + + Returns + ------- + mahalanobis_distance : array, shape = [n_observations,] + Squared Mahalanobis distances of the observations. + + """ + precision = self.get_precision() + # compute mahalanobis distances + centered_obs = observations - self.location_ + mahalanobis_dist = np.sum( + np.dot(centered_obs, precision) * centered_obs, 1) + + return mahalanobis_dist + + +def log_likelihood(emp_cov, precision): + """Computes the sample mean of the log_likelihood under a covariance model + + computes the empirical expected log-likelihood (accounting for the + normalization terms and scaling), allowing for universal comparison (beyond + this software package) + + Parameters + ---------- + emp_cov : 2D ndarray (n_features, n_features) + Maximum Likelihood Estimator of covariance + + precision : 2D ndarray (n_features, n_features) + The precision matrix of the covariance model to be tested + + Returns + ------- + sample mean of the log-likelihood + """ + p = precision.shape[0] + log_likelihood_ = - np.sum(emp_cov * precision) + _logdet(precision) + log_likelihood_ -= p * np.log(2 * np.pi) + log_likelihood_ /= 2. + return log_likelihood_ + + # sklearn uses np.linalg for this, but ours is more robust to zero eigenvalues def _logdet(A): From 052bc801e78cf67333e9f559a9a60e57693fa2a2 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 18:44:46 -0500 Subject: [PATCH 5/7] FIX: sklearn check --- mne/tests/test_cov.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mne/tests/test_cov.py b/mne/tests/test_cov.py index dcf585ccfd6..9c0a5c80b3b 100644 --- a/mne/tests/test_cov.py +++ b/mne/tests/test_cov.py @@ -238,6 +238,11 @@ def test_io_cov(tmpdir): @pytest.mark.parametrize('method', (None, 'empirical', 'shrunk')) def test_cov_estimation_on_raw(method, tmpdir): """Test estimation from raw (typically empty room).""" + if method == 'shrunk': + try: + import sklearn + except Exception as exp: + pytest.skip('sklearn is required, got %s' % (exp,)) raw = read_raw_fif(raw_fname, preload=True) cov_mne = read_cov(erm_cov_fname) method_params = dict(shrunk=dict(shrinkage=[0])) From f00d73cbc7e523b16cafb8b8dc06cc5232b16db3 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 19:14:17 -0500 Subject: [PATCH 6/7] MAINT: Simplify check --- mne/tests/test_cov.py | 14 +++++++------- mne/utils/_testing.py | 10 +--------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/mne/tests/test_cov.py b/mne/tests/test_cov.py index 9c0a5c80b3b..08a493ed725 100644 --- a/mne/tests/test_cov.py +++ b/mne/tests/test_cov.py @@ -29,7 +29,7 @@ from mne.io.pick import _DATA_CH_TYPES_SPLIT from mne.preprocessing import maxwell_filter from mne.rank import _compute_rank_int -from mne.utils import (requires_version, run_tests_if_main, +from mne.utils import (requires_sklearn, run_tests_if_main, catch_logging, assert_snr) base_dir = op.join(op.dirname(__file__), '..', 'io', 'tests', 'data') @@ -312,7 +312,7 @@ def test_cov_estimation_on_raw(method, tmpdir): @pytest.mark.slowtest -@requires_version('sklearn', '0.15') +@requires_sklearn def test_cov_estimation_on_raw_reg(): """Test estimation from raw with regularization.""" raw = read_raw_fif(raw_fname, preload=True) @@ -478,7 +478,7 @@ def test_regularized_covariance(): assert_allclose(data, evoked.data, atol=1e-20) -@requires_version('sklearn', '0.15') +@requires_sklearn def test_auto_low_rank(): """Test probabilistic low rank estimators.""" n_samples, n_features, rank = 400, 10, 5 @@ -520,7 +520,7 @@ def get_data(n_samples, n_features, rank, sigma): @pytest.mark.slowtest @pytest.mark.parametrize('rank', ('full', None, 'info')) -@requires_version('sklearn', '0.15') +@requires_sklearn def test_compute_covariance_auto_reg(rank): """Test automated regularization.""" raw = read_raw_fif(raw_fname, preload=True) @@ -632,7 +632,7 @@ def raw_epochs_events(): return (raw, epochs, events) -@requires_version('sklearn', '0.15') +@requires_sklearn @pytest.mark.parametrize('rank', (None, 'full', 'info')) def test_low_rank_methods(rank, raw_epochs_events): """Test low-rank covariance matrix estimation.""" @@ -667,7 +667,7 @@ def test_low_rank_methods(rank, raw_epochs_events): (rank, method) -@requires_version('sklearn', '0.15') +@requires_sklearn def test_low_rank_cov(raw_epochs_events): """Test additional properties of low rank computations.""" raw, epochs, events = raw_epochs_events @@ -741,7 +741,7 @@ def test_low_rank_cov(raw_epochs_events): @testing.requires_testing_data -@requires_version('sklearn', '0.15') +@requires_sklearn def test_cov_ctf(): """Test basic cov computation on ctf data with/without compensation.""" raw = read_raw_ctf(ctf_fname).crop(0., 2.).load_data() diff --git a/mne/utils/_testing.py b/mne/utils/_testing.py index f15c151a07e..ded12d6b305 100644 --- a/mne/utils/_testing.py +++ b/mne/utils/_testing.py @@ -122,14 +122,6 @@ def requires_module(function, name, call=None): raise ImportError """ -_sklearn_call = """ -required_version = '0.14' -import sklearn -version = LooseVersion(sklearn.__version__) -if version < required_version: - raise ImportError -""" - _mayavi_call = """ with warnings.catch_warnings(record=True): # traits from mayavi import mlab @@ -152,7 +144,7 @@ def requires_module(function, name, call=None): requires_pandas = partial(requires_module, name='pandas', call=_pandas_call) requires_pylsl = partial(requires_module, name='pylsl') -requires_sklearn = partial(requires_module, name='sklearn', call=_sklearn_call) +requires_sklearn = partial(requires_module, name='sklearn') requires_mayavi = partial(requires_module, name='mayavi', call=_mayavi_call) requires_mne = partial(requires_module, name='MNE-C', call=_mne_call) From e88c99062e4bc9e0ecff104d0b6dd5ebebad34ba Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Mon, 2 Mar 2020 23:14:33 -0500 Subject: [PATCH 7/7] FIX: Flake --- mne/tests/test_cov.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/tests/test_cov.py b/mne/tests/test_cov.py index 08a493ed725..04fbbe7a5d2 100644 --- a/mne/tests/test_cov.py +++ b/mne/tests/test_cov.py @@ -240,7 +240,7 @@ def test_cov_estimation_on_raw(method, tmpdir): """Test estimation from raw (typically empty room).""" if method == 'shrunk': try: - import sklearn + import sklearn # noqa: F401 except Exception as exp: pytest.skip('sklearn is required, got %s' % (exp,)) raw = read_raw_fif(raw_fname, preload=True)