From f0d7a56b533a53d3a4308274dcaf5cf9c64ec96e Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 6 May 2024 09:19:14 +0200 Subject: [PATCH 01/16] add fast kci Signed-off-by: Oliver Schacht --- causallearn/utils/cit.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index 8119dd72..d842281a 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -5,6 +5,7 @@ from scipy.stats import chi2, norm from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd +from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd from causallearn.utils.PCUtils import Helper CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5 @@ -13,6 +14,7 @@ mv_fisherz = "mv_fisherz" mc_fisherz = "mc_fisherz" kci = "kci" +fastkci = "fastkci" chisq = "chisq" gsq = "gsq" d_separation = "d_separation" @@ -32,6 +34,8 @@ def CIT(data, method='fisherz', **kwargs): return FisherZ(data, **kwargs) elif method == kci: return KCI(data, **kwargs) + elif method == fastkci: + return FastKCI(data, **kwargs) elif method in [chisq, gsq]: return Chisq_or_Gsq(data, method_name=method, **kwargs) elif method == mv_fisherz: @@ -193,6 +197,28 @@ def __call__(self, X, Y, condition_set=None): self.pvalue_cache[cache_key] = p return p +class FastKCI(CIT_Base): + def __init__(self, data, **kwargs): + super().__init__(data, **kwargs) + kci_ui_kwargs = {k: v for k, v in kwargs.items() if k in + ['kernelX', 'kernelY', 'null_ss', 'approx', 'est_width', 'polyd', 'kwidthx', 'kwidthy']} + kci_ci_kwargs = {k: v for k, v in kwargs.items() if k in + ['K', 'J', 'alpha', 'use_gp']} + self.check_cache_method_consistent( + 'kci', hashlib.md5(json.dumps(kci_ci_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) + self.assert_input_data_is_valid() + self.kci_ui = KCI_UInd(**kci_ui_kwargs) + self.kci_ci = FastKCI_CInd(**kci_ci_kwargs) + + def __call__(self, X, Y, condition_set=None): + # Kernel-based conditional independence test. 
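+ # Dispatch sketch: an empty condition_set routes to the unconditional test (self.kci_ui),
+ # otherwise FastKCI_CInd (self.kci_ci) is applied to X and Y given the conditioning columns.
+ # Computed p-values are memoized in self.pvalue_cache under cache_key.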
+ Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set) + if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key] + p = self.kci_ui.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \ + self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] + self.pvalue_cache[cache_key] = p + return p + class Chisq_or_Gsq(CIT_Base): def __init__(self, data, method_name, **kwargs): def _unique(column): From b35b9bb876848137d3e1db4bd836a2c8b6a44c66 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 6 May 2024 09:20:46 +0200 Subject: [PATCH 02/16] safe null dist for debug Signed-off-by: Oliver Schacht --- causallearn/utils/KCI/KCI.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py index 41cbd467..681d48d4 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -306,8 +306,8 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None): k_appr, theta_appr = self.get_kappa(uu_prod) pvalue = 1 - stats.gamma.cdf(test_stat, k_appr, 0, theta_appr) else: - null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) - pvalue = sum(null_samples > test_stat) / float(self.nullss) + self.null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) + pvalue = sum(self.null_samples > test_stat) / float(self.nullss) return pvalue, test_stat def kernel_matrix(self, data_x, data_y, data_z): From 4d1ca1251ba764e8437ba7d37ef82d63104aa03e Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 6 May 2024 09:25:17 +0200 Subject: [PATCH 03/16] add fast kci module Signed-off-by: Oliver Schacht --- causallearn/utils/FastKCI/FastKCI.py | 480 ++++++++++++++++++++++++++ causallearn/utils/FastKCI/__init__.py | 0 2 files changed, 480 insertions(+) create mode 100644 causallearn/utils/FastKCI/FastKCI.py create mode 100644 causallearn/utils/FastKCI/__init__.py diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py new file mode 100644 index 00000000..d9e2d245 --- /dev/null +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -0,0 +1,480 @@ +from causallearn.utils.KCI.KCI import GaussianKernel, Kernel +import numpy as np +from numpy.linalg import eigh +import scipy.stats as stats +from scipy.special import logsumexp +from sklearn.preprocessing import LabelEncoder +from joblib import Parallel, delayed +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF +import warnings + +class FastKCI_CInd(object): + """ + Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version. + + References + ---------- + [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf, + "A kernel-based conditional independence test and application in causal discovery," In UAI 2011. + [2] M. Zhang, and S. Williamson, + "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019) + + """ + def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimming_thresh = 1e-3, use_gp=False): + """ + Initialize the FastKCI_CInd object. + + Parameters + ---------- + K: Number of Gaussians that are assumed to be in the mixture. + J: Number of independent repittitions. + alpha: Parameter for the Dirichlet distribution. 
+ epsilon: Penalty for the matrix ridge regression. + eig_threshold: Threshold for Eigenvalues. + use_gp: Whether to use Gaussian Process Regression to determine the kernel widths + """ + self.K = K + self.J = J + self.alpha = alpha + self.epsilon = epsilon + self.eig_thresh = eig_thresh + self.trimming_thresh = trimming_thresh + self.use_gp = use_gp + self.nullss = 5000 + # TODO: Adjust to causal-learn API + + def compute_pvalue(self, data_x=None, data_y=None, data_z=None): + """ + Main function: compute the p value of H_0: X|Y conditional on Z. + + Parameters + ---------- + data_x: input data for x (nxd1 array) + data_y: input data for y (nxd2 array) + data_z: input data for z (nxd3 array) + + Returns + _________ + pvalue: p value (scalar) + test_stat: test statistic (scalar) + """ + self.data = [data_x, data_y] + self.data_z = data_z + self.n = data_x.shape[0] + + Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) + self.Z_proposal, self.prob_Z = zip(*Z_proposal) + block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) + test_stat, null_samples, log_likelihood = zip(*block_res) + + log_likelihood = np.array(log_likelihood) + self.prob_Z += log_likelihood + self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used + self.all_null_samples = np.vstack(null_samples) + self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)]) + self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) + self.all_test_stats = np.array(test_stat) + self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) + self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights) + # experimental, not used + self.pvalue_alt = np.sum(np.average(null_samples, axis = 0, weights = self.prob_Z) > np.average(np.array(test_stat), weights = self.prob_Z)) / float(self.nullss) + self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) + + return self.pvalue, self.test_stat + + def partition_data(self): + """ + Partitions data into K Gaussians following an expectation maximization approach on Z. + + Returns + _________ + Z: Latent Gaussian that was drawn for each observation (nx1 array) + prob_Z: Log-Likelihood of that random assignment (scalar) + """ + Z_mean = self.data_z.mean(axis=0) + Z_sd = self.data_z.std(axis=0) + mu_k = np.random.normal(Z_mean, Z_sd, size = (self.K,self.data_z.shape[1])) + sigma_k = np.eye(self.data_z.shape[1]) + pi_j = np.random.dirichlet([self.alpha]*self.K) + ll = np.tile(np.log(pi_j),(self.n,1)) + for k in range(self.K): + ll[:,k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k,:], cov=sigma_k, allow_singular=True) + Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)]) + le = LabelEncoder() + Z = le.fit_transform(Z) + prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed + return Z, prob_Z + + def pvalue_onblocks(self, Z_proposal): + """ + Calculate p value on given partitions of the data. 
+ + Parameters + ---------- + Z_proposal: partition of the data into K clusters (nxd1 array) + + Returns + _________ + test_stat: test statistic (scalar) + null_samples: bootstrapped sampled of the null distribution (self.nullssx1 array) + log_likelihood: estimated probability P(X,Y|Z,k) + """ + unique_Z_j = np.unique(Z_proposal) + test_stat = 0 + log_likelihood = 0 + null_samples = np.zeros((1,self.nullss)) + for k in unique_Z_j: + K_mask = (Z_proposal==k) + X_k = np.copy(self.data[0][K_mask]) + Y_k = np.copy(self.data[1][K_mask]) + Z_k = np.copy(self.data_z[K_mask]) + if (Z_k.shape[0]<6): # small blocks cause problems in GP, experimental + continue + Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) + KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) + if epsilon_x != epsilon_y: + KyR, Rzy = Kernel.center_kernel_matrix_regression(Ky, Kzy, epsilon_y) + else: + KyR = Rzx.dot(Ky.dot(Rzx)) + test_stat += np.einsum('ij,ji->', KxR, KyR) + uu_prod, size_u = self.get_uuprod(KxR, KyR) + null_samples += self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) + log_likelihood += likelihood_x + likelihood_y + return test_stat, null_samples, log_likelihood + + + def kernel_matrix(self, data_x, data_y, data_z): + """ + Calculates the Gaussian Kernel for given data inputs as well as the shared kernel. + + Returns + _________ + K: Kernel matrices (n_kxn_k array) + """ + kernel_obj = GaussianKernel() + kernel_obj.set_width_empirical_kci(data_z) + + data_x = stats.zscore(data_x, ddof=1, axis=0) + data_x[np.isnan(data_x)] = 0. + + data_y = stats.zscore(data_y, ddof=1, axis=0) + data_y[np.isnan(data_y)] = 0. + + data_z = stats.zscore(data_z, ddof=1, axis=0) + data_z[np.isnan(data_z)] = 0. + + data_x = np.concatenate((data_x, 0.5 * data_z), axis=1) + + kernelX = GaussianKernel() + kernelX.set_width_empirical_kci(data_z) + kernelY = GaussianKernel() + kernelY.set_width_empirical_kci(data_z) + + Kx = kernelX.kernel(data_x) + Ky = kernelY.kernel(data_y) + + # centering kernel matrix + Kx = Kernel.center_kernel_matrix(Kx) + Ky = Kernel.center_kernel_matrix(Ky) + + if not self.use_gp: + kernelZ = GaussianKernel() + kernelZ.set_width_empirical_kci(data_z) + Kzx = kernelZ.kernel(data_z) + Kzx = Kernel.center_kernel_matrix(Kzx) + Kzy = Kzx + epsilon_x, epsilon_y = self.epsilon, self.epsilon + gpx = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(X|Z) + gpx.fit(X = data_z, y = data_x) + likelihood_x = gpx.log_marginal_likelihood_value_ + gpy = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(Y|X,Z) + gpy.fit(X = np.c_[data_x,data_z], y=data_y) + likelihood_y = gpy.log_marginal_likelihood_value_ + + else: + n, Dz = data_z.shape + + widthz = np.sqrt(1.0 / (kernelX.width * data_x.shape[1])) + + # Instantiate a Gaussian Process model for x + wx, vx = eigh(Kx) + topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + idx = np.argsort(-wx) + wx = wx[idx] + vx = vx[:, idx] + wx = wx[0:topkx] + vx = vx[:, 0:topkx] + vx = vx[:, np.abs(wx) > np.abs(wx).max() * 1e-10] + wx = wx[np.abs(wx) > np.abs(wx).max() * 1e-10] + vx = 2 * np.sqrt(n) * vx.dot(np.diag(np.sqrt(wx))) / np.sqrt(wx[0]) + kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1)) + gpx = GaussianProcessRegressor(kernel=kernelx) + # fit Gaussian process, including 
hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpx.fit(data_z, vx) + + # construct Gaussian kernels according to learned hyperparameters + Kzx = gpx.kernel_.k1(data_z, data_z) + epsilon_x = np.exp(gpx.kernel_.theta[-1]) + likelihood_x = gpx.log_marginal_likelihood_value_ + + # Instantiate a Gaussian Process model for y + wy, vy = eigh(Ky) + topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + idy = np.argsort(-wy) + wy = wy[idy] + vy = vy[:, idy] + wy = wy[0:topky] + vy = vy[:, 0:topky] + vy = vy[:, np.abs(wy) > np.abs(wy).max() * 1e-10] + wy = wy[np.abs(wy) > np.abs(wy).max() * 1e-10] + vy = 2 * np.sqrt(n) * vy.dot(np.diag(np.sqrt(wy))) / np.sqrt(wy[0]) + kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1)) + gpy = GaussianProcessRegressor(kernel=kernely) + # fit Gaussian process, including hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpy.fit(data_z, vy) + + # construct Gaussian kernels according to learned hyperparameters + Kzy = gpy.kernel_.k1(data_z, data_z) + epsilon_y = np.exp(gpy.kernel_.theta[-1]) + likelihood_y = gpy.log_marginal_likelihood_value_ + + return Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y + + def get_uuprod(self, Kx, Ky): + """ + Compute eigenvalues for null distribution estimation + + Parameters + ---------- + Kx: centralized kernel matrix for data_x (nxn) + Ky: centralized kernel matrix for data_y (nxn) + + Returns + _________ + uu_prod: product of the eigenvectors of Kx and Ky + size_u: number of produced eigenvectors + + """ + n_block = Kx.shape[0] + wx, vx = eigh(Kx) + wy, vy = eigh(Ky) + idx = np.argsort(-wx) + idy = np.argsort(-wy) + wx = wx[idx] + vx = vx[:, idx] + wy = wy[idy] + vy = vy[:, idy] + vx = vx[:, wx > np.max(wx) * self.eig_thresh] + wx = wx[wx > np.max(wx) * self.eig_thresh] + vy = vy[:, wy > np.max(wy) * self.eig_thresh] + wy = wy[wy > np.max(wy) * self.eig_thresh] + vx = vx.dot(np.diag(np.sqrt(wx))) + vy = vy.dot(np.diag(np.sqrt(wy))) + + # calculate their product + num_eigx = vx.shape[1] + num_eigy = vy.shape[1] + size_u = num_eigx * num_eigy + uu = np.zeros((n_block, size_u)) + for i in range(0, num_eigx): + for j in range(0, num_eigy): + uu[:, i * num_eigy + j] = vx[:, i] * vy[:, j] + + if size_u > self.n: + uu_prod = uu.dot(uu.T) + else: + uu_prod = uu.T.dot(uu) + + return uu_prod, size_u + + def get_kappa(self, mean_appr, var_appr): + """ + Get parameters for the approximated gamma distribution + Parameters + ---------- + uu_prod: product of the eigenvectors of Kx and Ky + + Returns + ---------- + k_appr, theta_appr: approximated parameters of the gamma distribution + + """ + k_appr = mean_appr ** 2 / var_appr + theta_appr = var_appr / mean_appr + return k_appr, theta_appr + + def null_sample_spectral(self, uu_prod, size_u, T): + """ + Simulate data from null distribution + + Parameters + ---------- + uu_prod: product of the eigenvectors of Kx and Ky + size_u: number of produced eigenvectors + T: sample size + + Returns + _________ + null_dstr: samples from the null distribution + + """ + eig_uu = np.linalg.eigvalsh(uu_prod) + eig_uu = -np.sort(-eig_uu) + eig_uu = eig_uu[0:np.min((T, size_u))] + eig_uu = eig_uu[eig_uu > np.max(eig_uu) * self.eig_thresh] + + f_rand = np.random.chisquare(1, (eig_uu.shape[0], self.nullss)) + null_dstr = eig_uu.T.dot(f_rand) + return null_dstr + +class 
FastKCI_UInd(object): + """ + Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version. + + References + ---------- + [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf, + "A kernel-based conditional independence test and application in causal discovery," In UAI 2011. + [2] M. Zhang, and S. Williamson, + "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019) + """ + def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3): + """ + Construct the FastKCI_UInd model. + + Parameters + ---------- + K: Number of Gaussians that are assumed to be in the mixture + J: Number of independent repittitions. + alpha: Parameter for the Dirichlet distribution. + trimming_thresh: Threshold for trimming the propensity weights. + """ + self.K = K + self.J = J + self.alpha = alpha + self.trimming_thresh = trimming_thresh + # TODO: Adjust to causal-learn API + + def compute_pvalue(self, data_x=None, data_y=None): + """ + Main function: compute the p value and return it together with the test statistic + + Parameters + ---------- + data_x: input data for x (nxd1 array) + data_y: input data for y (nxd2 array) + + Returns + _________ + pvalue: p value (scalar) + test_stat: test statistic (scalar) + """ + self.data_x = data_x + self.data_y = data_y + self.n = data_x.shape[0] + + Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)(self.data) for i in range(self.J)) + self.Z_proposal, self.prob_Y = zip(*Z_proposal) + block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) + test_stat, mean_appr, var_appr, log_likelihood = zip(*block_res) + self.prob_Y = np.array(self.prob_Y) + self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood))) + self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) + self.mean_appr = np.average(np.array(mean_appr), weights = self.prob_weights) + self.var_appr = np.average(np.array(var_appr), weights = self.prob_weights) + + k_appr, theta_appr = self.get_kappa(self.mean_appr, self.var_appr) + self.pvalue = 1 - stats.gamma.cdf(self.test_stat, k_appr, 0, theta_appr) + + return self.pvalue, self.test_stat + + def partition_data(self): + """ + Partitions data into K Gaussians + + Returns + _________ + Z: Latent Gaussian that was drawn for each observation (nx1 array) + prob_Y: Log-Likelihood of that random assignment (scalar) + """ + Y_mean = self.data_y.mean(axis=0) + Y_sd = self.data_y.std(axis=0) + mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.shape[1])) + sigma_k = np.eye(self.data_y.shape[1]) + pi_j = np.random.dirichlet([self.alpha]*self.K) + ll = np.tile(np.log(pi_j),(self.n,1)) + for k in range(self.K): + ll[:,k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k,:], cov=sigma_k, allow_singular=True) + Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)]) + prop_Y = np.take_along_axis(ll, Z[:, None], axis=1).sum() + le = LabelEncoder() + Z = le.fit_transform(Z) + return (Z, prop_Y) + + def pvalue_onblocks(self, Z_proposal): + unique_Z_j = np.unique(Z_proposal) + test_stat = 0 + prob_weight = 0 + mean_appr = 0 + var_appr = 0 + for k in unique_Z_j: + K_mask = (Z_proposal==k) + X_k = np.copy(self.data[0][K_mask]) + Y_k = np.copy(self.data[1][K_mask]) + + Kx = self.kernel_matrix(X_k) + Ky = self.kernel_matrix(Y_k) + n_block = Kx.shape[0] + + Kxc = Kernel.center_kernel_matrix(Kx) + Kyc = Kernel.center_kernel_matrix(Ky) + 
test_stat += np.einsum('ij,ji->', Kxc, Kyc) + mean_appr += np.trace(Kxc) * np.trace(Kyc) / n_block + var_appr += 2 * np.sum(Kxc ** 2) * np.sum(Kyc ** 2) / n_block / n_block + var_appr /= unique_Z_j.shape[0] + return (test_stat, mean_appr, var_appr, prob_weight) + + def kernel_matrix(self, data): + """ + Calculates the Gaussian Kernel for given data inputs. + + Returns + _________ + K: Kernel matrix (n_kxn_k array) + """ + kernel_obj = GaussianKernel() + kernel_obj.set_width_empirical_hsic(data) + + data = stats.zscore(data, ddof=1, axis=0) + data[np.isnan(data)] = 0. + + K = kernel_obj.kernel(data) + + return K + + def get_kappa(self, mean_appr, var_appr): + """ + Get parameters for the approximated gamma distribution + Parameters + ---------- + Kx: kernel matrix for data_x (nxn) + Ky: kernel matrix for data_y (nxn) + + Returns + _________ + k_appr, theta_appr: approximated parameters of the gamma distribution + """ + k_appr = mean_appr ** 2 / var_appr + theta_appr = var_appr / mean_appr + return k_appr, theta_appr diff --git a/causallearn/utils/FastKCI/__init__.py b/causallearn/utils/FastKCI/__init__.py new file mode 100644 index 00000000..e69de29b From 14b9c6e398822a0157b5841722cc4ac773fc9a11 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Tue, 28 May 2024 12:01:48 +0200 Subject: [PATCH 04/16] update UInd test Signed-off-by: Oliver Schacht --- causallearn/utils/FastKCI/FastKCI.py | 102 ++++++++++++++++++++------- causallearn/utils/cit.py | 4 +- 2 files changed, 80 insertions(+), 26 deletions(-) diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py index d9e2d245..5b614dd9 100644 --- a/causallearn/utils/FastKCI/FastKCI.py +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -364,6 +364,8 @@ def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3): self.J = J self.alpha = alpha self.trimming_thresh = trimming_thresh + self.nullss = 5000 + self.eig_thresh = 1e-5 # TODO: Adjust to causal-learn API def compute_pvalue(self, data_x=None, data_y=None): @@ -384,18 +386,14 @@ def compute_pvalue(self, data_x=None, data_y=None): self.data_y = data_y self.n = data_x.shape[0] - Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)(self.data) for i in range(self.J)) + Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) self.Z_proposal, self.prob_Y = zip(*Z_proposal) block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) - test_stat, mean_appr, var_appr, log_likelihood = zip(*block_res) - self.prob_Y = np.array(self.prob_Y) - self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood))) + test_stat, null_samples, log_likelihood = zip(*block_res) + self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) - self.mean_appr = np.average(np.array(mean_appr), weights = self.prob_weights) - self.var_appr = np.average(np.array(var_appr), weights = self.prob_weights) - - k_appr, theta_appr = self.get_kappa(self.mean_appr, self.var_appr) - self.pvalue = 1 - stats.gamma.cdf(self.test_stat, k_appr, 0, theta_appr) + self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights) + self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) return self.pvalue, self.test_stat @@ -410,7 +408,7 @@ def partition_data(self): """ Y_mean = self.data_y.mean(axis=0) Y_sd = 
self.data_y.std(axis=0) - mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.shape[1])) + mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.data_y.shape[1])) sigma_k = np.eye(self.data_y.shape[1]) pi_j = np.random.dirichlet([self.alpha]*self.K) ll = np.tile(np.log(pi_j),(self.n,1)) @@ -425,25 +423,30 @@ def partition_data(self): def pvalue_onblocks(self, Z_proposal): unique_Z_j = np.unique(Z_proposal) test_stat = 0 - prob_weight = 0 - mean_appr = 0 - var_appr = 0 + log_likelihood = 0 + null_samples = np.zeros((1,self.nullss)) for k in unique_Z_j: K_mask = (Z_proposal==k) - X_k = np.copy(self.data[0][K_mask]) - Y_k = np.copy(self.data[1][K_mask]) + X_k = np.copy(self.data_x[K_mask]) + Y_k = np.copy(self.data_y[K_mask]) Kx = self.kernel_matrix(X_k) Ky = self.kernel_matrix(Y_k) - n_block = Kx.shape[0] - - Kxc = Kernel.center_kernel_matrix(Kx) - Kyc = Kernel.center_kernel_matrix(Ky) - test_stat += np.einsum('ij,ji->', Kxc, Kyc) - mean_appr += np.trace(Kxc) * np.trace(Kyc) / n_block - var_appr += 2 * np.sum(Kxc ** 2) * np.sum(Kyc ** 2) / n_block / n_block - var_appr /= unique_Z_j.shape[0] - return (test_stat, mean_appr, var_appr, prob_weight) + + v_stat, Kxc, Kyc = self.HSIC_V_statistic(Kx, Ky) + + null_samples += self.null_sample_spectral(Kxc, Kyc) + test_stat += v_stat + + gpx = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(X|Y) + gpx.fit(X = Y_k, y = X_k) + likelihood = gpx.log_marginal_likelihood_value_ + log_likelihood += likelihood + + return test_stat, null_samples, log_likelihood def kernel_matrix(self, data): """ @@ -478,3 +481,54 @@ def get_kappa(self, mean_appr, var_appr): k_appr = mean_appr ** 2 / var_appr theta_appr = var_appr / mean_appr return k_appr, theta_appr + + def null_sample_spectral(self, Kxc, Kyc): + """ + Simulate data from null distribution + + Parameters + ---------- + Kxc: centralized kernel matrix for data_x (nxn) + Kyc: centralized kernel matrix for data_y (nxn) + + Returns + _________ + null_dstr: samples from the null distribution + + """ + T = Kxc.shape[0] + if T > 1000: + num_eig = int(np.floor(T / 2)) + else: + num_eig = T + lambdax = np.linalg.eigvalsh(Kxc) + lambday = np.linalg.eigvalsh(Kyc) + lambdax = -np.sort(-lambdax) + lambday = -np.sort(-lambday) + lambdax = lambdax[0:num_eig] + lambday = lambday[0:num_eig] + lambda_prod = np.dot(lambdax.reshape(num_eig, 1), lambday.reshape(1, num_eig)).reshape( + (num_eig ** 2, 1)) + lambda_prod = lambda_prod[lambda_prod > lambda_prod.max() * self.eig_thresh] + f_rand = np.random.chisquare(1, (lambda_prod.shape[0], self.nullss)) + null_dstr = lambda_prod.T.dot(f_rand) / T + return null_dstr + + def HSIC_V_statistic(self, Kx, Ky): + """ + Compute V test statistic from kernel matrices Kx and Ky + Parameters + ---------- + Kx: kernel matrix for data_x (nxn) + Ky: kernel matrix for data_y (nxn) + + Returns + _________ + Vstat: HSIC v statistics + Kxc: centralized kernel matrix for data_x (nxn) + Kyc: centralized kernel matrix for data_y (nxn) + """ + Kxc = Kernel.center_kernel_matrix(Kx) + Kyc = Kernel.center_kernel_matrix(Ky) + V_stat = np.einsum('ij,ij->', Kxc, Kyc) + return V_stat, Kxc, Kyc diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index d842281a..be168e40 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -201,13 +201,13 @@ class FastKCI(CIT_Base): def __init__(self, data, **kwargs): super().__init__(data, **kwargs) kci_ui_kwargs = {k: v for k, v in kwargs.items() if k in - ['kernelX', 
'kernelY', 'null_ss', 'approx', 'est_width', 'polyd', 'kwidthx', 'kwidthy']} + ['K', 'J', 'alpha']} kci_ci_kwargs = {k: v for k, v in kwargs.items() if k in ['K', 'J', 'alpha', 'use_gp']} self.check_cache_method_consistent( 'kci', hashlib.md5(json.dumps(kci_ci_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) self.assert_input_data_is_valid() - self.kci_ui = KCI_UInd(**kci_ui_kwargs) + self.kci_ui = FastKCI_UInd(**kci_ui_kwargs) self.kci_ci = FastKCI_CInd(**kci_ci_kwargs) def __call__(self, X, Y, condition_set=None): From fc74434ecabe7b246840ec3bf2c922b8531c19ab Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:33:33 +0200 Subject: [PATCH 05/16] add rcit Signed-off-by: Oliver Schacht --- causallearn/utils/RCIT/RCIT.py | 251 +++++++++++++++++++++++++++++ causallearn/utils/RCIT/__init__.py | 0 causallearn/utils/cit.py | 26 +++ setup.py | 3 +- 4 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 causallearn/utils/RCIT/RCIT.py create mode 100644 causallearn/utils/RCIT/__init__.py diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py new file mode 100644 index 00000000..c9c931cb --- /dev/null +++ b/causallearn/utils/RCIT/RCIT.py @@ -0,0 +1,251 @@ +import numpy as np +import itertools +from scipy.stats import chi2 +from scipy.linalg import pinv +from momentchi2 import hbe, sw, lpb4 +from scipy.spatial.distance import pdist, squareform + + +class RCIT(object): + def __init__(self, approx="lpd4", num_f=100, num_f2=5, rcit=True): + self.approx = approx + self.num_f = num_f + self.num_f2 = num_f2 + self.rcit = rcit + + def compute_pvalue(self, data_x, data_y, data_z): + d = data_z.shape[1] + r = data_x.shape[0] + r1 = 500 if (r > 500) else r + + data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1) + data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1) + data_z = (data_z - data_z.mean(axis=0)) / data_z.std(axis=0, ddof=1) + + if self.rcit: + data_y = np.column_stack((data_y, data_z)) + + sigma = dict() + for key, value in [("x", data_x), ("y", data_y), ("z", data_z)]: + distances = pdist(value[:r1, :], metric='euclidean') + flattened_distances = squareform(distances).ravel() + non_zero_distances = flattened_distances[flattened_distances != 0] + sigma[key] = np.median(non_zero_distances) + + four_z = self.random_fourier_features(data_z, num_f=self.num_f, sigma=sigma["z"]) + four_x = self.random_fourier_features(data_x, num_f=self.num_f2, sigma=sigma["x"]) + four_y = self.random_fourier_features(data_y, num_f=self.num_f2, sigma=sigma["y"]) + + f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1) + f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1) + f_z = (four_z - four_z.mean(axis=0)) / four_z.std(axis=0, ddof=1) + + Cxy = f_x.T @ f_y / (f_x.shape[0] - 1) + + Czz = np.cov(f_z, rowvar=False) + + regularized_Czz = Czz + np.eye(self.num_f) * 1e-10 + L = np.linalg.cholesky(regularized_Czz) + i_Czz = np.linalg.inv(L).T.dot(np.linalg.inv(L)) + + Cxz = f_x.T @ f_z / (f_x.shape[0] - 1) + Czy = f_z.T @ f_y / (f_z.shape[0] - 1) + + z_i_Czz = f_z @ i_Czz + e_x_z = z_i_Czz @ Cxz.T + e_y_z = z_i_Czz @ Czy + + res_x = f_x - e_x_z + res_y = f_y - e_y_z + + if self.num_f2 == 1: + self.approx = "hbe" + + if self.approx == "perm": + Cxy_z = self.matrix_cov(res_x, res_y) + sta = r * np.sum(Cxy_z**2) + + nperm = 1000 + + stas = [] + for _ in range(nperm): + perm = np.random.choice(np.arange(r), size=r, replace=False) + Cxy = 
self.matrix_cov(res_x[perm, ], res_y) + sta_p = r * np.sum(Cxy**2) + stas.append(sta_p) + + p = 1 - (np.sum(sta >= stas) / len(stas)) + + else: + Cxy_z = Cxy - Cxz @ i_Czz @ Czy + sta = r * np.sum(Cxy_z**2) + + d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1]))) + res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T + Cov = 1/r * res.T @ res + + if self.approx == "chi2": + i_Cov = pinv(Cov) + + sta = r * (np.dot(Cxy_z.flatten(), np.dot(i_Cov, Cxy_z.flatten()))) + p = 1 - chi2.cdf(sta, Cxy_z.size) + + else: + eigenvalues, eigenvectors = np.linalg.eigh(Cov) + eig_d = eigenvalues[eigenvalues > 0] + + if self.approx == "gamma": + p = 1 - sw(eig_d, sta) + + elif self.approx == "hbe": + p = 1 - hbe(eig_d, sta) + + elif self.approx == "lpd4": + try: + p = 1 - lpb4(eig_d, sta) + except Exception: + p = 1 - hbe(eig_d, sta) + if np.isnan(p): + p = 1 - hbe(eig_d, sta) + + if (p < 0): + p = 0 + + return p, sta + + def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): + + if num_f is None: + num_f = 25 + + r = x.shape[0] + c = x.shape[1] + + if ((sigma == 0) | (sigma is None)): + sigma = 1 + + if w is None: + w = (1/sigma) * np.random.normal(size=(num_f, c)) + b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r)) + + feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T + + return feat + + def matrix_cov(mat_a, mat_b): + n_obs = mat_a.shape[0] + + assert mat_a.shape == mat_b.shape + mat_a = mat_a - mat_a.mean(axis=0) + mat_b = mat_b - mat_b.mean(axis=0) + + mat_cov = mat_a.T @ mat_b / (n_obs - 1) + + return mat_cov + + +class RIT(object): + def __init__(self, approx="lpd4"): + self.approx = approx + + def compute_pvalue(self, data_x, data_y): + r = data_x.shape[0] + r1 = 500 if (r > 500) else r + + data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1) + data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1) + + sigma = dict() + for key, value in [("x", data_x), ("y", data_y)]: + distances = pdist(value[:r1, :], metric='euclidean') + flattened_distances = squareform(distances).ravel() + non_zero_distances = flattened_distances[flattened_distances != 0] + sigma[key] = np.median(non_zero_distances) + + four_x = self.random_fourier_features(data_x, num_f=5, sigma=sigma["x"]) + four_y = self.random_fourier_features(data_y, num_f=5, sigma=sigma["y"]) + + f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1) + f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1) + + Cxy = self.matrix_cov(f_x, f_y) + sta = r * np.sum(Cxy**2) + + if self.approx == "perm": + nperm = 1000 + + stas = [] + for _ in range(nperm): + perm = np.random.choice(np.arange(r), size=r, replace=False) + Cxy = self.matrix_cov(f_x[perm, ], f_y) + sta_p = r * np.sum(Cxy**2) + stas.append(sta_p) + + p = 1 - (np.sum(sta >= stas) / len(stas)) + + else: + res_x = f_x - np.meshgrid(f_x.mean(axis=0), (r, 1)) + res_y = f_y - np.meshgrid(f_y.mean(axis=0), (r, 1)) + + d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1]))) + res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T + Cov = 1/r * res.T @ res + + if self.approx == "chi2": + i_Cov = pinv(Cov) + + sta = r * (np.dot(Cxy.flatten(), np.dot(i_Cov, Cxy.flatten()))) + p = 1 - chi2.cdf(sta, Cxy.size) + + else: + eigenvalues, eigenvectors = np.linalg.eigh(Cov) + eig_d = eigenvalues[eigenvalues > 0] + + if self.approx == "gamma": + p = 1 - sw(eig_d, sta) + + elif self.approx == "hbe": + p = 1 - hbe(eig_d, sta) + + elif 
self.approx == "lpd4": + try: + p = 1 - lpb4(eig_d, sta) + except Exception: + p = 1 - hbe(eig_d, sta) + if np.isnan(p): + p = 1 - hbe(eig_d, sta) + + if (p < 0): + p = 0 + + return p, sta + + def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): + + if num_f is None: + num_f = 25 + + r = x.shape[0] + c = x.shape[1] + + if ((sigma == 0) | (sigma is None)): + sigma = 1 + + if w is None: + w = (1/sigma) * np.random.normal(size=(num_f, c)) + b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r)) + + feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T + + return feat + + def matrix_cov(mat_a, mat_b): + n_obs = mat_a.shape[0] + + assert mat_a.shape == mat_b.shape + mat_a = mat_a - mat_a.mean(axis=0) + mat_b = mat_b - mat_b.mean(axis=0) + + mat_cov = mat_a.T @ mat_b / (n_obs - 1) + + return mat_cov diff --git a/causallearn/utils/RCIT/__init__.py b/causallearn/utils/RCIT/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index be168e40..c8858a0b 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -6,6 +6,7 @@ from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd +from causallearn.utils.RCIT.RCIT import RCIT, RIT from causallearn.utils.PCUtils import Helper CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5 @@ -14,6 +15,7 @@ mv_fisherz = "mv_fisherz" mc_fisherz = "mc_fisherz" kci = "kci" +rcit = "rcit" fastkci = "fastkci" chisq = "chisq" gsq = "gsq" @@ -36,6 +38,8 @@ def CIT(data, method='fisherz', **kwargs): return KCI(data, **kwargs) elif method == fastkci: return FastKCI(data, **kwargs) + elif method == rcit: + return RCIT(data, **kwargs) elif method in [chisq, gsq]: return Chisq_or_Gsq(data, method_name=method, **kwargs) elif method == mv_fisherz: @@ -218,6 +222,28 @@ def __call__(self, X, Y, condition_set=None): self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] self.pvalue_cache[cache_key] = p return p + +class RCIT(CIT_Base): + def __init__(self, data, **kwargs): + super().__init__(data, **kwargs) + rit_kwargs = {k: v for k, v in kwargs.items() if k in + ['approx']} + rcit_kwargs = {k: v for k, v in kwargs.items() if k in + ['approx', 'num_f', 'num_f2', 'rcit']} + self.check_cache_method_consistent( + 'kci', hashlib.md5(json.dumps(rcit_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) + self.assert_input_data_is_valid() + self.rit = RIT(**rit_kwargs) + self.rcit = RCIT(**rcit_kwargs) + + def __call__(self, X, Y, condition_set=None): + # Kernel-based conditional independence test. 
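+ # Dispatch sketch: an empty condition_set routes to the unconditional RIT test (self.rit),
+ # otherwise the randomized conditional test (self.rcit) is used; p-values are cached
+ # under cache_key, mirroring the KCI and FastKCI wrappers above.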
+ Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set) + if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key] + p = self.rit.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \ + self.rcit.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] + self.pvalue_cache[cache_key] = p + return p class Chisq_or_Gsq(CIT_Base): def __init__(self, data, method_name, **kwargs): diff --git a/setup.py b/setup.py index f132a020..f40a536d 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,8 @@ 'matplotlib', 'networkx', 'pydot', - 'tqdm' + 'tqdm', + 'momentchi2' ], url='https://github.com/py-why/causal-learn', packages=setuptools.find_packages(), From cd61fa424cef3b2f337e9dc37de610c7a13e7021 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:15:47 +0200 Subject: [PATCH 06/16] redo naming of rcit class Signed-off-by: Oliver Schacht --- causallearn/utils/cit.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index c8858a0b..2d664e9b 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -6,7 +6,8 @@ from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd -from causallearn.utils.RCIT.RCIT import RCIT, RIT +from causallearn.utils.RCIT.RCIT import RCIT as RCIT_CInd +from causallearn.utils.RCIT.RCIT import RIT as RCIT_UInd from causallearn.utils.PCUtils import Helper CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5 @@ -222,7 +223,7 @@ def __call__(self, X, Y, condition_set=None): self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] self.pvalue_cache[cache_key] = p return p - + class RCIT(CIT_Base): def __init__(self, data, **kwargs): super().__init__(data, **kwargs) @@ -233,8 +234,8 @@ def __init__(self, data, **kwargs): self.check_cache_method_consistent( 'kci', hashlib.md5(json.dumps(rcit_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) self.assert_input_data_is_valid() - self.rit = RIT(**rit_kwargs) - self.rcit = RCIT(**rcit_kwargs) + self.rit = RCIT_UInd(**rit_kwargs) + self.rcit = RCIT_CInd(**rcit_kwargs) def __call__(self, X, Y, condition_set=None): # Kernel-based conditional independence test. 
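A minimal usage sketch of the two new tests through the CIT dispatcher (the toy data, variable indices, and parameter values below are illustrative; the keyword arguments are the ones filtered by the FastKCI and RCIT wrapper classes above):

import numpy as np
from causallearn.utils.cit import CIT

np.random.seed(0)
data = np.random.randn(300, 3)  # toy sample with columns X0, X1, X2

# FastKCI: K Gaussian blocks, J random partitions, Dirichlet concentration alpha
fastkci_test = CIT(data, "fastkci", K=10, J=8, alpha=500)
p_cond = fastkci_test(0, 1, (2,))   # H0: X0 independent of X1 given X2
p_marg = fastkci_test(0, 1)         # empty conditioning set -> unconditional branch

# RCIT: random Fourier features with the LPB4 approximation of the null distribution
rcit_test = CIT(data, "rcit", approx="lpd4", num_f=100, num_f2=5)
p_rcit = rcit_test(0, 1, (2,))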
From 173b91330a9c7e5f56805655d6daa48e6aa7a50c Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:11:44 +0200 Subject: [PATCH 07/16] debugging Signed-off-by: Oliver Schacht --- causallearn/utils/RCIT/RCIT.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py index c9c931cb..e1bb8dbc 100644 --- a/causallearn/utils/RCIT/RCIT.py +++ b/causallearn/utils/RCIT/RCIT.py @@ -132,7 +132,7 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): return feat - def matrix_cov(mat_a, mat_b): + def matrix_cov(self, mat_a, mat_b): n_obs = mat_a.shape[0] assert mat_a.shape == mat_b.shape @@ -184,8 +184,8 @@ def compute_pvalue(self, data_x, data_y): p = 1 - (np.sum(sta >= stas) / len(stas)) else: - res_x = f_x - np.meshgrid(f_x.mean(axis=0), (r, 1)) - res_y = f_y - np.meshgrid(f_y.mean(axis=0), (r, 1)) + res_x = f_x - f_x.mean(axis=0) + res_y = f_y - f_y.mean(axis=0) d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1]))) res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T @@ -239,7 +239,7 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): return feat - def matrix_cov(mat_a, mat_b): + def matrix_cov(self, mat_a, mat_b): n_obs = mat_a.shape[0] assert mat_a.shape == mat_b.shape From e83e4f05a948dde022b3f414c281476b610b1618 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:09:09 +0200 Subject: [PATCH 08/16] add docstrings Signed-off-by: Oliver Schacht --- causallearn/utils/RCIT/RCIT.py | 156 ++++++++++++++++++++++++++++++++- 1 file changed, 154 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py index e1bb8dbc..11608ed0 100644 --- a/causallearn/utils/RCIT/RCIT.py +++ b/causallearn/utils/RCIT/RCIT.py @@ -7,13 +7,56 @@ class RCIT(object): + """ + Python implementation of Randomized Conditional Independence Test (RCIT) test. + The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master + + References + ---------- + [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional + independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017. + """ def __init__(self, approx="lpd4", num_f=100, num_f2=5, rcit=True): + """ + Initialize the RCIT object. + + Parameters + ---------- + approx : str + Method for approximating the null distribution. + - "lpd4" for the Lindsay-Pilla-Basak method + - "hbe" for the Hall-Buckley-Eagleson method + - "gamma" for the Satterthwaite-Welch method + - "chi2" for a normalized chi-squared statistic + - "perm" for permutation testing + Default is "lpd4". + num_f : int + Number of features for conditioning set. Default is 25. + num_f2 : int + Number of features for non-conditioning sets. Default is 5. + rcit : bool + Whether to use RCIT or RCoT. Default is True. + """ self.approx = approx self.num_f = num_f self.num_f2 = num_f2 self.rcit = rcit def compute_pvalue(self, data_x, data_y, data_z): + """ + Compute the p value and return it together with the test statistic. 
+ + Parameters + ---------- + data_x: input data for x (nxd1 array) + data_y: input data for y (nxd2 array) + data_z: input data for z (nxd3 array) + + Returns + ------- + p: p value + sta: test statistic + """ d = data_z.shape[1] r = data_x.shape[0] r1 = 500 if (r > 500) else r @@ -114,7 +157,27 @@ def compute_pvalue(self, data_x, data_y, data_z): return p, sta def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): - + """ + Generate random Fourier features. + + Parameters + ---------- + x : np.ndarray + Random variable x. + w : np.ndarray + RRandom coefficients. + b : np.ndarray + Random offsets. + num_f : int + Number of random Fourier features. + sigma : float + Smooth parameter of RBF kernel. + + Returns + ------- + feat : np.ndarray + Random Fourier features. + """ if num_f is None: num_f = 25 @@ -133,6 +196,22 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): return feat def matrix_cov(self, mat_a, mat_b): + """ + Compute the covariance matrix between two matrices. + Equivalent to ``cov()`` between two matrices in R. + + Parameters + ---------- + mat_a : np.ndarray + First data matrix. + mat_b : np.ndarray + Second data matrix. + + Returns + ------- + mat_cov : np.ndarray + Covariance matrix. + """ n_obs = mat_a.shape[0] assert mat_a.shape == mat_b.shape @@ -145,10 +224,47 @@ def matrix_cov(self, mat_a, mat_b): class RIT(object): + """ + Python implementation of Randomized Independence Test (RIT) test. + The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master + + References + ---------- + [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional + independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017. + """ def __init__(self, approx="lpd4"): + """ + Initialize the RIT object. + + Parameters + ---------- + approx : str + Method for approximating the null distribution. + - "lpd4" for the Lindsay-Pilla-Basak method + - "hbe" for the Hall-Buckley-Eagleson method + - "gamma" for the Satterthwaite-Welch method + - "chi2" for a normalized chi-squared statistic + - "perm" for permutation testing + Default is "lpd4". + """ self.approx = approx def compute_pvalue(self, data_x, data_y): + """ + Compute the p value and return it together with the test statistic. + + Parameters + ---------- + data_x: input data for x (nxd1 array) + data_y: input data for y (nxd2 array) + data_z: input data for z (nxd3 array) + + Returns + ------- + p: p value + sta: test statistic + """ r = data_x.shape[0] r1 = 500 if (r > 500) else r @@ -221,7 +337,27 @@ def compute_pvalue(self, data_x, data_y): return p, sta def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): - + """ + Generate random Fourier features. + + Parameters + ---------- + x : np.ndarray + Random variable x. + w : np.ndarray + RRandom coefficients. + b : np.ndarray + Random offsets. + num_f : int + Number of random Fourier features. + sigma : float + Smooth parameter of RBF kernel. + + Returns + ------- + feat : np.ndarray + Random Fourier features. + """ if num_f is None: num_f = 25 @@ -240,6 +376,22 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): return feat def matrix_cov(self, mat_a, mat_b): + """ + Compute the covariance matrix between two matrices. + Equivalent to ``cov()`` between two matrices in R. + + Parameters + ---------- + mat_a : np.ndarray + First data matrix. 
+ mat_b : np.ndarray + Second data matrix. + + Returns + ------- + mat_cov : np.ndarray + Covariance matrix. + """ n_obs = mat_a.shape[0] assert mat_a.shape == mat_b.shape From fba553f59ba927c045631537dad0cf9e460cc248 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:30:57 +0200 Subject: [PATCH 09/16] linting Signed-off-by: Oliver Schacht --- causallearn/utils/FastKCI/FastKCI.py | 110 +++++++++++++-------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py index 5b614dd9..407977dd 100644 --- a/causallearn/utils/FastKCI/FastKCI.py +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -9,19 +9,21 @@ from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF import warnings + class FastKCI_CInd(object): """ - Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version. + Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. + Unconditional version. References ---------- [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf, - "A kernel-based conditional independence test and application in causal discovery," In UAI 2011. + "A kernel-based conditional independence test and application in causal discovery", In UAI 2011. [2] M. Zhang, and S. Williamson, - "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019) + "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019) - """ - def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimming_thresh = 1e-3, use_gp=False): + """ + def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming_thresh=1e-3, use_gp=False): """ Initialize the FastKCI_CInd object. @@ -33,7 +35,7 @@ def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimmi epsilon: Penalty for the matrix ridge regression. eig_threshold: Threshold for Eigenvalues. 
use_gp: Whether to use Gaussian Process Regression to determine the kernel widths - """ + """ self.K = K self.J = J self.alpha = alpha @@ -42,7 +44,6 @@ def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimmi self.trimming_thresh = trimming_thresh self.use_gp = use_gp self.nullss = 5000 - # TODO: Adjust to causal-learn API def compute_pvalue(self, data_x=None, data_y=None, data_z=None): """ @@ -67,21 +68,21 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None): self.Z_proposal, self.prob_Z = zip(*Z_proposal) block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) test_stat, null_samples, log_likelihood = zip(*block_res) - + log_likelihood = np.array(log_likelihood) self.prob_Z += log_likelihood - self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used + self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used self.all_null_samples = np.vstack(null_samples) self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)]) self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) self.all_test_stats = np.array(test_stat) - self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) - self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights) + self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights) + self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights) # experimental, not used - self.pvalue_alt = np.sum(np.average(null_samples, axis = 0, weights = self.prob_Z) > np.average(np.array(test_stat), weights = self.prob_Z)) / float(self.nullss) + self.pvalue_alt = np.sum(np.average(null_samples, axis=0, weights=self.prob_Z) > np.average(np.array(test_stat), weights=self.prob_Z)) / float(self.nullss) self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) - return self.pvalue, self.test_stat + return self.pvalue, self.test_stat def partition_data(self): """ @@ -94,18 +95,18 @@ def partition_data(self): """ Z_mean = self.data_z.mean(axis=0) Z_sd = self.data_z.std(axis=0) - mu_k = np.random.normal(Z_mean, Z_sd, size = (self.K,self.data_z.shape[1])) + mu_k = np.random.normal(Z_mean, Z_sd, size=(self.K, self.data_z.shape[1])) sigma_k = np.eye(self.data_z.shape[1]) pi_j = np.random.dirichlet([self.alpha]*self.K) - ll = np.tile(np.log(pi_j),(self.n,1)) + ll = np.tile(np.log(pi_j), (self.n, 1)) for k in range(self.K): - ll[:,k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k,:], cov=sigma_k, allow_singular=True) - Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)]) + ll[:, k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k, :], cov=sigma_k, allow_singular=True) + Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)]) le = LabelEncoder() Z = le.fit_transform(Z) - prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed + prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed return Z, prob_Z - + def pvalue_onblocks(self, Z_proposal): """ Calculate p value on given partitions of the data. 
@@ -113,7 +114,7 @@ def pvalue_onblocks(self, Z_proposal): Parameters ---------- Z_proposal: partition of the data into K clusters (nxd1 array) - + Returns _________ test_stat: test statistic (scalar) @@ -123,13 +124,13 @@ def pvalue_onblocks(self, Z_proposal): unique_Z_j = np.unique(Z_proposal) test_stat = 0 log_likelihood = 0 - null_samples = np.zeros((1,self.nullss)) + null_samples = np.zeros((1, self.nullss)) for k in unique_Z_j: - K_mask = (Z_proposal==k) + K_mask = (Z_proposal == k) X_k = np.copy(self.data[0][K_mask]) Y_k = np.copy(self.data[1][K_mask]) Z_k = np.copy(self.data_z[K_mask]) - if (Z_k.shape[0]<6): # small blocks cause problems in GP, experimental + if (Z_k.shape[0] < 6): # small blocks cause problems in GP, experimental continue Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) @@ -143,7 +144,6 @@ def pvalue_onblocks(self, Z_proposal): log_likelihood += likelihood_x + likelihood_y return test_stat, null_samples, log_likelihood - def kernel_matrix(self, data_x, data_y, data_z): """ Calculates the Gaussian Kernel for given data inputs as well as the shared kernel. @@ -157,10 +157,10 @@ def kernel_matrix(self, data_x, data_y, data_z): data_x = stats.zscore(data_x, ddof=1, axis=0) data_x[np.isnan(data_x)] = 0. - + data_y = stats.zscore(data_y, ddof=1, axis=0) data_y[np.isnan(data_y)] = 0. - + data_z = stats.zscore(data_z, ddof=1, axis=0) data_z[np.isnan(data_z)] = 0. @@ -189,23 +189,23 @@ def kernel_matrix(self, data_x, data_y, data_z): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=Warning) # P(X|Z) - gpx.fit(X = data_z, y = data_x) + gpx.fit(X=data_z, y=data_x) likelihood_x = gpx.log_marginal_likelihood_value_ gpy = GaussianProcessRegressor() with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=Warning) # P(Y|X,Z) - gpy.fit(X = np.c_[data_x,data_z], y=data_y) + gpy.fit(X=np.c_[data_x, data_z], y=data_y) likelihood_y = gpy.log_marginal_likelihood_value_ - else: + else: n, Dz = data_z.shape - + widthz = np.sqrt(1.0 / (kernelX.width * data_x.shape[1])) # Instantiate a Gaussian Process model for x wx, vx = eigh(Kx) - topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) idx = np.argsort(-wx) wx = wx[idx] vx = vx[:, idx] @@ -228,7 +228,7 @@ def kernel_matrix(self, data_x, data_y, data_z): # Instantiate a Gaussian Process model for y wy, vy = eigh(Ky) - topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) idy = np.argsort(-wy) wy = wy[idy] vy = vy[:, idy] @@ -297,7 +297,7 @@ def get_uuprod(self, Kx, Ky): uu_prod = uu.T.dot(uu) return uu_prod, size_u - + def get_kappa(self, mean_appr, var_appr): """ Get parameters for the approximated gamma distribution @@ -333,11 +333,12 @@ def null_sample_spectral(self, uu_prod, size_u, T): eig_uu = -np.sort(-eig_uu) eig_uu = eig_uu[0:np.min((T, size_u))] eig_uu = eig_uu[eig_uu > np.max(eig_uu) * self.eig_thresh] - + f_rand = np.random.chisquare(1, (eig_uu.shape[0], self.nullss)) null_dstr = eig_uu.T.dot(f_rand) return null_dstr + class FastKCI_UInd(object): """ Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version. 
@@ -348,8 +349,8 @@ class FastKCI_UInd(object): "A kernel-based conditional independence test and application in causal discovery," In UAI 2011. [2] M. Zhang, and S. Williamson, "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019) - """ - def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3): + """ + def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3): """ Construct the FastKCI_UInd model. @@ -359,14 +360,13 @@ def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3): J: Number of independent repittitions. alpha: Parameter for the Dirichlet distribution. trimming_thresh: Threshold for trimming the propensity weights. - """ + """ self.K = K self.J = J self.alpha = alpha self.trimming_thresh = trimming_thresh self.nullss = 5000 self.eig_thresh = 1e-5 - # TODO: Adjust to causal-learn API def compute_pvalue(self, data_x=None, data_y=None): """ @@ -385,17 +385,17 @@ def compute_pvalue(self, data_x=None, data_y=None): self.data_x = data_x self.data_y = data_y self.n = data_x.shape[0] - + Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) self.Z_proposal, self.prob_Y = zip(*Z_proposal) block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) test_stat, null_samples, log_likelihood = zip(*block_res) self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) - self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) - self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights) + self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights) + self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights) self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) - return self.pvalue, self.test_stat + return self.pvalue, self.test_stat def partition_data(self): """ @@ -408,25 +408,25 @@ def partition_data(self): """ Y_mean = self.data_y.mean(axis=0) Y_sd = self.data_y.std(axis=0) - mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.data_y.shape[1])) + mu_k = np.random.normal(Y_mean, Y_sd, size=(self.K, self.data_y.shape[1])) sigma_k = np.eye(self.data_y.shape[1]) pi_j = np.random.dirichlet([self.alpha]*self.K) - ll = np.tile(np.log(pi_j),(self.n,1)) + ll = np.tile(np.log(pi_j), (self.n, 1)) for k in range(self.K): - ll[:,k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k,:], cov=sigma_k, allow_singular=True) - Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)]) + ll[:, k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k, :], cov=sigma_k, allow_singular=True) + Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)]) prop_Y = np.take_along_axis(ll, Z[:, None], axis=1).sum() le = LabelEncoder() Z = le.fit_transform(Z) return (Z, prop_Y) - + def pvalue_onblocks(self, Z_proposal): unique_Z_j = np.unique(Z_proposal) test_stat = 0 log_likelihood = 0 - null_samples = np.zeros((1,self.nullss)) + null_samples = np.zeros((1, self.nullss)) for k in unique_Z_j: - K_mask = (Z_proposal==k) + K_mask = (Z_proposal == k) X_k = np.copy(self.data_x[K_mask]) Y_k = np.copy(self.data_y[K_mask]) @@ -442,12 +442,12 @@ def pvalue_onblocks(self, Z_proposal): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=Warning) # P(X|Y) - gpx.fit(X = Y_k, y = X_k) + gpx.fit(X=Y_k, y=X_k) likelihood = 
gpx.log_marginal_likelihood_value_ log_likelihood += likelihood - + return test_stat, null_samples, log_likelihood - + def kernel_matrix(self, data): """ Calculates the Gaussian Kernel for given data inputs. @@ -460,12 +460,12 @@ def kernel_matrix(self, data): kernel_obj.set_width_empirical_hsic(data) data = stats.zscore(data, ddof=1, axis=0) - data[np.isnan(data)] = 0. + data[np.isnan(data)] = 0. K = kernel_obj.kernel(data) return K - + def get_kappa(self, mean_appr, var_appr): """ Get parameters for the approximated gamma distribution @@ -513,7 +513,7 @@ def null_sample_spectral(self, Kxc, Kyc): f_rand = np.random.chisquare(1, (lambda_prod.shape[0], self.nullss)) null_dstr = lambda_prod.T.dot(f_rand) / T return null_dstr - + def HSIC_V_statistic(self, Kx, Ky): """ Compute V test statistic from kernel matrices Kx and Ky From 96518245d8f0e13b884e4a81e56b0a5031a63ae4 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:59:32 +0200 Subject: [PATCH 10/16] edit docstring Signed-off-by: Oliver Schacht --- causallearn/utils/cit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index 2d664e9b..fbc95e95 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -28,8 +28,8 @@ def CIT(data, method='fisherz', **kwargs): Parameters ---------- data: numpy.ndarray of shape (n_samples, n_features) - method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "chisq", "gsq"] - kwargs: placeholder for future arguments, or for KCI specific arguments now + method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "rcit", "fastkci", "chisq", "gsq"] + kwargs: placeholder for future arguments, or for KCI, FastKCI or RCIT specific arguments now TODO: utimately kwargs should be replaced by explicit named parameters. check https://github.com/cmu-phil/causal-learn/pull/62#discussion_r927239028 ''' @@ -52,6 +52,7 @@ def CIT(data, method='fisherz', **kwargs): else: raise ValueError("Unknown method: {}".format(method)) + class CIT_Base(object): # Base class for CIT, contains basic operations for input check and caching, etc. def __init__(self, data, cache_path=None, **kwargs): From 3d159b6d5d4b6edbd3b2aae4385d722c7b1464ac Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:00:21 +0200 Subject: [PATCH 11/16] remove experimental stuff Signed-off-by: Oliver Schacht --- causallearn/utils/FastKCI/FastKCI.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py index 407977dd..91b99ad8 100644 --- a/causallearn/utils/FastKCI/FastKCI.py +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -21,6 +21,9 @@ class FastKCI_CInd(object): "A kernel-based conditional independence test and application in causal discovery", In UAI 2011. [2] M. Zhang, and S. Williamson, "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019) + [3] O. Schacht, and B. Huang + "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery", + Working Paper. 
""" def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming_thresh=1e-3, use_gp=False): @@ -65,21 +68,17 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None): self.n = data_x.shape[0] Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) - self.Z_proposal, self.prob_Z = zip(*Z_proposal) + self.Z_proposal = zip(*Z_proposal) block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) test_stat, null_samples, log_likelihood = zip(*block_res) log_likelihood = np.array(log_likelihood) - self.prob_Z += log_likelihood - self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used self.all_null_samples = np.vstack(null_samples) self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)]) self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) self.all_test_stats = np.array(test_stat) self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights) self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights) - # experimental, not used - self.pvalue_alt = np.sum(np.average(null_samples, axis=0, weights=self.prob_Z) > np.average(np.array(test_stat), weights=self.prob_Z)) / float(self.nullss) self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) return self.pvalue, self.test_stat @@ -104,8 +103,7 @@ def partition_data(self): Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)]) le = LabelEncoder() Z = le.fit_transform(Z) - prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed - return Z, prob_Z + return Z def pvalue_onblocks(self, Z_proposal): """ @@ -130,7 +128,7 @@ def pvalue_onblocks(self, Z_proposal): X_k = np.copy(self.data[0][K_mask]) Y_k = np.copy(self.data[1][K_mask]) Z_k = np.copy(self.data_z[K_mask]) - if (Z_k.shape[0] < 6): # small blocks cause problems in GP, experimental + if (Z_k.shape[0] < 6): # small blocks cause problems in GP continue Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) @@ -214,7 +212,8 @@ def kernel_matrix(self, data_x, data_y, data_z): vx = vx[:, np.abs(wx) > np.abs(wx).max() * 1e-10] wx = wx[np.abs(wx) > np.abs(wx).max() * 1e-10] vx = 2 * np.sqrt(n) * vx.dot(np.diag(np.sqrt(wx))) / np.sqrt(wx[0]) - kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1)) + kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \ + + WhiteKernel(0.1, (1e-10, 1e+1)) gpx = GaussianProcessRegressor(kernel=kernelx) # fit Gaussian process, including hyperparameter optimization with warnings.catch_warnings(): @@ -237,7 +236,8 @@ def kernel_matrix(self, data_x, data_y, data_z): vy = vy[:, np.abs(wy) > np.abs(wy).max() * 1e-10] wy = wy[np.abs(wy) > np.abs(wy).max() * 1e-10] vy = 2 * np.sqrt(n) * vy.dot(np.diag(np.sqrt(wy))) / np.sqrt(wy[0]) - kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1)) + kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \ + + WhiteKernel(0.1, (1e-10, 1e+1)) gpy = GaussianProcessRegressor(kernel=kernely) # fit Gaussian process, including hyperparameter optimization with 
warnings.catch_warnings(): @@ -341,7 +341,8 @@ def null_sample_spectral(self, uu_prod, size_u, T): class FastKCI_UInd(object): """ - Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version. + Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. + Unconditional version. References ---------- @@ -349,6 +350,9 @@ class FastKCI_UInd(object): "A kernel-based conditional independence test and application in causal discovery," In UAI 2011. [2] M. Zhang, and S. Williamson, "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019) + [3] O. Schacht, and B. Huang + "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery", + Working Paper. """ def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3): """ From cf9d425ea6cb62637dfd0b2cba0fe785422b94d5 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:13:00 +0200 Subject: [PATCH 12/16] remove propensity trimming Signed-off-by: Oliver Schacht --- causallearn/utils/FastKCI/FastKCI.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py index 91b99ad8..6e431568 100644 --- a/causallearn/utils/FastKCI/FastKCI.py +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -26,7 +26,7 @@ class FastKCI_CInd(object): Working Paper. """ - def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming_thresh=1e-3, use_gp=False): + def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, use_gp=False): """ Initialize the FastKCI_CInd object. @@ -44,7 +44,6 @@ def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming self.alpha = alpha self.epsilon = epsilon self.eig_thresh = eig_thresh - self.trimming_thresh = trimming_thresh self.use_gp = use_gp self.nullss = 5000 @@ -67,8 +66,7 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None): self.data_z = data_z self.n = data_x.shape[0] - Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) - self.Z_proposal = zip(*Z_proposal) + self.Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) test_stat, null_samples, log_likelihood = zip(*block_res) @@ -354,7 +352,7 @@ class FastKCI_UInd(object): "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery", Working Paper. """ - def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3): + def __init__(self, K=10, J=8, alpha=500): """ Construct the FastKCI_UInd model. @@ -363,12 +361,10 @@ def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3): K: Number of Gaussians that are assumed to be in the mixture J: Number of independent repittitions. alpha: Parameter for the Dirichlet distribution. - trimming_thresh: Threshold for trimming the propensity weights. 
""" self.K = K self.J = J self.alpha = alpha - self.trimming_thresh = trimming_thresh self.nullss = 5000 self.eig_thresh = 1e-5 From 2642628bc003a737740bb5617a76678f2d7db04d Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:15:40 +0200 Subject: [PATCH 13/16] add unit tests Signed-off-by: Oliver Schacht --- tests/TestCIT_FastKCI.py | 33 +++++++++++++++++++++++++++++++++ tests/TestCIT_RCIT.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/TestCIT_FastKCI.py create mode 100644 tests/TestCIT_RCIT.py diff --git a/tests/TestCIT_FastKCI.py b/tests/TestCIT_FastKCI.py new file mode 100644 index 00000000..06a84baf --- /dev/null +++ b/tests/TestCIT_FastKCI.py @@ -0,0 +1,33 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_FastKCI(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(1200, 1) + X_prime = np.random.randn(1200, 1) + Y = X + 0.5 * np.random.randn(1200, 1) + Z = Y + 0.5 * np.random.randn(1200, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for K in [3, 10]: + for J in [8, 16]: + for use_gp in [True, False]: + cit_CIT = cit.CIT(data, 'fastkci', K=K, J=J, use_gp=use_gp) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") diff --git a/tests/TestCIT_RCIT.py b/tests/TestCIT_RCIT.py new file mode 100644 index 00000000..2482f971 --- /dev/null +++ b/tests/TestCIT_RCIT.py @@ -0,0 +1,35 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_RCIT(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(300, 1) + X_prime = np.random.randn(300, 1) + Y = X + 0.5 * np.random.randn(300, 1) + Z = Y + 0.5 * np.random.randn(300, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for approx in ["lpd4", "hbe", "gamma", "chi2", "perm"]: + for num_f in [50, 100]: + for num_f2 in [5, 10]: + for rcit in [True, False]: + cit_CIT = cit.CIT(data, 'rcit', approx=approx, num_f=num_f, + num_f2=num_f2, rcit=rcit) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") From 6a837f6ee4499868aeca8171401faba92676b33a Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:06:09 +0200 Subject: [PATCH 14/16] spelling Signed-off-by: Oliver Schacht --- causallearn/utils/FastKCI/FastKCI.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py index 6e431568..f5fcc506 100644 --- a/causallearn/utils/FastKCI/FastKCI.py 
+++ b/causallearn/utils/FastKCI/FastKCI.py @@ -22,7 +22,7 @@ class FastKCI_CInd(object): [2] M. Zhang, and S. Williamson, "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019) [3] O. Schacht, and B. Huang - "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery", + "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery", Working Paper. """ @@ -349,7 +349,7 @@ class FastKCI_UInd(object): [2] M. Zhang, and S. Williamson, "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019) [3] O. Schacht, and B. Huang - "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery", + "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery", Working Paper. """ def __init__(self, K=10, J=8, alpha=500): From 6a580ed2e52aa48f19b5ea78bc9f7dc69222290d Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:06:22 +0200 Subject: [PATCH 15/16] fix: array dtype Signed-off-by: Oliver Schacht --- tests/TestCIT_FastKCI.py | 3 +++ tests/TestCIT_RCIT.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tests/TestCIT_FastKCI.py b/tests/TestCIT_FastKCI.py index 06a84baf..b8507f78 100644 --- a/tests/TestCIT_FastKCI.py +++ b/tests/TestCIT_FastKCI.py @@ -25,6 +25,9 @@ def test_Gaussian_dist(self): pvalue03.append(round(cit_CIT(0, 3), 4)) pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + pvalue01 = np.array(pvalue01) + pvalue03 = np.array(pvalue03) + pvalue032 = np.array(pvalue032) self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), "pvalue01 contains invalid values") self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), diff --git a/tests/TestCIT_RCIT.py b/tests/TestCIT_RCIT.py index 2482f971..ef6406fd 100644 --- a/tests/TestCIT_RCIT.py +++ b/tests/TestCIT_RCIT.py @@ -27,6 +27,9 @@ def test_Gaussian_dist(self): pvalue03.append(round(cit_CIT(0, 3), 4)) pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + pvalue01 = np.array(pvalue01) + pvalue03 = np.array(pvalue03) + pvalue032 = np.array(pvalue032) self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), "pvalue01 contains invalid values") self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), From b803533ea492a2c394eb71ba16c7fbd302289d1a Mon Sep 17 00:00:00 2001 From: Haoyue Dai Date: Tue, 5 Nov 2024 02:30:42 -0500 Subject: [PATCH 16/16] Update KCI.py --- causallearn/utils/KCI/KCI.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py index 681d48d4..41cbd467 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -306,8 +306,8 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None): k_appr, theta_appr = self.get_kappa(uu_prod) pvalue = 1 - stats.gamma.cdf(test_stat, k_appr, 0, theta_appr) else: - self.null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) - pvalue = sum(self.null_samples > test_stat) / float(self.nullss) + null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) + pvalue = sum(null_samples > test_stat) / float(self.nullss) return pvalue, test_stat def kernel_matrix(self, data_x, data_y, data_z):
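
For quick reference, the tests added in this series exercise the new methods through the standard CIT interface. The following is a minimal usage sketch, not part of the patches: the data layout mirrors the unit tests above (X independent of X', X -> Y -> Z), and the keyword arguments are the ones the tests pass through (K, J, use_gp for "fastkci"; approx, num_f, num_f2, rcit for "rcit").

    import numpy as np
    from causallearn.utils.cit import CIT

    np.random.seed(10)
    X = np.random.randn(600, 1)
    X_prime = np.random.randn(600, 1)
    Y = X + 0.5 * np.random.randn(600, 1)
    Z = Y + 0.5 * np.random.randn(600, 1)
    data = np.hstack((X, X_prime, Y, Z))  # columns: 0=X, 1=X', 2=Y, 3=Z

    # FastKCI: K mixture components, J parallel random partitions of the data.
    fastkci_test = CIT(data, "fastkci", K=10, J=8, use_gp=False)
    p_uncond = fastkci_test(0, 1)      # X vs X' (unconditional branch, KCI_UInd)
    p_cond = fastkci_test(0, 3, {2})   # X vs Z given Y (FastKCI_CInd)

    # RCIT: random-feature approximation, as exercised in TestCIT_RCIT.py.
    rcit_test = CIT(data, "rcit", approx="lpd4", num_f=100, num_f2=10, rcit=True)
    p_rcit = rcit_test(0, 3, {2})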
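
The conditional test follows the embarrassingly-parallel scheme of reference [2]: each of the J repetitions draws a random partition of the samples, block-wise statistics and null samples are computed independently, and the repetitions are averaged with weights proportional to the exponentiated log marginal likelihoods, exactly as in FastKCI_CInd.compute_pvalue above. A minimal sketch of that combination step, assuming the per-repetition quantities have already been computed (the name combine_repetitions is illustrative, not part of causal-learn):

    import numpy as np
    from scipy.special import logsumexp

    def combine_repetitions(test_stats, null_samples, log_likelihoods, nullss=5000):
        # test_stats: length-J array of block-aggregated test statistics
        # null_samples: (J, nullss) array of null-distribution draws per repetition
        # log_likelihoods: length-J array of marginal log-likelihoods of the partitions
        log_likelihoods = np.asarray(log_likelihoods)
        weights = np.exp(log_likelihoods - logsumexp(log_likelihoods))  # softmax weights
        stat = np.average(np.asarray(test_stats), weights=weights)
        null = np.average(np.asarray(null_samples), axis=0, weights=weights)
        return np.sum(null > stat) / float(nullss)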
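
Within each block, the null distribution of the statistic is approximated either by a two-parameter gamma fit (get_kappa) or by spectral sampling, mirroring null_sample_spectral in the diffs above: weighted sums of independent chi-square(1) draws, with weights given by the leading eigenvalues of the centered kernel product. A standalone sketch of the spectral route under the same truncation thresholds used in FastKCI (spectral_null_pvalue is an illustrative name, not the library API):

    import numpy as np
    from numpy.linalg import eigh

    def spectral_null_pvalue(uu_prod, test_stat, size_u, T, nullss=5000, eig_thresh=1e-6):
        # uu_prod: (size_u, size_u) product of the centered kernel factors
        # T: number of samples in the block
        eig_uu = eigh(uu_prod)[0]                    # eigenvalues, ascending order
        eig_uu = -np.sort(-eig_uu)[:min(T, size_u)]  # keep the largest min(T, size_u) values
        eig_uu = eig_uu[eig_uu > np.max(eig_uu) * eig_thresh]
        f_rand = np.random.chisquare(1, (eig_uu.shape[0], nullss))
        null_samples = eig_uu.T.dot(f_rand)          # one weighted chi-square sum per draw
        return np.sum(null_samples > test_stat) / float(nullss)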