From 4e43d28d5fb80b748e9c2435c5209c55e51c8188 Mon Sep 17 00:00:00 2001 From: mbruhns Date: Sun, 10 Dec 2023 16:07:01 +0100 Subject: [PATCH 1/6] Update. --- .gitignore | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 148 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 03bc42c..19c423a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,148 @@ -__pycache__ -*.pdf -build -dist -*.egg-info -.eggs \ No newline at end of file +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Sublime workspace +*.sublime-workspace +.DS_Store + +#Custom folders +results/ +figures/ + +*.sublime-workspace +*.sublime-project + +# Jupyter notebooks +*.ipynb + +.idea/ + +*.h5ad + From d6604d8c79206c04cfb9f28a5e7f9a13f96f2346 Mon Sep 17 00:00:00 2001 From: mbruhns Date: Sun, 10 Dec 2023 16:11:31 +0100 Subject: [PATCH 2/6] Basic reformatting. --- harmony/harmony.py | 18 +++++++++--------- harmony/utils.py | 8 +++++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index 77cc1f3..9248d35 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -120,9 +120,12 @@ def harmonize( device_type = "cuda" if verbose: print("Use GPU mode.") - else: + elif torch.backends.mps.is_available(): + device_type = "mps" if verbose: - print("CUDA is not available on your machine. Use CPU mode instead.") + print("Use Metal (MPS) mode.") + elif verbose: + print("Neither CUDA nor MPS is available on your machine. Use CPU mode instead.") (stride_0, stride_1) = X.strides if stride_0 < 0 or stride_1 < 0: @@ -156,7 +159,7 @@ def harmonize( theta = theta.view(1, -1) assert block_proportion > 0 and block_proportion <= 1 - assert correction_method in ["fast", "original"] + assert correction_method in {"fast", "original"} np.random.seed(random_state) @@ -206,13 +209,10 @@ def harmonize( if is_convergent_harmony(objectives_harmony, tol=tol_harmony): if verbose: - print("Reach convergence after {} iteration(s).".format(i + 1)) + print(f"Reach convergence after {i + 1} iteration(s).") break - if device_type == "cpu": - return Z_hat.numpy() - else: - return Z_hat.cpu().numpy() + return Z_hat.numpy() if device_type == "cpu" else Z_hat.cpu().numpy() def initialize_centroids( @@ -287,7 +287,7 @@ def clustering( objectives_clustering = [] - for i in range(max_iter): + for _ in range(max_iter): # Compute Cluster Centroids Y = torch.matmul(R.t(), Z_norm) Y_norm = normalize(Y, p=2, dim=1) diff --git a/harmony/utils.py b/harmony/utils.py index b132138..857e4ea 100644 --- a/harmony/utils.py +++ b/harmony/utils.py @@ -2,9 +2,11 @@ def get_batch_codes(batch_mat, batch_key): - if type(batch_key) is str or len(batch_key) == 1: - if not type(batch_key) is str: - batch_key = batch_key[0] + if type(batch_key) is str: + batch_vec = batch_mat[batch_key] + + elif len(batch_key) == 1: + batch_key = batch_key[0] batch_vec = batch_mat[batch_key] From ae8cd474ec7a71dba485b91d16dc32dfd8bff619 Mon Sep 17 00:00:00 2001 From: mbruhns Date: Sun, 10 Dec 2023 16:14:48 +0100 Subject: [PATCH 3/6] Run Ruff. --- harmony/__init__.py | 2 +- harmony/harmony.py | 55 +++++---- harmony/utils.py | 10 +- setup.py | 2 +- test/test.py | 266 ++++++++++++++++++++++++++++++-------------- test/test_gpu.py | 110 +++++++++++------- 6 files changed, 286 insertions(+), 159 deletions(-) diff --git a/harmony/__init__.py b/harmony/__init__.py index dd0606b..6663ba6 100644 --- a/harmony/__init__.py +++ b/harmony/__init__.py @@ -6,7 +6,7 @@ from importlib_metadata import version, PackageNotFoundError try: - __version__ = version('harmony-pytorch') + __version__ = version("harmony-pytorch") del version except PackageNotFoundError: pass diff --git a/harmony/harmony.py b/harmony/harmony.py index 9248d35..8a2f7c9 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -10,7 +10,6 @@ from .utils import one_hot_tensor, get_batch_codes - def harmonize( X: np.array, batch_mat: pd.DataFrame, @@ -105,13 +104,16 @@ def harmonize( >>> X_harmony = harmonize(adata.obsm['X_pca'], adata.obs, ['Channel', 'Lab']) """ - assert(isinstance(X, np.ndarray)) + assert isinstance(X, np.ndarray) if n_jobs < 0: import psutil - n_jobs = psutil.cpu_count(logical=False) # get physical cores + + n_jobs = psutil.cpu_count(logical=False) # get physical cores if n_jobs is None: - n_jobs = psutil.cpu_count(logical=True) # if undetermined, use logical cores instead + n_jobs = psutil.cpu_count( + logical=True + ) # if undetermined, use logical cores instead torch.set_num_threads(n_jobs) device_type = "cpu" @@ -125,7 +127,9 @@ def harmonize( if verbose: print("Use Metal (MPS) mode.") elif verbose: - print("Neither CUDA nor MPS is available on your machine. Use CPU mode instead.") + print( + "Neither CUDA nor MPS is available on your machine. Use CPU mode instead." + ) (stride_0, stride_1) = X.strides if stride_0 < 0 or stride_1 < 0: @@ -229,17 +233,19 @@ def initialize_centroids( ): n_cells = Z_norm.shape[0] - kmeans_params = {'n_clusters': n_clusters, - 'init': "k-means++", - 'n_init': n_init, - 'random_state': random_state, - 'max_iter': 25, - } + kmeans_params = { + "n_clusters": n_clusters, + "init": "k-means++", + "n_init": n_init, + "random_state": random_state, + "max_iter": 25, + } kmeans = KMeans(**kmeans_params) from threadpoolctl import threadpool_limits - with threadpool_limits(limits = n_jobs): + + with threadpool_limits(limits=n_jobs): if device_type == "cpu": kmeans.fit(Z_norm) else: @@ -249,9 +255,7 @@ def initialize_centroids( Y_norm = normalize(Y, p=2, dim=1) # Initialize R - R = torch.exp( - -2 / sigma * (1 - torch.matmul(Z_norm, Y_norm.t())) - ) + R = torch.exp(-2 / sigma * (1 - torch.matmul(Z_norm, Y_norm.t()))) R = normalize(R, p=1, dim=1) E = torch.matmul(Pr_b, torch.sum(R, dim=0, keepdim=True)) @@ -282,7 +286,6 @@ def clustering( device_type, n_init=10, ): - n_cells = Z_norm.shape[0] objectives_clustering = [] @@ -298,12 +301,8 @@ def clustering( pos = 0 while pos < len(idx_list): idx_in = idx_list[pos : (pos + block_size)] - R_in = R[ - idx_in, - ] - Phi_in = Phi[ - idx_in, - ] + R_in = R[idx_in,] + Phi_in = Phi[idx_in,] # Compute O and E on left out data. O -= torch.matmul(Phi_in.t(), R_in) @@ -347,14 +346,12 @@ def correction_original(X, R, Phi, ridge_lambda, device_type): Phi_1 = torch.cat((torch.ones(n_cells, 1, device=device_type), Phi), dim=1) Z = X.clone() - id_mat = torch.eye(n_batches + 1, n_batches + 1, device = device_type) + id_mat = torch.eye(n_batches + 1, n_batches + 1, device=device_type) id_mat[0, 0] = 0 Lambda = ridge_lambda * id_mat for k in range(n_clusters): Phi_t_diag_R = Phi_1.t() * R[:, k].view(1, -1) - inv_mat = torch.inverse( - torch.matmul(Phi_t_diag_R, Phi_1) + Lambda - ) + inv_mat = torch.inverse(torch.matmul(Phi_t_diag_R, Phi_1) + Lambda) W = torch.matmul(inv_mat, torch.matmul(Phi_t_diag_R, X)) W[0, :] = 0 Z -= torch.matmul(Phi_t_diag_R.t(), W) @@ -375,7 +372,7 @@ def correction_fast(X, R, Phi, O, ridge_lambda, device_type): N_k = torch.sum(O_k) factor = 1 / (O_k + ridge_lambda) - c = N_k + torch.sum(-factor * O_k ** 2) + c = N_k + torch.sum(-factor * O_k**2) c_inv = 1 / c P[0, 1:] = -factor * O_k @@ -401,7 +398,9 @@ def compute_objective( Y_norm, Z_norm, R, theta, sigma, O, E, objective_arr, device_type ): kmeans_error = torch.sum(R * 2 * (1 - torch.matmul(Z_norm, Y_norm.t()))) - entropy_term = sigma * torch.sum(-torch.distributions.Categorical(probs=R).entropy()) + entropy_term = sigma * torch.sum( + -torch.distributions.Categorical(probs=R).entropy() + ) diversity_penalty = sigma * torch.sum( torch.matmul(theta, O * torch.log(torch.div(O + 1, E + 1))) ) diff --git a/harmony/utils.py b/harmony/utils.py index 857e4ea..3c382a9 100644 --- a/harmony/utils.py +++ b/harmony/utils.py @@ -11,17 +11,19 @@ def get_batch_codes(batch_mat, batch_key): batch_vec = batch_mat[batch_key] else: - df = batch_mat[batch_key].astype('str') - batch_vec = df.apply(lambda row: ','.join(row), axis = 1) + df = batch_mat[batch_key].astype("str") + batch_vec = df.apply(lambda row: ",".join(row), axis=1) return batch_vec.astype("category") def one_hot_tensor(X, device_type): - ids = torch.as_tensor(X.cat.codes.values.copy(), dtype = torch.long, device = device_type).view(-1, 1) + ids = torch.as_tensor( + X.cat.codes.values.copy(), dtype=torch.long, device=device_type + ).view(-1, 1) n_row = X.size n_col = X.cat.categories.size - Phi = torch.zeros(n_row, n_col, dtype=torch.float, device = device_type) + Phi = torch.zeros(n_row, n_col, dtype=torch.float, device=device_type) Phi.scatter_(dim=1, index=ids, value=1.0) return Phi diff --git a/setup.py b/setup.py index 711a8cf..16997ee 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ url="https://github.com/lilab-bcb/harmony-pytorch", author="Yiming Yang, Bo Li", author_email="yyang43@mgh.harvard.edu, bli28@mgh.harvard.edu", - classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", diff --git a/test/test.py b/test/test.py index 008e593..e8c6ec4 100644 --- a/test/test.py +++ b/test/test.py @@ -13,7 +13,8 @@ from scipy.sparse import csr_matrix -metric_dict = {'r': 'Correlation', 'L2': 'L2 Error'} +metric_dict = {"r": "Correlation", "L2": "L2 Error"} + def check_metric(Z_torch, Z_py, Z_R, prefix, norm): assert Z_torch.shape == Z_py.shape and Z_py.shape == Z_R.shape @@ -23,22 +24,36 @@ def check_metric(Z_torch, Z_py, Z_R, prefix, norm): m = get_measure(Z_torch[:, i], Z_R[:, i], norm) metric_torch.append(m) - print("Mean {metric} by harmony-pytorch = {value:.4f}".format(metric = metric_dict[norm], value = np.mean(metric_torch))) - np.savetxt("./result/{prefix}_{metric}_torch.txt".format(prefix = prefix, metric = norm), metric_torch) + print( + "Mean {metric} by harmony-pytorch = {value:.4f}".format( + metric=metric_dict[norm], value=np.mean(metric_torch) + ) + ) + np.savetxt( + "./result/{prefix}_{metric}_torch.txt".format(prefix=prefix, metric=norm), + metric_torch, + ) metric_py = [] for i in range(Z_py.shape[1]): m = get_measure(Z_py[:, i], Z_R[:, i], norm) metric_py.append(m) - print("Mean {metric} by harmonypy = {value:.4f}".format(metric = metric_dict[norm], value = np.mean(metric_py))) - np.savetxt("./result/{prefix}_{metric}_py.txt".format(prefix = prefix, metric = norm), metric_py) + print( + "Mean {metric} by harmonypy = {value:.4f}".format( + metric=metric_dict[norm], value=np.mean(metric_py) + ) + ) + np.savetxt( + "./result/{prefix}_{metric}_py.txt".format(prefix=prefix, metric=norm), + metric_py, + ) def get_measure(x, base, norm): - assert norm in ['r', 'L2'] + assert norm in ["r", "L2"] - if norm == 'r': + if norm == "r": corr, _ = pearsonr(x, base) return corr else: @@ -47,40 +62,58 @@ def get_measure(x, base, norm): def plot_umap(adata, Z_torch, Z_py, Z_R, prefix, batch_key): if adata is not None: - adata.obsm['X_torch'] = Z_torch - adata.obsm['X_py'] = Z_py - adata.obsm['X_harmony'] = Z_R + adata.obsm["X_torch"] = Z_torch + adata.obsm["X_py"] = Z_py + adata.obsm["X_harmony"] = Z_R - pg.neighbors(adata, rep = 'torch') - pg.umap(adata, rep = 'torch', out_basis = 'umap_torch') + pg.neighbors(adata, rep="torch") + pg.umap(adata, rep="torch", out_basis="umap_torch") - pg.neighbors(adata, rep = 'py') - pg.umap(adata, rep = 'py', out_basis = 'umap_py') + pg.neighbors(adata, rep="py") + pg.umap(adata, rep="py", out_basis="umap_py") - pg.neighbors(adata, rep = 'harmony') - pg.umap(adata, rep = 'harmony', out_basis = 'umap_harmony') + pg.neighbors(adata, rep="harmony") + pg.umap(adata, rep="harmony", out_basis="umap_harmony") pg.write_output(adata, "./result/{}_result".format(prefix)) else: print("Use precalculated AnnData result.") - if os.system("pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.before.umap.pdf".format(name = prefix, attr = batch_key)): + if os.system( + "pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.before.umap.pdf".format( + name=prefix, attr=batch_key + ) + ): sys.exit(1) - if os.system("pegasus plot scatter --basis umap_torch --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.torch.umap.pdf".format(name = prefix, attr = batch_key)): + if os.system( + "pegasus plot scatter --basis umap_torch --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.torch.umap.pdf".format( + name=prefix, attr=batch_key + ) + ): sys.exit(1) - if os.system("pegasus plot scatter --basis umap_py --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.py.umap.pdf".format(name = prefix, attr = batch_key)): + if os.system( + "pegasus plot scatter --basis umap_py --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.py.umap.pdf".format( + name=prefix, attr=batch_key + ) + ): sys.exit(1) - if os.system("pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.harmony.umap.pdf".format(name = prefix, attr = batch_key)): + if os.system( + "pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.harmony.umap.pdf".format( + name=prefix, attr=batch_key + ) + ): sys.exit(1) def test_cell_lines(): print("Testing on cell lines dataset...") - z_files = [f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)] + z_files = [ + f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f) + ] if len(z_files) < 3 or not os.path.exists("./result/cell_lines_result.h5ad"): X = np.loadtxt("./data/cell_lines/pca.txt") df_metadata = pd.read_csv("./data/cell_lines/metadata.csv") @@ -91,10 +124,12 @@ def test_cell_lines(): print("Precalculated embedding by harmony-pytorch is loaded.") else: start_torch = time.time() - Z_torch = harmonize(X, df_metadata, batch_key = 'dataset') + Z_torch = harmonize(X, df_metadata, batch_key="dataset") end_torch = time.time() - print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch)) + print( + "Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch) + ) np.save("./result/cell_lines_torch_z.npy", Z_torch) if os.path.exists("./result/cell_lines_py_z.npy"): @@ -102,7 +137,7 @@ def test_cell_lines(): print("Precalculated embedding by harmonypy is loaded.") else: start_py = time.time() - ho = run_harmony(X, df_metadata, ['dataset']) + ho = run_harmony(X, df_metadata, ["dataset"]) end_py = time.time() print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py)) @@ -113,34 +148,46 @@ def test_cell_lines(): Z_R = np.loadtxt("./result/cell_lines_harmony_z.txt") - check_metric(Z_torch, Z_py, Z_R, prefix = "cell_lines", norm = 'r') - check_metric(Z_torch, Z_py, Z_R, prefix = "cell_lines", norm = 'L2') + check_metric(Z_torch, Z_py, Z_R, prefix="cell_lines", norm="r") + check_metric(Z_torch, Z_py, Z_R, prefix="cell_lines", norm="L2") if os.path.exists("./result/cell_lines_result.h5ad"): adata = None else: n_obs = X.shape[0] - adata = AnnData(X = csr_matrix((n_obs, 2)), obs = df_metadata) - adata.obsm['X_pca'] = X + adata = AnnData(X=csr_matrix((n_obs, 2)), obs=df_metadata) + adata.obsm["X_pca"] = X - pg.neighbors(adata, rep = 'pca') + pg.neighbors(adata, rep="pca") pg.umap(adata) umap_list = [f for f in os.listdir("./plots") if re.match("cell_lines.*.pdf", f)] if len(umap_list) < 4: - plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "cell_lines", batch_key = "dataset") + plot_umap(adata, Z_torch, Z_py, Z_R, prefix="cell_lines", batch_key="dataset") if os.path.exists("./result/cell_lines_result.h5ad"): - adata = pg.read_input("./result/cell_lines_result.h5ad", h5ad_mode = 'r') - - stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = 'harmony') - print("kBET for Harmony: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(stat = stat, pval = pvalue, ac_rate = ac_rate)) - - stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = 'py') - print("kBET for harmonypy: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(stat = stat, pval = pvalue, ac_rate = ac_rate)) - - stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = 'torch') - print("kBET for harmony-pytorch: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(stat = stat, pval = pvalue, ac_rate = ac_rate)) + adata = pg.read_input("./result/cell_lines_result.h5ad", h5ad_mode="r") + + stat, pvalue, ac_rate = pg.calc_kBET(adata, attr="dataset", rep="harmony") + print( + "kBET for Harmony: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format( + stat=stat, pval=pvalue, ac_rate=ac_rate + ) + ) + + stat, pvalue, ac_rate = pg.calc_kBET(adata, attr="dataset", rep="py") + print( + "kBET for harmonypy: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format( + stat=stat, pval=pvalue, ac_rate=ac_rate + ) + ) + + stat, pvalue, ac_rate = pg.calc_kBET(adata, attr="dataset", rep="torch") + print( + "kBET for harmony-pytorch: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format( + stat=stat, pval=pvalue, ac_rate=ac_rate + ) + ) def test_pbmc(): @@ -155,10 +202,12 @@ def test_pbmc(): print("Precalculated embedding by harmony-pytorch is loaded.") else: start_torch = time.time() - Z_torch = harmonize(adata.obsm['X_pca'], adata.obs, batch_key = 'Channel') + Z_torch = harmonize(adata.obsm["X_pca"], adata.obs, batch_key="Channel") end_torch = time.time() - print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch)) + print( + "Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch) + ) np.save("./result/pbmc_torch_z.npy", Z_torch) if os.path.exists("./result/pbmc_py_z.npy"): @@ -166,7 +215,7 @@ def test_pbmc(): print("Precalculated embedding by harmonypy is loaded.") else: start_py = time.time() - ho = run_harmony(adata.obsm['X_pca'], adata.obs, ['Channel']) + ho = run_harmony(adata.obsm["X_pca"], adata.obs, ["Channel"]) end_py = time.time() print(ho.objective_harmony) @@ -177,34 +226,40 @@ def test_pbmc(): Z_R = np.loadtxt("./result/pbmc_harmony_z.txt") - check_metric(Z_torch, Z_py, Z_R, prefix = "pbmc", norm = 'r') - check_metric(Z_torch, Z_py, Z_R, prefix = "pbmc", norm = 'L2') + check_metric(Z_torch, Z_py, Z_R, prefix="pbmc", norm="r") + check_metric(Z_torch, Z_py, Z_R, prefix="pbmc", norm="L2") if os.path.exists("./result/pbmc_result.h5ad"): adata = None umap_list = [f for f in os.listdir("./plots") if re.match("pbmc.*.pdf", f)] if len(umap_list) < 4: - plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "pbmc", batch_key = "Channel") + plot_umap(adata, Z_torch, Z_py, Z_R, prefix="pbmc", batch_key="Channel") def test_mantonbm(): print("Testing on MantonBM dataset...") - z_files = [f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)] + z_files = [ + f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f) + ] if len(z_files) < 3 or not os.path.exists("./result/MantonBM_result.h5ad"): adata = pg.read_input("./data/MantonBM/original_data.h5ad") - adata.obs['Individual'] = pd.Categorical(adata.obs['Channel'].apply(lambda s: s.split('_')[0][-1])) + adata.obs["Individual"] = pd.Categorical( + adata.obs["Channel"].apply(lambda s: s.split("_")[0][-1]) + ) if os.path.exists("./result/MantonBM_torch_z.npy"): Z_torch = np.load("./result/MantonBM_torch_z.npy") print("Precalculated embedding by harmony-pytorch is loaded.") else: start_torch = time.time() - Z_torch = harmonize(adata.obsm['X_pca'], adata.obs, batch_key = 'Channel') + Z_torch = harmonize(adata.obsm["X_pca"], adata.obs, batch_key="Channel") end_torch = time.time() - print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch)) + print( + "Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch) + ) np.save("./result/MantonBM_torch_z.npy", Z_torch) if os.path.exists("./result/MantonBM_py_z.npy"): @@ -212,7 +267,7 @@ def test_mantonbm(): print("Precalculated embedding by harmonypy is loaded.") else: start_py = time.time() - ho = run_harmony(adata.obsm['X_pca'], adata.obs, ['Channel']) + ho = run_harmony(adata.obsm["X_pca"], adata.obs, ["Channel"]) end_py = time.time() print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py)) @@ -220,67 +275,112 @@ def test_mantonbm(): Z_py = np.transpose(ho.Z_corr) np.save("./result/MantonBM_py_z.npy", Z_py) - Z_R = np.loadtxt("./result/MantonBM_harmony_z.txt") - check_metric(Z_torch, Z_py, Z_R, prefix = "MantonBM", norm = 'r') - check_metric(Z_torch, Z_py, Z_R, prefix = "MantonBM", norm = 'L2') + check_metric(Z_torch, Z_py, Z_R, prefix="MantonBM", norm="r") + check_metric(Z_torch, Z_py, Z_R, prefix="MantonBM", norm="L2") if os.path.exists("./result/MantonBM_result.h5ad"): adata = None umap_list = [f for f in os.listdir("./plots") if re.match("MantonBM.*.pdf", f)] if len(umap_list) < 4: - plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "MantonBM", batch_key = "Individual") + plot_umap(adata, Z_torch, Z_py, Z_R, prefix="MantonBM", batch_key="Individual") def gen_plot(norm): - # Cell Lines metric_celllines_torch = np.loadtxt("./result/cell_lines_{}_torch.txt".format(norm)) metric_celllines_py = np.loadtxt("./result/cell_lines_{}_py.txt".format(norm)) - df1 = pd.DataFrame({'dataset' : np.repeat(['Cell Lines'], metric_celllines_torch.size + metric_celllines_py.size), - 'package' : np.concatenate((np.repeat(['Torch'], metric_celllines_torch.size), - np.repeat(['Py'], metric_celllines_py.size)), axis = 0), - 'metric' : np.concatenate((metric_celllines_torch, metric_celllines_py), axis = 0)}) + df1 = pd.DataFrame( + { + "dataset": np.repeat( + ["Cell Lines"], metric_celllines_torch.size + metric_celllines_py.size + ), + "package": np.concatenate( + ( + np.repeat(["Torch"], metric_celllines_torch.size), + np.repeat(["Py"], metric_celllines_py.size), + ), + axis=0, + ), + "metric": np.concatenate( + (metric_celllines_torch, metric_celllines_py), axis=0 + ), + } + ) # PBMC metric_pbmc_torch = np.loadtxt("./result/pbmc_{}_torch.txt".format(norm)) metric_pbmc_py = np.loadtxt("./result/pbmc_{}_py.txt".format(norm)) - df2 = pd.DataFrame({'dataset' : np.repeat(['10x PBMC'], metric_pbmc_torch.size + metric_pbmc_py.size), - 'package' : np.concatenate((np.repeat(['Torch'], metric_pbmc_torch.size), - np.repeat(['Py'], metric_pbmc_py.size)), axis = 0), - 'metric' : np.concatenate((metric_pbmc_torch, metric_pbmc_py), axis = 0)}) + df2 = pd.DataFrame( + { + "dataset": np.repeat( + ["10x PBMC"], metric_pbmc_torch.size + metric_pbmc_py.size + ), + "package": np.concatenate( + ( + np.repeat(["Torch"], metric_pbmc_torch.size), + np.repeat(["Py"], metric_pbmc_py.size), + ), + axis=0, + ), + "metric": np.concatenate((metric_pbmc_torch, metric_pbmc_py), axis=0), + } + ) # MantonBM metric_mantonbm_torch = np.loadtxt("./result/MantonBM_{}_torch.txt".format(norm)) metric_mantonbm_py = np.loadtxt("./result/MantonBM_{}_py.txt".format(norm)) - df3 = pd.DataFrame({'dataset' : np.repeat(['Bone Marrow'], metric_mantonbm_torch.size + metric_mantonbm_py.size), - 'package' : np.concatenate((np.repeat(['Torch'], metric_mantonbm_torch.size), - np.repeat(['Py'], metric_mantonbm_py.size)), axis = 0), - 'metric' : np.concatenate((metric_mantonbm_torch, metric_mantonbm_py), axis = 0)}) + df3 = pd.DataFrame( + { + "dataset": np.repeat( + ["Bone Marrow"], metric_mantonbm_torch.size + metric_mantonbm_py.size + ), + "package": np.concatenate( + ( + np.repeat(["Torch"], metric_mantonbm_torch.size), + np.repeat(["Py"], metric_mantonbm_py.size), + ), + axis=0, + ), + "metric": np.concatenate( + (metric_mantonbm_torch, metric_mantonbm_py), axis=0 + ), + } + ) df = pd.concat([df1, df2, df3]) # Plot - ax = sns.violinplot(x = "dataset", y = "metric", hue = "package", data = df, palette = "muted", split = True, cut = 0) - ax.set_title("{} between Harmonypy and Harmony-pytorch Integration".format(metric_dict[norm])) - ax.set(xlabel = 'Dataset', ylabel = "{} on PCs".format(metric_dict[norm])) - if norm == 'r': - ax.set(ylim = (0.98, 1.001)) + ax = sns.violinplot( + x="dataset", + y="metric", + hue="package", + data=df, + palette="muted", + split=True, + cut=0, + ) + ax.set_title( + "{} between Harmonypy and Harmony-pytorch Integration".format(metric_dict[norm]) + ) + ax.set(xlabel="Dataset", ylabel="{} on PCs".format(metric_dict[norm])) + if norm == "r": + ax.set(ylim=(0.98, 1.001)) else: - ax.set(ylim = (0, 0.1)) + ax.set(ylim=(0, 0.1)) figure = ax.get_figure() - legend_loc = 'lower right' if norm == 'r' else 'upper right' - figure.get_axes()[0].legend(title = "Package", loc = legend_loc) - figure.savefig("./plots/{}_stats.png".format(norm), dpi = 400) + legend_loc = "lower right" if norm == "r" else "upper right" + figure.get_axes()[0].legend(title="Package", loc=legend_loc) + figure.savefig("./plots/{}_stats.png".format(norm), dpi=400) plt.close() -if __name__ == '__main__': +if __name__ == "__main__": dataset = sys.argv[1] assert dataset in ["cell_lines", "pbmc", "MantonBM", "plot"] @@ -293,12 +393,12 @@ def gen_plot(norm): if os.system("mkdir ./plots"): sys.exit(1) - if dataset == 'cell_lines': + if dataset == "cell_lines": test_cell_lines() - elif dataset == 'pbmc': + elif dataset == "pbmc": test_pbmc() - elif dataset == 'MantonBM': + elif dataset == "MantonBM": test_mantonbm() else: - gen_plot('r') - gen_plot('L2') + gen_plot("r") + gen_plot("L2") diff --git a/test/test_gpu.py b/test/test_gpu.py index 9a0ffdd..751cd5a 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -22,47 +22,69 @@ def check_metrics(Z, base, prefix): err = np.linalg.norm(Z[:, i] - base[:, i]) / np.linalg.norm(base[:, i]) errors.append(err) - print("For {name}, mean r = {cor:.4f}, mean L2 error = {err:.4f}.".format(name = prefix, cor = np.mean(cors), err = np.mean(errors))) + print( + "For {name}, mean r = {cor:.4f}, mean L2 error = {err:.4f}.".format( + name=prefix, cor=np.mean(cors), err=np.mean(errors) + ) + ) np.savetxt("./result/{}_r.txt".format(prefix), cors) np.savetxt("./result/{}_L2.txt".format(prefix), errors) def plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix, batch_key): if adata is not None: - adata.obsm['X_cpu'] = Z_cpu - adata.obsm['X_gpu'] = Z_gpu - adata.obsm['X_harmony'] = Z_R + adata.obsm["X_cpu"] = Z_cpu + adata.obsm["X_gpu"] = Z_gpu + adata.obsm["X_harmony"] = Z_R - pg.neighbors(adata, rep = 'cpu') - pg.umap(adata, rep = 'cpu', out_basis = 'umap_cpu') + pg.neighbors(adata, rep="cpu") + pg.umap(adata, rep="cpu", out_basis="umap_cpu") - pg.neighbors(adata, rep = 'gpu') - pg.umap(adata, rep = 'gpu', out_basis = 'umap_gpu') + pg.neighbors(adata, rep="gpu") + pg.umap(adata, rep="gpu", out_basis="umap_gpu") - pg.neighbors(adata, rep = 'harmony') - pg.umap(adata, rep = 'harmony', out_basis = 'umap_harmony') + pg.neighbors(adata, rep="harmony") + pg.umap(adata, rep="harmony", out_basis="umap_harmony") pg.write_output(adata, "./result/{}_result".format(prefix)) else: print("Use precalculated AnnData result.") - if os.system("pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.before.umap.pdf".format(attr = batch_key, prefix = prefix)): + if os.system( + "pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.before.umap.pdf".format( + attr=batch_key, prefix=prefix + ) + ): sys.exit(1) - if os.system("pegasus plot scatter --basis umap_cpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.cpu.umap.pdf".format(attr = batch_key, prefix = prefix)): + if os.system( + "pegasus plot scatter --basis umap_cpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.cpu.umap.pdf".format( + attr=batch_key, prefix=prefix + ) + ): sys.exit(1) - if os.system("pegasus plot scatter --basis umap_gpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.gpu.umap.pdf".format(attr = batch_key, prefix = prefix)): + if os.system( + "pegasus plot scatter --basis umap_gpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.gpu.umap.pdf".format( + attr=batch_key, prefix=prefix + ) + ): sys.exit(1) - if os.system("pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.harmony.umap.pdf".format(attr = batch_key, prefix = prefix)): + if os.system( + "pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.harmony.umap.pdf".format( + attr=batch_key, prefix=prefix + ) + ): sys.exit(1) def test_cell_lines(): print("Testing on Cell Lines...") - z_files = [f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)] + z_files = [ + f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f) + ] if len(z_files) < 3 or not os.path.exists("./result/cell_lines_result.h5ad"): X = np.loadtxt("./data/cell_lines/pca.txt") df_metadata = pd.read_csv("./data/cell_lines/metadata.csv") @@ -72,7 +94,7 @@ def test_cell_lines(): print("Precalculated CPU mode result is loaded.") else: start_cpu = time.time() - Z_cpu = harmonize(X, df_metadata, 'dataset') + Z_cpu = harmonize(X, df_metadata, "dataset") end_cpu = time.time() print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu)) @@ -83,7 +105,7 @@ def test_cell_lines(): print("Precalculated GPU mode result is loaded.") else: start_gpu = time.time() - Z_gpu = harmonize(X, df_metadata, 'dataset', use_gpu = True) + Z_gpu = harmonize(X, df_metadata, "dataset", use_gpu=True) end_gpu = time.time() print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu)) @@ -91,37 +113,37 @@ def test_cell_lines(): Z_R = np.loadtxt("./result/cell_lines_harmony_z.txt") - check_metrics(Z_cpu, Z_R, prefix = "cell_lines_cpu") - check_metrics(Z_gpu, Z_R, prefix = "cell_lines_gpu") + check_metrics(Z_cpu, Z_R, prefix="cell_lines_cpu") + check_metrics(Z_gpu, Z_R, prefix="cell_lines_gpu") if os.path.exists("./result/cell_lines_result.h5ad"): adata = None else: n_obs = X.shape[0] - adata = AnnData(X = csr_matrix((n_obs, 2)), obs = df_metadata) - adata.obsm['X_pca'] = X + adata = AnnData(X=csr_matrix((n_obs, 2)), obs=df_metadata) + adata.obsm["X_pca"] = X - pg.neighbors(adata, rep = 'pca') + pg.neighbors(adata, rep="pca") pg.umap(adata) umap_list = [f for f in os.listdir("./plots") if re.match("cell_lines.*.pdf")] if len(umap_list) < 4: - plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "cell_lines", batch_key = 'dataset') + plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix="cell_lines", batch_key="dataset") def test_pbmc(): print("Testing on 10x PBMC...") z_files = [f for f in os.listdir("./result") if re.match("pbmc.*_z.(txt|npy)", f)] - if len(z_files) < 3 - adata = pg.read_input("./data/10x_pbmc/original_data.h5ad") + if len(z_files) < 3: + adata = pg.read_input("./data/10x_pbmc/original_data.h5ad") if os.path.exists("./result/pbmc_cpu_z.npy"): Z_cpu = np.load("./result/pbmc_cpu_z.npy") print("Precalculated CPU mode result is loaded.") else: start_cpu = time.time() - Z_cpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel') + Z_cpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel") end_cpu = time.time() print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu)) @@ -132,7 +154,7 @@ def test_pbmc(): print("Precalculated GPU mode result is loaded.") else: start_gpu = time.time() - Z_gpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel', use_gpu = True) + Z_gpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel", use_gpu=True) end_gpu = time.time() print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu)) @@ -140,31 +162,35 @@ def test_pbmc(): Z_R = np.loadtxt("./result/pbmc_harmony_z.txt") - check_metrics(Z_cpu, Z_R, prefix = "pbmc_cpu") - check_metrics(Z_gpu, Z_R, prefix = "pbmc_gpu") + check_metrics(Z_cpu, Z_R, prefix="pbmc_cpu") + check_metrics(Z_gpu, Z_R, prefix="pbmc_gpu") if os.path.exists("./result/pbmc_result.h5ad"): adata = None umap_list = [f for f in os.listdir("./plots") if re.match("pbmc.*.pdf", f)] if len(umap_list) < 4: - plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "pbmc", batch_key = 'Channel') + plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix="pbmc", batch_key="Channel") def test_mantonbm(): print("Testing on MantonBM...") - z_files = [f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)] + z_files = [ + f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f) + ] if len(z_files) < 3: adata = pg.read_input("./data/MantonBM/original_data.h5ad") - adata.obs['Individual'] = pd.Categorical(adata.obs['Channel'].apply(lambda s: s.split('_')[0][-1])) + adata.obs["Individual"] = pd.Categorical( + adata.obs["Channel"].apply(lambda s: s.split("_")[0][-1]) + ) if os.path.exists("./result/MantonBM_cpu_z.npy"): Z_cpu = np.load("./result/MantonBM_cpu_z.npy") print("Precalculated CPU mode result is loaded.") else: start_cpu = time.time() - Z_cpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel') + Z_cpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel") end_cpu = time.time() print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu)) @@ -175,7 +201,7 @@ def test_mantonbm(): print("Precalculated GPU mode result is loaded.") else: start_gpu = time.time() - Z_gpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel', use_gpu = True) + Z_gpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel", use_gpu=True) end_gpu = time.time() print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu)) @@ -183,24 +209,24 @@ def test_mantonbm(): Z_R = np.loadtxt("./result/MantonBM_harmony_z.txt") - check_metrics(Z_cpu, Z_R, prefix = "MantonBM_cpu") - check_metrics(Z_gpu, Z_R, prefix = "MantonBM_gpu") + check_metrics(Z_cpu, Z_R, prefix="MantonBM_cpu") + check_metrics(Z_gpu, Z_R, prefix="MantonBM_gpu") if os.path.exists("./result/MantonBM_result.h5ad"): adata = None umap_list = [f for f in os.listdir("./plots") if re.match("MantonBM.*.pdf", f)] if len(umap_list) < 4: - plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "MantonBM", batch_key = 'Individual') + plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix="MantonBM", batch_key="Individual") -if __name__ == '__main__': +if __name__ == "__main__": dataset = sys.argv[1] - assert dataset in ['cell_lines', 'pbmc', 'MantonBM'] - if dataset == 'cell_lines': + assert dataset in ["cell_lines", "pbmc", "MantonBM"] + if dataset == "cell_lines": test_cell_lines() - elif dataset == 'pbmc': + elif dataset == "pbmc": test_pbmc() else: - test_mantonbm() \ No newline at end of file + test_mantonbm() From b281cad3e609a03ec2ece68dcf0c1b74c258ea78 Mon Sep 17 00:00:00 2001 From: mbruhns Date: Sun, 10 Dec 2023 16:19:15 +0100 Subject: [PATCH 4/6] Update requirements to be able to use MPS backend. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 16997ee..a1e90d2 100644 --- a/setup.py +++ b/setup.py @@ -7,13 +7,13 @@ long_description = f.read() requires = [ - "torch", + "torch>=1.12", "numpy", "pandas", "psutil", "threadpoolctl", "scikit-learn>=0.23", - "importlib_metadata>=0.7; python_version < '3.8'", + "importlib_metadata>=0.7;", ] setup( From 91ffe50ece4d9b17a2cba48b52dba38264c5cc3f Mon Sep 17 00:00:00 2001 From: mbruhns Date: Sun, 10 Dec 2023 16:22:36 +0100 Subject: [PATCH 5/6] Fix typo in requirements. --- harmony/version.py | 16 ++++++++++++++++ setup.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 harmony/version.py diff --git a/harmony/version.py b/harmony/version.py new file mode 100644 index 0000000..47cd6ee --- /dev/null +++ b/harmony/version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '0.1.dev101+gb281cad.d20231210' +__version_tuple__ = version_tuple = (0, 1, 'dev101', 'gb281cad.d20231210') diff --git a/setup.py b/setup.py index a1e90d2..06ac7ff 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ "psutil", "threadpoolctl", "scikit-learn>=0.23", - "importlib_metadata>=0.7;", + "importlib_metadata>=0.7", ] setup( From cb3065b22470e10ac85d28cf493564c69fb4533c Mon Sep 17 00:00:00 2001 From: mbruhns Date: Mon, 11 Dec 2023 09:16:59 +0100 Subject: [PATCH 6/6] Remove version.py, remove importlib_metadata from setup, remove whitespace in classifiers in setup. --- harmony/version.py | 16 ---------------- setup.py | 5 ++--- 2 files changed, 2 insertions(+), 19 deletions(-) delete mode 100644 harmony/version.py diff --git a/harmony/version.py b/harmony/version.py deleted file mode 100644 index 47cd6ee..0000000 --- a/harmony/version.py +++ /dev/null @@ -1,16 +0,0 @@ -# file generated by setuptools_scm -# don't change, don't track in version control -TYPE_CHECKING = False -if TYPE_CHECKING: - from typing import Tuple, Union - VERSION_TUPLE = Tuple[Union[int, str], ...] -else: - VERSION_TUPLE = object - -version: str -__version__: str -__version_tuple__: VERSION_TUPLE -version_tuple: VERSION_TUPLE - -__version__ = version = '0.1.dev101+gb281cad.d20231210' -__version_tuple__ = version_tuple = (0, 1, 'dev101', 'gb281cad.d20231210') diff --git a/setup.py b/setup.py index 06ac7ff..c36c44b 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,7 @@ "pandas", "psutil", "threadpoolctl", - "scikit-learn>=0.23", - "importlib_metadata>=0.7", + "scikit-learn>=0.23" ] setup( @@ -24,7 +23,7 @@ url="https://github.com/lilab-bcb/harmony-pytorch", author="Yiming Yang, Bo Li", author_email="yyang43@mgh.harvard.edu, bli28@mgh.harvard.edu", - classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research",