From 4e43d28d5fb80b748e9c2435c5209c55e51c8188 Mon Sep 17 00:00:00 2001
From: mbruhns <matthias.bruhns@uni-tuebingen.de>
Date: Sun, 10 Dec 2023 16:07:01 +0100
Subject: [PATCH 1/6] Update.

---
 .gitignore | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 148 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 03bc42c..19c423a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,148 @@
-__pycache__
-*.pdf
-build
-dist
-*.egg-info
-.eggs
\ No newline at end of file
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Sublime workspace
+*.sublime-workspace
+.DS_Store
+
+#Custom folders
+results/
+figures/
+
+*.sublime-workspace
+*.sublime-project
+
+# Jupyter notebooks
+*.ipynb
+
+.idea/
+
+*.h5ad
+

From d6604d8c79206c04cfb9f28a5e7f9a13f96f2346 Mon Sep 17 00:00:00 2001
From: mbruhns <matthias.bruhns@uni-tuebingen.de>
Date: Sun, 10 Dec 2023 16:11:31 +0100
Subject: [PATCH 2/6] Basic reformatting.

---
 harmony/harmony.py | 18 +++++++++---------
 harmony/utils.py   |  8 +++++---
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/harmony/harmony.py b/harmony/harmony.py
index 77cc1f3..9248d35 100644
--- a/harmony/harmony.py
+++ b/harmony/harmony.py
@@ -120,9 +120,12 @@ def harmonize(
             device_type = "cuda"
             if verbose:
                 print("Use GPU mode.")
-        else:
+        elif torch.backends.mps.is_available():
+            device_type = "mps"
             if verbose:
-                print("CUDA is not available on your machine. Use CPU mode instead.")
+                print("Use Metal (MPS) mode.")
+        elif verbose:
+            print("Neither CUDA nor MPS is available on your machine. Use CPU mode instead.")
 
     (stride_0, stride_1) = X.strides
     if stride_0 < 0 or stride_1 < 0:
@@ -156,7 +159,7 @@ def harmonize(
     theta = theta.view(1, -1)
 
     assert block_proportion > 0 and block_proportion <= 1
-    assert correction_method in ["fast", "original"]
+    assert correction_method in {"fast", "original"}
 
     np.random.seed(random_state)
 
@@ -206,13 +209,10 @@ def harmonize(
 
         if is_convergent_harmony(objectives_harmony, tol=tol_harmony):
             if verbose:
-                print("Reach convergence after {} iteration(s).".format(i + 1))
+                print(f"Reach convergence after {i + 1} iteration(s).")
             break
 
-    if device_type == "cpu":
-        return Z_hat.numpy()
-    else:
-        return Z_hat.cpu().numpy()
+    return Z_hat.numpy() if device_type == "cpu" else Z_hat.cpu().numpy()
 
 
 def initialize_centroids(
@@ -287,7 +287,7 @@ def clustering(
 
     objectives_clustering = []
 
-    for i in range(max_iter):
+    for _ in range(max_iter):
         # Compute Cluster Centroids
         Y = torch.matmul(R.t(), Z_norm)
         Y_norm = normalize(Y, p=2, dim=1)
diff --git a/harmony/utils.py b/harmony/utils.py
index b132138..857e4ea 100644
--- a/harmony/utils.py
+++ b/harmony/utils.py
@@ -2,9 +2,11 @@
 
 
 def get_batch_codes(batch_mat, batch_key):
-    if type(batch_key) is str or len(batch_key) == 1:
-        if not type(batch_key) is str:
-            batch_key = batch_key[0]
+    if type(batch_key) is str:
+        batch_vec = batch_mat[batch_key]
+
+    elif len(batch_key) == 1:
+        batch_key = batch_key[0]
 
         batch_vec = batch_mat[batch_key]
 

From ae8cd474ec7a71dba485b91d16dc32dfd8bff619 Mon Sep 17 00:00:00 2001
From: mbruhns <matthias.bruhns@uni-tuebingen.de>
Date: Sun, 10 Dec 2023 16:14:48 +0100
Subject: [PATCH 3/6] Run Ruff.

---
 harmony/__init__.py |   2 +-
 harmony/harmony.py  |  55 +++++----
 harmony/utils.py    |  10 +-
 setup.py            |   2 +-
 test/test.py        | 266 ++++++++++++++++++++++++++++++--------------
 test/test_gpu.py    | 110 +++++++++++-------
 6 files changed, 286 insertions(+), 159 deletions(-)

diff --git a/harmony/__init__.py b/harmony/__init__.py
index dd0606b..6663ba6 100644
--- a/harmony/__init__.py
+++ b/harmony/__init__.py
@@ -6,7 +6,7 @@
     from importlib_metadata import version, PackageNotFoundError
 
 try:
-    __version__ = version('harmony-pytorch')
+    __version__ = version("harmony-pytorch")
     del version
 except PackageNotFoundError:
     pass
diff --git a/harmony/harmony.py b/harmony/harmony.py
index 9248d35..8a2f7c9 100644
--- a/harmony/harmony.py
+++ b/harmony/harmony.py
@@ -10,7 +10,6 @@
 from .utils import one_hot_tensor, get_batch_codes
 
 
-
 def harmonize(
     X: np.array,
     batch_mat: pd.DataFrame,
@@ -105,13 +104,16 @@ def harmonize(
     >>> X_harmony = harmonize(adata.obsm['X_pca'], adata.obs, ['Channel', 'Lab'])
     """
 
-    assert(isinstance(X, np.ndarray))
+    assert isinstance(X, np.ndarray)
 
     if n_jobs < 0:
         import psutil
-        n_jobs = psutil.cpu_count(logical=False) # get physical cores
+
+        n_jobs = psutil.cpu_count(logical=False)  # get physical cores
         if n_jobs is None:
-            n_jobs = psutil.cpu_count(logical=True) # if undetermined, use logical cores instead
+            n_jobs = psutil.cpu_count(
+                logical=True
+            )  # if undetermined, use logical cores instead
     torch.set_num_threads(n_jobs)
 
     device_type = "cpu"
@@ -125,7 +127,9 @@ def harmonize(
             if verbose:
                 print("Use Metal (MPS) mode.")
         elif verbose:
-            print("Neither CUDA nor MPS is available on your machine. Use CPU mode instead.")
+            print(
+                "Neither CUDA nor MPS is available on your machine. Use CPU mode instead."
+            )
 
     (stride_0, stride_1) = X.strides
     if stride_0 < 0 or stride_1 < 0:
@@ -229,17 +233,19 @@ def initialize_centroids(
 ):
     n_cells = Z_norm.shape[0]
 
-    kmeans_params = {'n_clusters': n_clusters,
-                     'init': "k-means++",
-                     'n_init': n_init,
-                     'random_state': random_state,
-                     'max_iter': 25,
-                    }
+    kmeans_params = {
+        "n_clusters": n_clusters,
+        "init": "k-means++",
+        "n_init": n_init,
+        "random_state": random_state,
+        "max_iter": 25,
+    }
 
     kmeans = KMeans(**kmeans_params)
 
     from threadpoolctl import threadpool_limits
-    with threadpool_limits(limits = n_jobs):
+
+    with threadpool_limits(limits=n_jobs):
         if device_type == "cpu":
             kmeans.fit(Z_norm)
         else:
@@ -249,9 +255,7 @@ def initialize_centroids(
     Y_norm = normalize(Y, p=2, dim=1)
 
     # Initialize R
-    R = torch.exp(
-        -2 / sigma * (1 - torch.matmul(Z_norm, Y_norm.t()))
-    )
+    R = torch.exp(-2 / sigma * (1 - torch.matmul(Z_norm, Y_norm.t())))
     R = normalize(R, p=1, dim=1)
 
     E = torch.matmul(Pr_b, torch.sum(R, dim=0, keepdim=True))
@@ -282,7 +286,6 @@ def clustering(
     device_type,
     n_init=10,
 ):
-
     n_cells = Z_norm.shape[0]
 
     objectives_clustering = []
@@ -298,12 +301,8 @@ def clustering(
         pos = 0
         while pos < len(idx_list):
             idx_in = idx_list[pos : (pos + block_size)]
-            R_in = R[
-                idx_in,
-            ]
-            Phi_in = Phi[
-                idx_in,
-            ]
+            R_in = R[idx_in,]
+            Phi_in = Phi[idx_in,]
 
             # Compute O and E on left out data.
             O -= torch.matmul(Phi_in.t(), R_in)
@@ -347,14 +346,12 @@ def correction_original(X, R, Phi, ridge_lambda, device_type):
     Phi_1 = torch.cat((torch.ones(n_cells, 1, device=device_type), Phi), dim=1)
 
     Z = X.clone()
-    id_mat = torch.eye(n_batches + 1, n_batches + 1, device = device_type)
+    id_mat = torch.eye(n_batches + 1, n_batches + 1, device=device_type)
     id_mat[0, 0] = 0
     Lambda = ridge_lambda * id_mat
     for k in range(n_clusters):
         Phi_t_diag_R = Phi_1.t() * R[:, k].view(1, -1)
-        inv_mat = torch.inverse(
-            torch.matmul(Phi_t_diag_R, Phi_1) + Lambda
-        )
+        inv_mat = torch.inverse(torch.matmul(Phi_t_diag_R, Phi_1) + Lambda)
         W = torch.matmul(inv_mat, torch.matmul(Phi_t_diag_R, X))
         W[0, :] = 0
         Z -= torch.matmul(Phi_t_diag_R.t(), W)
@@ -375,7 +372,7 @@ def correction_fast(X, R, Phi, O, ridge_lambda, device_type):
         N_k = torch.sum(O_k)
 
         factor = 1 / (O_k + ridge_lambda)
-        c = N_k + torch.sum(-factor * O_k ** 2)
+        c = N_k + torch.sum(-factor * O_k**2)
         c_inv = 1 / c
 
         P[0, 1:] = -factor * O_k
@@ -401,7 +398,9 @@ def compute_objective(
     Y_norm, Z_norm, R, theta, sigma, O, E, objective_arr, device_type
 ):
     kmeans_error = torch.sum(R * 2 * (1 - torch.matmul(Z_norm, Y_norm.t())))
-    entropy_term = sigma * torch.sum(-torch.distributions.Categorical(probs=R).entropy())
+    entropy_term = sigma * torch.sum(
+        -torch.distributions.Categorical(probs=R).entropy()
+    )
     diversity_penalty = sigma * torch.sum(
         torch.matmul(theta, O * torch.log(torch.div(O + 1, E + 1)))
     )
diff --git a/harmony/utils.py b/harmony/utils.py
index 857e4ea..3c382a9 100644
--- a/harmony/utils.py
+++ b/harmony/utils.py
@@ -11,17 +11,19 @@ def get_batch_codes(batch_mat, batch_key):
         batch_vec = batch_mat[batch_key]
 
     else:
-        df = batch_mat[batch_key].astype('str')
-        batch_vec = df.apply(lambda row: ','.join(row), axis = 1)
+        df = batch_mat[batch_key].astype("str")
+        batch_vec = df.apply(lambda row: ",".join(row), axis=1)
 
     return batch_vec.astype("category")
 
 
 def one_hot_tensor(X, device_type):
-    ids = torch.as_tensor(X.cat.codes.values.copy(), dtype = torch.long, device = device_type).view(-1, 1)
+    ids = torch.as_tensor(
+        X.cat.codes.values.copy(), dtype=torch.long, device=device_type
+    ).view(-1, 1)
     n_row = X.size
     n_col = X.cat.categories.size
-    Phi = torch.zeros(n_row, n_col, dtype=torch.float, device = device_type)
+    Phi = torch.zeros(n_row, n_col, dtype=torch.float, device=device_type)
     Phi.scatter_(dim=1, index=ids, value=1.0)
 
     return Phi
diff --git a/setup.py b/setup.py
index 711a8cf..16997ee 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
     url="https://github.com/lilab-bcb/harmony-pytorch",
     author="Yiming Yang, Bo Li",
     author_email="yyang43@mgh.harvard.edu, bli28@mgh.harvard.edu",
-    classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers
+    classifiers=[  # https://pypi.python.org/pypi?%3Aaction=list_classifiers
         "Development Status :: 3 - Alpha",
         "Intended Audience :: Developers",
         "Intended Audience :: Science/Research",
diff --git a/test/test.py b/test/test.py
index 008e593..e8c6ec4 100644
--- a/test/test.py
+++ b/test/test.py
@@ -13,7 +13,8 @@
 from scipy.sparse import csr_matrix
 
 
-metric_dict = {'r': 'Correlation', 'L2': 'L2 Error'}
+metric_dict = {"r": "Correlation", "L2": "L2 Error"}
+
 
 def check_metric(Z_torch, Z_py, Z_R, prefix, norm):
     assert Z_torch.shape == Z_py.shape and Z_py.shape == Z_R.shape
@@ -23,22 +24,36 @@ def check_metric(Z_torch, Z_py, Z_R, prefix, norm):
         m = get_measure(Z_torch[:, i], Z_R[:, i], norm)
         metric_torch.append(m)
 
-    print("Mean {metric} by harmony-pytorch = {value:.4f}".format(metric = metric_dict[norm], value = np.mean(metric_torch)))
-    np.savetxt("./result/{prefix}_{metric}_torch.txt".format(prefix = prefix, metric = norm), metric_torch)
+    print(
+        "Mean {metric} by harmony-pytorch = {value:.4f}".format(
+            metric=metric_dict[norm], value=np.mean(metric_torch)
+        )
+    )
+    np.savetxt(
+        "./result/{prefix}_{metric}_torch.txt".format(prefix=prefix, metric=norm),
+        metric_torch,
+    )
 
     metric_py = []
     for i in range(Z_py.shape[1]):
         m = get_measure(Z_py[:, i], Z_R[:, i], norm)
         metric_py.append(m)
 
-    print("Mean {metric} by harmonypy = {value:.4f}".format(metric = metric_dict[norm], value = np.mean(metric_py)))
-    np.savetxt("./result/{prefix}_{metric}_py.txt".format(prefix = prefix, metric = norm), metric_py)
+    print(
+        "Mean {metric} by harmonypy = {value:.4f}".format(
+            metric=metric_dict[norm], value=np.mean(metric_py)
+        )
+    )
+    np.savetxt(
+        "./result/{prefix}_{metric}_py.txt".format(prefix=prefix, metric=norm),
+        metric_py,
+    )
 
 
 def get_measure(x, base, norm):
-    assert norm in ['r', 'L2']
+    assert norm in ["r", "L2"]
 
-    if norm == 'r':
+    if norm == "r":
         corr, _ = pearsonr(x, base)
         return corr
     else:
@@ -47,40 +62,58 @@ def get_measure(x, base, norm):
 
 def plot_umap(adata, Z_torch, Z_py, Z_R, prefix, batch_key):
     if adata is not None:
-        adata.obsm['X_torch'] = Z_torch
-        adata.obsm['X_py'] = Z_py
-        adata.obsm['X_harmony'] = Z_R
+        adata.obsm["X_torch"] = Z_torch
+        adata.obsm["X_py"] = Z_py
+        adata.obsm["X_harmony"] = Z_R
 
-        pg.neighbors(adata, rep = 'torch')
-        pg.umap(adata, rep = 'torch', out_basis = 'umap_torch')
+        pg.neighbors(adata, rep="torch")
+        pg.umap(adata, rep="torch", out_basis="umap_torch")
 
-        pg.neighbors(adata, rep = 'py')
-        pg.umap(adata, rep = 'py', out_basis = 'umap_py')
+        pg.neighbors(adata, rep="py")
+        pg.umap(adata, rep="py", out_basis="umap_py")
 
-        pg.neighbors(adata, rep = 'harmony')
-        pg.umap(adata, rep = 'harmony', out_basis = 'umap_harmony')
+        pg.neighbors(adata, rep="harmony")
+        pg.umap(adata, rep="harmony", out_basis="umap_harmony")
 
         pg.write_output(adata, "./result/{}_result".format(prefix))
     else:
         print("Use precalculated AnnData result.")
 
-    if os.system("pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.before.umap.pdf".format(name = prefix, attr = batch_key)):
+    if os.system(
+        "pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.before.umap.pdf".format(
+            name=prefix, attr=batch_key
+        )
+    ):
         sys.exit(1)
 
-    if os.system("pegasus plot scatter --basis umap_torch --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.torch.umap.pdf".format(name = prefix, attr = batch_key)):
+    if os.system(
+        "pegasus plot scatter --basis umap_torch --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.torch.umap.pdf".format(
+            name=prefix, attr=batch_key
+        )
+    ):
         sys.exit(1)
 
-    if os.system("pegasus plot scatter --basis umap_py --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.py.umap.pdf".format(name = prefix, attr = batch_key)):
+    if os.system(
+        "pegasus plot scatter --basis umap_py --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.py.umap.pdf".format(
+            name=prefix, attr=batch_key
+        )
+    ):
         sys.exit(1)
 
-    if os.system("pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.harmony.umap.pdf".format(name = prefix, attr = batch_key)):
+    if os.system(
+        "pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{name}_result.h5ad ./plots/{name}.harmony.umap.pdf".format(
+            name=prefix, attr=batch_key
+        )
+    ):
         sys.exit(1)
 
 
 def test_cell_lines():
     print("Testing on cell lines dataset...")
 
-    z_files = [f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)]
+    z_files = [
+        f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)
+    ]
     if len(z_files) < 3 or not os.path.exists("./result/cell_lines_result.h5ad"):
         X = np.loadtxt("./data/cell_lines/pca.txt")
         df_metadata = pd.read_csv("./data/cell_lines/metadata.csv")
@@ -91,10 +124,12 @@ def test_cell_lines():
         print("Precalculated embedding by harmony-pytorch is loaded.")
     else:
         start_torch = time.time()
-        Z_torch = harmonize(X, df_metadata, batch_key = 'dataset')
+        Z_torch = harmonize(X, df_metadata, batch_key="dataset")
         end_torch = time.time()
 
-        print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch))
+        print(
+            "Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch)
+        )
         np.save("./result/cell_lines_torch_z.npy", Z_torch)
 
     if os.path.exists("./result/cell_lines_py_z.npy"):
@@ -102,7 +137,7 @@ def test_cell_lines():
         print("Precalculated embedding by harmonypy is loaded.")
     else:
         start_py = time.time()
-        ho = run_harmony(X, df_metadata, ['dataset'])
+        ho = run_harmony(X, df_metadata, ["dataset"])
         end_py = time.time()
 
         print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py))
@@ -113,34 +148,46 @@ def test_cell_lines():
 
     Z_R = np.loadtxt("./result/cell_lines_harmony_z.txt")
 
-    check_metric(Z_torch, Z_py, Z_R, prefix = "cell_lines", norm = 'r')
-    check_metric(Z_torch, Z_py, Z_R, prefix = "cell_lines", norm = 'L2')
+    check_metric(Z_torch, Z_py, Z_R, prefix="cell_lines", norm="r")
+    check_metric(Z_torch, Z_py, Z_R, prefix="cell_lines", norm="L2")
 
     if os.path.exists("./result/cell_lines_result.h5ad"):
         adata = None
     else:
         n_obs = X.shape[0]
-        adata = AnnData(X = csr_matrix((n_obs, 2)), obs = df_metadata)
-        adata.obsm['X_pca'] = X
+        adata = AnnData(X=csr_matrix((n_obs, 2)), obs=df_metadata)
+        adata.obsm["X_pca"] = X
 
-        pg.neighbors(adata, rep = 'pca')
+        pg.neighbors(adata, rep="pca")
         pg.umap(adata)
 
     umap_list = [f for f in os.listdir("./plots") if re.match("cell_lines.*.pdf", f)]
     if len(umap_list) < 4:
-        plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "cell_lines", batch_key = "dataset")
+        plot_umap(adata, Z_torch, Z_py, Z_R, prefix="cell_lines", batch_key="dataset")
 
     if os.path.exists("./result/cell_lines_result.h5ad"):
-       adata = pg.read_input("./result/cell_lines_result.h5ad", h5ad_mode = 'r')
-
-       stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = 'harmony')
-       print("kBET for Harmony: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(stat = stat, pval = pvalue, ac_rate = ac_rate))
-
-       stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = 'py')
-       print("kBET for harmonypy: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(stat = stat, pval = pvalue, ac_rate = ac_rate))
-
-       stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = 'torch')
-       print("kBET for harmony-pytorch: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(stat = stat, pval = pvalue, ac_rate = ac_rate))
+        adata = pg.read_input("./result/cell_lines_result.h5ad", h5ad_mode="r")
+
+        stat, pvalue, ac_rate = pg.calc_kBET(adata, attr="dataset", rep="harmony")
+        print(
+            "kBET for Harmony: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(
+                stat=stat, pval=pvalue, ac_rate=ac_rate
+            )
+        )
+
+        stat, pvalue, ac_rate = pg.calc_kBET(adata, attr="dataset", rep="py")
+        print(
+            "kBET for harmonypy: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(
+                stat=stat, pval=pvalue, ac_rate=ac_rate
+            )
+        )
+
+        stat, pvalue, ac_rate = pg.calc_kBET(adata, attr="dataset", rep="torch")
+        print(
+            "kBET for harmony-pytorch: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(
+                stat=stat, pval=pvalue, ac_rate=ac_rate
+            )
+        )
 
 
 def test_pbmc():
@@ -155,10 +202,12 @@ def test_pbmc():
         print("Precalculated embedding by harmony-pytorch is loaded.")
     else:
         start_torch = time.time()
-        Z_torch = harmonize(adata.obsm['X_pca'], adata.obs, batch_key = 'Channel')
+        Z_torch = harmonize(adata.obsm["X_pca"], adata.obs, batch_key="Channel")
         end_torch = time.time()
 
-        print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch))
+        print(
+            "Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch)
+        )
         np.save("./result/pbmc_torch_z.npy", Z_torch)
 
     if os.path.exists("./result/pbmc_py_z.npy"):
@@ -166,7 +215,7 @@ def test_pbmc():
         print("Precalculated embedding by harmonypy is loaded.")
     else:
         start_py = time.time()
-        ho = run_harmony(adata.obsm['X_pca'], adata.obs, ['Channel'])
+        ho = run_harmony(adata.obsm["X_pca"], adata.obs, ["Channel"])
         end_py = time.time()
 
         print(ho.objective_harmony)
@@ -177,34 +226,40 @@ def test_pbmc():
 
     Z_R = np.loadtxt("./result/pbmc_harmony_z.txt")
 
-    check_metric(Z_torch, Z_py, Z_R, prefix = "pbmc", norm = 'r')
-    check_metric(Z_torch, Z_py, Z_R, prefix = "pbmc", norm = 'L2')
+    check_metric(Z_torch, Z_py, Z_R, prefix="pbmc", norm="r")
+    check_metric(Z_torch, Z_py, Z_R, prefix="pbmc", norm="L2")
 
     if os.path.exists("./result/pbmc_result.h5ad"):
         adata = None
 
     umap_list = [f for f in os.listdir("./plots") if re.match("pbmc.*.pdf", f)]
     if len(umap_list) < 4:
-        plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "pbmc", batch_key = "Channel")
+        plot_umap(adata, Z_torch, Z_py, Z_R, prefix="pbmc", batch_key="Channel")
 
 
 def test_mantonbm():
     print("Testing on MantonBM dataset...")
 
-    z_files = [f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)]
+    z_files = [
+        f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)
+    ]
     if len(z_files) < 3 or not os.path.exists("./result/MantonBM_result.h5ad"):
         adata = pg.read_input("./data/MantonBM/original_data.h5ad")
-        adata.obs['Individual'] = pd.Categorical(adata.obs['Channel'].apply(lambda s: s.split('_')[0][-1]))
+        adata.obs["Individual"] = pd.Categorical(
+            adata.obs["Channel"].apply(lambda s: s.split("_")[0][-1])
+        )
 
     if os.path.exists("./result/MantonBM_torch_z.npy"):
         Z_torch = np.load("./result/MantonBM_torch_z.npy")
         print("Precalculated embedding by harmony-pytorch is loaded.")
     else:
         start_torch = time.time()
-        Z_torch = harmonize(adata.obsm['X_pca'], adata.obs, batch_key = 'Channel')
+        Z_torch = harmonize(adata.obsm["X_pca"], adata.obs, batch_key="Channel")
         end_torch = time.time()
 
-        print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch))
+        print(
+            "Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch)
+        )
         np.save("./result/MantonBM_torch_z.npy", Z_torch)
 
     if os.path.exists("./result/MantonBM_py_z.npy"):
@@ -212,7 +267,7 @@ def test_mantonbm():
         print("Precalculated embedding by harmonypy is loaded.")
     else:
         start_py = time.time()
-        ho = run_harmony(adata.obsm['X_pca'], adata.obs, ['Channel'])
+        ho = run_harmony(adata.obsm["X_pca"], adata.obs, ["Channel"])
         end_py = time.time()
 
         print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py))
@@ -220,67 +275,112 @@ def test_mantonbm():
         Z_py = np.transpose(ho.Z_corr)
         np.save("./result/MantonBM_py_z.npy", Z_py)
 
-
     Z_R = np.loadtxt("./result/MantonBM_harmony_z.txt")
 
-    check_metric(Z_torch, Z_py, Z_R, prefix = "MantonBM", norm = 'r')
-    check_metric(Z_torch, Z_py, Z_R, prefix = "MantonBM", norm = 'L2')
+    check_metric(Z_torch, Z_py, Z_R, prefix="MantonBM", norm="r")
+    check_metric(Z_torch, Z_py, Z_R, prefix="MantonBM", norm="L2")
 
     if os.path.exists("./result/MantonBM_result.h5ad"):
         adata = None
 
     umap_list = [f for f in os.listdir("./plots") if re.match("MantonBM.*.pdf", f)]
     if len(umap_list) < 4:
-        plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "MantonBM", batch_key = "Individual")
+        plot_umap(adata, Z_torch, Z_py, Z_R, prefix="MantonBM", batch_key="Individual")
 
 
 def gen_plot(norm):
-
     # Cell Lines
     metric_celllines_torch = np.loadtxt("./result/cell_lines_{}_torch.txt".format(norm))
     metric_celllines_py = np.loadtxt("./result/cell_lines_{}_py.txt".format(norm))
 
-    df1 = pd.DataFrame({'dataset' : np.repeat(['Cell Lines'], metric_celllines_torch.size + metric_celllines_py.size),
-                        'package' : np.concatenate((np.repeat(['Torch'], metric_celllines_torch.size),
-                                                   np.repeat(['Py'], metric_celllines_py.size)), axis = 0),
-                        'metric' : np.concatenate((metric_celllines_torch, metric_celllines_py), axis = 0)})
+    df1 = pd.DataFrame(
+        {
+            "dataset": np.repeat(
+                ["Cell Lines"], metric_celllines_torch.size + metric_celllines_py.size
+            ),
+            "package": np.concatenate(
+                (
+                    np.repeat(["Torch"], metric_celllines_torch.size),
+                    np.repeat(["Py"], metric_celllines_py.size),
+                ),
+                axis=0,
+            ),
+            "metric": np.concatenate(
+                (metric_celllines_torch, metric_celllines_py), axis=0
+            ),
+        }
+    )
 
     # PBMC
     metric_pbmc_torch = np.loadtxt("./result/pbmc_{}_torch.txt".format(norm))
     metric_pbmc_py = np.loadtxt("./result/pbmc_{}_py.txt".format(norm))
 
-    df2 = pd.DataFrame({'dataset' : np.repeat(['10x PBMC'], metric_pbmc_torch.size + metric_pbmc_py.size),
-                        'package' : np.concatenate((np.repeat(['Torch'], metric_pbmc_torch.size),
-                                                    np.repeat(['Py'], metric_pbmc_py.size)), axis = 0),
-                        'metric' : np.concatenate((metric_pbmc_torch, metric_pbmc_py), axis = 0)})
+    df2 = pd.DataFrame(
+        {
+            "dataset": np.repeat(
+                ["10x PBMC"], metric_pbmc_torch.size + metric_pbmc_py.size
+            ),
+            "package": np.concatenate(
+                (
+                    np.repeat(["Torch"], metric_pbmc_torch.size),
+                    np.repeat(["Py"], metric_pbmc_py.size),
+                ),
+                axis=0,
+            ),
+            "metric": np.concatenate((metric_pbmc_torch, metric_pbmc_py), axis=0),
+        }
+    )
 
     # MantonBM
     metric_mantonbm_torch = np.loadtxt("./result/MantonBM_{}_torch.txt".format(norm))
     metric_mantonbm_py = np.loadtxt("./result/MantonBM_{}_py.txt".format(norm))
 
-    df3 = pd.DataFrame({'dataset' : np.repeat(['Bone Marrow'], metric_mantonbm_torch.size + metric_mantonbm_py.size),
-                        'package' : np.concatenate((np.repeat(['Torch'], metric_mantonbm_torch.size),
-                                                    np.repeat(['Py'], metric_mantonbm_py.size)), axis = 0),
-                        'metric' : np.concatenate((metric_mantonbm_torch, metric_mantonbm_py), axis = 0)})
+    df3 = pd.DataFrame(
+        {
+            "dataset": np.repeat(
+                ["Bone Marrow"], metric_mantonbm_torch.size + metric_mantonbm_py.size
+            ),
+            "package": np.concatenate(
+                (
+                    np.repeat(["Torch"], metric_mantonbm_torch.size),
+                    np.repeat(["Py"], metric_mantonbm_py.size),
+                ),
+                axis=0,
+            ),
+            "metric": np.concatenate(
+                (metric_mantonbm_torch, metric_mantonbm_py), axis=0
+            ),
+        }
+    )
 
     df = pd.concat([df1, df2, df3])
 
     # Plot
-    ax = sns.violinplot(x = "dataset", y = "metric", hue = "package", data = df, palette = "muted", split = True, cut = 0)
-    ax.set_title("{} between Harmonypy and Harmony-pytorch Integration".format(metric_dict[norm]))
-    ax.set(xlabel = 'Dataset', ylabel = "{} on PCs".format(metric_dict[norm]))
-    if norm == 'r':
-        ax.set(ylim = (0.98, 1.001))
+    ax = sns.violinplot(
+        x="dataset",
+        y="metric",
+        hue="package",
+        data=df,
+        palette="muted",
+        split=True,
+        cut=0,
+    )
+    ax.set_title(
+        "{} between Harmonypy and Harmony-pytorch Integration".format(metric_dict[norm])
+    )
+    ax.set(xlabel="Dataset", ylabel="{} on PCs".format(metric_dict[norm]))
+    if norm == "r":
+        ax.set(ylim=(0.98, 1.001))
     else:
-        ax.set(ylim = (0, 0.1))
+        ax.set(ylim=(0, 0.1))
     figure = ax.get_figure()
-    legend_loc = 'lower right' if norm == 'r' else 'upper right'
-    figure.get_axes()[0].legend(title = "Package", loc = legend_loc)
-    figure.savefig("./plots/{}_stats.png".format(norm), dpi = 400)
+    legend_loc = "lower right" if norm == "r" else "upper right"
+    figure.get_axes()[0].legend(title="Package", loc=legend_loc)
+    figure.savefig("./plots/{}_stats.png".format(norm), dpi=400)
     plt.close()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     dataset = sys.argv[1]
 
     assert dataset in ["cell_lines", "pbmc", "MantonBM", "plot"]
@@ -293,12 +393,12 @@ def gen_plot(norm):
         if os.system("mkdir ./plots"):
             sys.exit(1)
 
-    if dataset == 'cell_lines':
+    if dataset == "cell_lines":
         test_cell_lines()
-    elif dataset == 'pbmc':
+    elif dataset == "pbmc":
         test_pbmc()
-    elif dataset == 'MantonBM':
+    elif dataset == "MantonBM":
         test_mantonbm()
     else:
-        gen_plot('r')
-        gen_plot('L2')
+        gen_plot("r")
+        gen_plot("L2")
diff --git a/test/test_gpu.py b/test/test_gpu.py
index 9a0ffdd..751cd5a 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -22,47 +22,69 @@ def check_metrics(Z, base, prefix):
         err = np.linalg.norm(Z[:, i] - base[:, i]) / np.linalg.norm(base[:, i])
         errors.append(err)
 
-    print("For {name}, mean r = {cor:.4f}, mean L2 error = {err:.4f}.".format(name = prefix, cor = np.mean(cors), err = np.mean(errors)))
+    print(
+        "For {name}, mean r = {cor:.4f}, mean L2 error = {err:.4f}.".format(
+            name=prefix, cor=np.mean(cors), err=np.mean(errors)
+        )
+    )
     np.savetxt("./result/{}_r.txt".format(prefix), cors)
     np.savetxt("./result/{}_L2.txt".format(prefix), errors)
 
 
 def plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix, batch_key):
     if adata is not None:
-        adata.obsm['X_cpu'] = Z_cpu
-        adata.obsm['X_gpu'] = Z_gpu
-        adata.obsm['X_harmony'] = Z_R
+        adata.obsm["X_cpu"] = Z_cpu
+        adata.obsm["X_gpu"] = Z_gpu
+        adata.obsm["X_harmony"] = Z_R
 
-        pg.neighbors(adata, rep = 'cpu')
-        pg.umap(adata, rep = 'cpu', out_basis = 'umap_cpu')
+        pg.neighbors(adata, rep="cpu")
+        pg.umap(adata, rep="cpu", out_basis="umap_cpu")
 
-        pg.neighbors(adata, rep = 'gpu')
-        pg.umap(adata, rep = 'gpu', out_basis = 'umap_gpu')
+        pg.neighbors(adata, rep="gpu")
+        pg.umap(adata, rep="gpu", out_basis="umap_gpu")
 
-        pg.neighbors(adata, rep = 'harmony')
-        pg.umap(adata, rep = 'harmony', out_basis = 'umap_harmony')
+        pg.neighbors(adata, rep="harmony")
+        pg.umap(adata, rep="harmony", out_basis="umap_harmony")
 
         pg.write_output(adata, "./result/{}_result".format(prefix))
     else:
         print("Use precalculated AnnData result.")
 
-    if os.system("pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.before.umap.pdf".format(attr = batch_key, prefix = prefix)):
+    if os.system(
+        "pegasus plot scatter --basis umap --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.before.umap.pdf".format(
+            attr=batch_key, prefix=prefix
+        )
+    ):
         sys.exit(1)
 
-    if os.system("pegasus plot scatter --basis umap_cpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.cpu.umap.pdf".format(attr = batch_key, prefix = prefix)):
+    if os.system(
+        "pegasus plot scatter --basis umap_cpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.cpu.umap.pdf".format(
+            attr=batch_key, prefix=prefix
+        )
+    ):
         sys.exit(1)
 
-    if os.system("pegasus plot scatter --basis umap_gpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.gpu.umap.pdf".format(attr = batch_key, prefix = prefix)):
+    if os.system(
+        "pegasus plot scatter --basis umap_gpu --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.gpu.umap.pdf".format(
+            attr=batch_key, prefix=prefix
+        )
+    ):
         sys.exit(1)
 
-    if os.system("pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.harmony.umap.pdf".format(attr = batch_key, prefix = prefix)):
+    if os.system(
+        "pegasus plot scatter --basis umap_harmony --attributes {attr} --alpha 0.5 ./result/{prefix}_result.h5ad ./plots/{prefix}.harmony.umap.pdf".format(
+            attr=batch_key, prefix=prefix
+        )
+    ):
         sys.exit(1)
 
 
 def test_cell_lines():
     print("Testing on Cell Lines...")
 
-    z_files = [f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)]
+    z_files = [
+        f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)
+    ]
     if len(z_files) < 3 or not os.path.exists("./result/cell_lines_result.h5ad"):
         X = np.loadtxt("./data/cell_lines/pca.txt")
         df_metadata = pd.read_csv("./data/cell_lines/metadata.csv")
@@ -72,7 +94,7 @@ def test_cell_lines():
         print("Precalculated CPU mode result is loaded.")
     else:
         start_cpu = time.time()
-        Z_cpu = harmonize(X, df_metadata, 'dataset')
+        Z_cpu = harmonize(X, df_metadata, "dataset")
         end_cpu = time.time()
 
         print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu))
@@ -83,7 +105,7 @@ def test_cell_lines():
         print("Precalculated GPU mode result is loaded.")
     else:
         start_gpu = time.time()
-        Z_gpu = harmonize(X, df_metadata, 'dataset', use_gpu = True)
+        Z_gpu = harmonize(X, df_metadata, "dataset", use_gpu=True)
         end_gpu = time.time()
 
         print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu))
@@ -91,37 +113,37 @@ def test_cell_lines():
 
     Z_R = np.loadtxt("./result/cell_lines_harmony_z.txt")
 
-    check_metrics(Z_cpu, Z_R, prefix = "cell_lines_cpu")
-    check_metrics(Z_gpu, Z_R, prefix = "cell_lines_gpu")
+    check_metrics(Z_cpu, Z_R, prefix="cell_lines_cpu")
+    check_metrics(Z_gpu, Z_R, prefix="cell_lines_gpu")
 
     if os.path.exists("./result/cell_lines_result.h5ad"):
         adata = None
     else:
         n_obs = X.shape[0]
-        adata = AnnData(X = csr_matrix((n_obs, 2)), obs = df_metadata)
-        adata.obsm['X_pca'] = X
+        adata = AnnData(X=csr_matrix((n_obs, 2)), obs=df_metadata)
+        adata.obsm["X_pca"] = X
 
-        pg.neighbors(adata, rep = 'pca')
+        pg.neighbors(adata, rep="pca")
         pg.umap(adata)
 
     umap_list = [f for f in os.listdir("./plots") if re.match("cell_lines.*.pdf")]
     if len(umap_list) < 4:
-        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "cell_lines", batch_key = 'dataset')
+        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix="cell_lines", batch_key="dataset")
 
 
 def test_pbmc():
     print("Testing on 10x PBMC...")
 
     z_files = [f for f in os.listdir("./result") if re.match("pbmc.*_z.(txt|npy)", f)]
-    if len(z_files) < 3
-    adata = pg.read_input("./data/10x_pbmc/original_data.h5ad")
+    if len(z_files) < 3:
+        adata = pg.read_input("./data/10x_pbmc/original_data.h5ad")
 
     if os.path.exists("./result/pbmc_cpu_z.npy"):
         Z_cpu = np.load("./result/pbmc_cpu_z.npy")
         print("Precalculated CPU mode result is loaded.")
     else:
         start_cpu = time.time()
-        Z_cpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel')
+        Z_cpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel")
         end_cpu = time.time()
 
         print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu))
@@ -132,7 +154,7 @@ def test_pbmc():
         print("Precalculated GPU mode result is loaded.")
     else:
         start_gpu = time.time()
-        Z_gpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel', use_gpu = True)
+        Z_gpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel", use_gpu=True)
         end_gpu = time.time()
 
         print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu))
@@ -140,31 +162,35 @@ def test_pbmc():
 
     Z_R = np.loadtxt("./result/pbmc_harmony_z.txt")
 
-    check_metrics(Z_cpu, Z_R, prefix = "pbmc_cpu")
-    check_metrics(Z_gpu, Z_R, prefix = "pbmc_gpu")
+    check_metrics(Z_cpu, Z_R, prefix="pbmc_cpu")
+    check_metrics(Z_gpu, Z_R, prefix="pbmc_gpu")
 
     if os.path.exists("./result/pbmc_result.h5ad"):
         adata = None
 
     umap_list = [f for f in os.listdir("./plots") if re.match("pbmc.*.pdf", f)]
     if len(umap_list) < 4:
-        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "pbmc", batch_key = 'Channel')
+        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix="pbmc", batch_key="Channel")
 
 
 def test_mantonbm():
     print("Testing on MantonBM...")
 
-    z_files = [f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)]
+    z_files = [
+        f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)
+    ]
     if len(z_files) < 3:
         adata = pg.read_input("./data/MantonBM/original_data.h5ad")
-        adata.obs['Individual'] = pd.Categorical(adata.obs['Channel'].apply(lambda s: s.split('_')[0][-1]))
+        adata.obs["Individual"] = pd.Categorical(
+            adata.obs["Channel"].apply(lambda s: s.split("_")[0][-1])
+        )
 
     if os.path.exists("./result/MantonBM_cpu_z.npy"):
         Z_cpu = np.load("./result/MantonBM_cpu_z.npy")
         print("Precalculated CPU mode result is loaded.")
     else:
         start_cpu = time.time()
-        Z_cpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel')
+        Z_cpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel")
         end_cpu = time.time()
 
         print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu))
@@ -175,7 +201,7 @@ def test_mantonbm():
         print("Precalculated GPU mode result is loaded.")
     else:
         start_gpu = time.time()
-        Z_gpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel', use_gpu = True)
+        Z_gpu = harmonize(adata.obsm["X_pca"], adata.obs, "Channel", use_gpu=True)
         end_gpu = time.time()
 
         print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu))
@@ -183,24 +209,24 @@ def test_mantonbm():
 
     Z_R = np.loadtxt("./result/MantonBM_harmony_z.txt")
 
-    check_metrics(Z_cpu, Z_R, prefix = "MantonBM_cpu")
-    check_metrics(Z_gpu, Z_R, prefix = "MantonBM_gpu")
+    check_metrics(Z_cpu, Z_R, prefix="MantonBM_cpu")
+    check_metrics(Z_gpu, Z_R, prefix="MantonBM_gpu")
 
     if os.path.exists("./result/MantonBM_result.h5ad"):
         adata = None
 
     umap_list = [f for f in os.listdir("./plots") if re.match("MantonBM.*.pdf", f)]
     if len(umap_list) < 4:
-        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "MantonBM", batch_key = 'Individual')
+        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix="MantonBM", batch_key="Individual")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     dataset = sys.argv[1]
 
-    assert dataset in ['cell_lines', 'pbmc', 'MantonBM']
-    if dataset == 'cell_lines':
+    assert dataset in ["cell_lines", "pbmc", "MantonBM"]
+    if dataset == "cell_lines":
         test_cell_lines()
-    elif dataset == 'pbmc':
+    elif dataset == "pbmc":
         test_pbmc()
     else:
-        test_mantonbm()
\ No newline at end of file
+        test_mantonbm()

From b281cad3e609a03ec2ece68dcf0c1b74c258ea78 Mon Sep 17 00:00:00 2001
From: mbruhns <matthias.bruhns@uni-tuebingen.de>
Date: Sun, 10 Dec 2023 16:19:15 +0100
Subject: [PATCH 4/6] Update requirements to be able to use MPS backend.

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 16997ee..a1e90d2 100644
--- a/setup.py
+++ b/setup.py
@@ -7,13 +7,13 @@
     long_description = f.read()
 
 requires = [
-    "torch",
+    "torch>=1.12",
     "numpy",
     "pandas",
     "psutil",
     "threadpoolctl",
     "scikit-learn>=0.23",
-    "importlib_metadata>=0.7; python_version < '3.8'",
+    "importlib_metadata>=0.7;",
 ]
 
 setup(

From 91ffe50ece4d9b17a2cba48b52dba38264c5cc3f Mon Sep 17 00:00:00 2001
From: mbruhns <matthias.bruhns@uni-tuebingen.de>
Date: Sun, 10 Dec 2023 16:22:36 +0100
Subject: [PATCH 5/6] Fix typo in requirements.

---
 harmony/version.py | 16 ++++++++++++++++
 setup.py           |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 harmony/version.py

diff --git a/harmony/version.py b/harmony/version.py
new file mode 100644
index 0000000..47cd6ee
--- /dev/null
+++ b/harmony/version.py
@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '0.1.dev101+gb281cad.d20231210'
+__version_tuple__ = version_tuple = (0, 1, 'dev101', 'gb281cad.d20231210')
diff --git a/setup.py b/setup.py
index a1e90d2..06ac7ff 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
     "psutil",
     "threadpoolctl",
     "scikit-learn>=0.23",
-    "importlib_metadata>=0.7;",
+    "importlib_metadata>=0.7",
 ]
 
 setup(

From cb3065b22470e10ac85d28cf493564c69fb4533c Mon Sep 17 00:00:00 2001
From: mbruhns <matthias.bruhns@uni-tuebingen.de>
Date: Mon, 11 Dec 2023 09:16:59 +0100
Subject: [PATCH 6/6] Remove version.py, remove importlib_metadata from setup,
 remove whitespace in classifiers in setup.

---
 harmony/version.py | 16 ----------------
 setup.py           |  5 ++---
 2 files changed, 2 insertions(+), 19 deletions(-)
 delete mode 100644 harmony/version.py

diff --git a/harmony/version.py b/harmony/version.py
deleted file mode 100644
index 47cd6ee..0000000
--- a/harmony/version.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# file generated by setuptools_scm
-# don't change, don't track in version control
-TYPE_CHECKING = False
-if TYPE_CHECKING:
-    from typing import Tuple, Union
-    VERSION_TUPLE = Tuple[Union[int, str], ...]
-else:
-    VERSION_TUPLE = object
-
-version: str
-__version__: str
-__version_tuple__: VERSION_TUPLE
-version_tuple: VERSION_TUPLE
-
-__version__ = version = '0.1.dev101+gb281cad.d20231210'
-__version_tuple__ = version_tuple = (0, 1, 'dev101', 'gb281cad.d20231210')
diff --git a/setup.py b/setup.py
index 06ac7ff..c36c44b 100644
--- a/setup.py
+++ b/setup.py
@@ -12,8 +12,7 @@
     "pandas",
     "psutil",
     "threadpoolctl",
-    "scikit-learn>=0.23",
-    "importlib_metadata>=0.7",
+    "scikit-learn>=0.23"
 ]
 
 setup(
@@ -24,7 +23,7 @@
     url="https://github.com/lilab-bcb/harmony-pytorch",
     author="Yiming Yang, Bo Li",
     author_email="yyang43@mgh.harvard.edu, bli28@mgh.harvard.edu",
-    classifiers=[  # https://pypi.python.org/pypi?%3Aaction=list_classifiers
+    classifiers=[ # https://pypi.python.org/pypi?%3Aaction=list_classifiers
         "Development Status :: 3 - Alpha",
         "Intended Audience :: Developers",
         "Intended Audience :: Science/Research",