From 3968687cea8a1a36f2f05a9e10eec9ea9f481395 Mon Sep 17 00:00:00 2001 From: tdrose Date: Tue, 19 Dec 2023 17:02:26 +0100 Subject: [PATCH] Documentation for coloc module --- README.md | 6 +- docs/examples.rst | 37 ++++++++++ metaspace_converter/colocalization.py | 23 ++++-- metaspace_converter/constants.py | 2 + .../tests/colocalization_test.py | 73 +++++++++++-------- 5 files changed, 102 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 6054664..e902323 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ of many packages of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) su [squidpy](https://squidpy.readthedocs.io/en/stable/index.html) for spatial omics analysis. Another supported format that is part of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) -is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. +is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. This enables users to easily align and integrate METASPACE datasets to other spatial omics modalities. +Additionally, the package can perform the colocalization analysis commonly used for spatial metabolomics. + If you encounter any bugs or have suggestions for new features, please open an issue in the [GitHub repository](https://github.com/metaspace2020/metaspace-converter). @@ -28,6 +30,7 @@ If you encounter any bugs or have suggestions for new features, please open an i Our package requires `python >= 3.9`. You can install the package directly from [PyPI](https://pypi.org/project/metaspace-converter/): + ```bash pip install metaspace-converter ``` @@ -161,7 +164,6 @@ sdata.points["maldi_points"] = sdata.transform_element_to_coordinate_system( Unless specified otherwise in file headers or LICENSE files present in subdirectories, all files are licensed under the [Apache 2.0 license](https://github.com/metaspace2020/metaspace/blob/master/LICENSE). - [badge-docs]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/docs.yml?label=documentation [badge-pypi]: https://img.shields.io/pypi/v/metaspace-converter [badge-tests]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/tests.yml?branch=master&label=tests diff --git a/docs/examples.rst b/docs/examples.rst index 2a4dfae..14a0cf4 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -114,3 +114,40 @@ Here using a reversed colormap which better represents intense values on bright .. image:: ./_static/img/example_img_sd.png :alt: Visualization with SpatialData + + +Colocalization Analysis +----------------------- + +A popular type of analysis for imaging mass spectrometry data or spatial metabolomics +is colocalization analysis. +In the `ColocML`_ publication, +the authors evaluated data processing and colocalization metrics. +Their best method (median filtering, quantile thresholding, and cosine similarity) +can be executed with our package: + +.. testcode::
+ + from metaspace_converter import metaspace_to_anndata, colocalization + + # Download data + adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1) + + # Perform median filtering and quantile thresholding + # The processed data is saved as a layer `adata.layers["colocml_preprocessing"]` + # It has the same dimensions as `adata.X` + colocalization.colocML_preprocessing(adata, layer="colocml_preprocessing") + + # Compute the pairwise colocalization matrix between all ion images + # As an input, the processed data from `adata.layers["colocml_preprocessing"]` is used + # The colocalization matrix is saved in `adata.uns["colocalization"]` + colocalization.colocalization(adata, layer="colocml_preprocessing") + +.. testoutput:: + :hide: + + ... + + + +.. _ColocML: https://doi.org/10.1093/bioinformatics/btaa085 diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py index b476413..56a7c9a 100644 --- a/metaspace_converter/colocalization.py +++ b/metaspace_converter/colocalization.py @@ -5,6 +5,7 @@ from scipy import ndimage from metaspace_converter.anndata_to_array import anndata_to_image_array +from metaspace_converter.constants import COLOCALIZATION def colocML_preprocessing( @@ -40,11 +41,17 @@ def colocML_preprocessing( None. The processed data is saved in ``layer``. If layer is set to None, ``adata.X`` will be overwritten + Raises: + ValueError: If no annotations are available in ``adata.X``. + """ # Extract image array from anndata: imarray = anndata_to_image_array(adata=adata) + if len(imarray) == 0: + raise ValueError("AnnData contains no annotations. ColocML preprocessing cannot be called.") + # Median filtering imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1])) @@ -66,10 +73,10 @@ def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing """ Colocalization of ion images using the cosine similarity metric. - In combination with ``colocML_preprocessing`` this metric performed best in the + In combination with the ``colocML_preprocessing`` function, this metric performed best in the colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). - We recommend to call the the ``colocML_preprocessing`` function beforehand. + It is recommended to call the ``colocML_preprocessing`` function beforehand. Args: adata: An AnnData object. @@ -78,21 +85,25 @@ def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing data per default in ``adata.layer['colocml_preprocessing']``. Returns: - None. The processed data is saved in ``adata.uns['colocalization]``. + None. The processed data is saved in ``adata.uns['colocalization']``.
""" + # Select data if layer == None or layer not in adata.layers.keys(): - data = adata.X.transpose() + data = np.array(adata.X).transpose() else: - data = adata.layers[layer].transpose() + data = np.array(adata.layers[layer]).transpose() + # Compute colocalization coloc = _pairwise_cosine_similarity(data) - adata.uns["colocalization"] = coloc + # Save colocalization + adata.uns[COLOCALIZATION] = coloc def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: + # Divide image vectors by euclidean norm norm_data = data / np.linalg.norm(data, axis=1, keepdims=True) # Compute pairwise cosine similarity diff --git a/metaspace_converter/constants.py b/metaspace_converter/constants.py index 99bf75e..1d4f4f9 100644 --- a/metaspace_converter/constants.py +++ b/metaspace_converter/constants.py @@ -28,3 +28,5 @@ class COL: XY: Final = (X, Y) YX: Final = (Y, X) YXC: Final = (Y, X, C) + +COLOCALIZATION = "colocalization" diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 2fd3e9e..408cb8a 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -10,7 +10,7 @@ from metaspace_converter import anndata_to_image_array from metaspace_converter.colocalization import colocalization, colocML_preprocessing -from metaspace_converter.constants import COL, METASPACE_KEY, X, Y +from metaspace_converter.constants import COL, COLOCALIZATION, METASPACE_KEY, X, Y from metaspace_converter.to_anndata import all_image_pixel_coordinates @@ -37,60 +37,71 @@ def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: @pytest.mark.parametrize( "adata_dummy", - [dict(num_ions=5, height=10, width=10), dict(num_ions=4, height=4, width=4)], + [ + dict(num_ions=5, height=10, width=10), + dict(num_ions=0, height=2, width=3), + dict(num_ions=4, height=4, width=4), + ], indirect=["adata_dummy"], ) def test_colocml_preprocessing(adata_dummy): + COLOCML_LAYER = "colocml_preprocessing" + adata = adata_dummy - # Test median filtering - colocML_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0, layer="colocml_preprocessing" - ) + if adata.X.shape[1] == 0: + with pytest.raises(ValueError) as e_info: + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + ) + else: - actual = anndata_to_image_array(adata) + # Test median filtering + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + ) - # median filter + actual = anndata_to_image_array(adata) - expected_preprocessing = np.median(actual[0][:3, :3]) - observed = adata.layers["colocml_preprocessing"][ - adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0 - ] - assert observed == expected_preprocessing + # median filter - expected_preprocessing = np.median(actual[1][:3, :3]) - observed = adata.layers["colocml_preprocessing"][ - adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1 - ] - assert observed == expected_preprocessing + expected_preprocessing = np.median(actual[0][:3, :3]) + observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] + assert observed == expected_preprocessing - # Quantile thresholding - adata.X[0].reshape(-1) + expected_preprocessing = np.median(actual[1][:3, :3]) + observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1] + assert observed == expected_preprocessing - colocML_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0.5, 
layer="colocml_preprocessing" - ) + # Quantile thresholding + adata.X[0].reshape(-1) + + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer=COLOCML_LAYER + ) - assert all(np.sum(adata.layers["colocml_preprocessing"] == 0, axis=0) <= 0.5 * adata.X.shape[0]) + assert all(np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) <= 0.5 * adata.X.shape[0]) - # Layer exists - assert "colocml_preprocessing" in adata.layers.keys() + # Layer exists + assert COLOCML_LAYER in adata.layers.keys() - # Layer sizes match - assert adata.X.shape == adata.layers["colocml_preprocessing"].shape + # Layer sizes match + assert adata.X.shape == adata.layers[COLOCML_LAYER].shape def test_colocalization(): arr = np.array([[0.0, 1.0, 0.0], [0.0, 2.0, 0.0], [1.0, 0.0, 0.0]]).transpose() + + expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + adata = AnnData(X=arr) colocalization(adata, layer=None) - assert "colocalization" in adata.uns.keys() + assert COLOCALIZATION in adata.uns.keys() - coloc = adata.uns["colocalization"] + coloc = adata.uns[COLOCALIZATION] - expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) assert np.all(coloc == expected)