Colocalization module #17

Merged · 9 commits · Feb 6, 2024

Changes from 8 commits
6 changes: 4 additions & 2 deletions README.md
@@ -16,10 +16,12 @@ of many packages of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) su
[squidpy](https://squidpy.readthedocs.io/en/stable/index.html) for spatial omics analysis.

Another supported format that is part of the [scverse](https://doi.org/10.1038/s41587-023-01733-8)
is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data.
This enables users to easily align and integrate METASPACE datasets
with other spatial omics modalities.

Additionally, colocalization analysis, which is commonly used for spatial metabolomics, can be performed with the package.

If you encounter any bugs or have suggestions for new features, please open an issue in the
[GitHub repository](https://github.com/metaspace2020/metaspace-converter).

@@ -28,6 +30,7 @@ If you encounter any bugs or have suggestions for new features, please open an i
Our package requires `python >= 3.9`.

You can install the package directly from [PyPI](https://pypi.org/project/metaspace-converter/):

```bash
pip install metaspace-converter
```
@@ -161,7 +164,6 @@ sdata.points["maldi_points"] = sdata.transform_element_to_coordinate_system(

Unless specified otherwise in file headers or LICENSE files present in subdirectories, all files are licensed under the [Apache 2.0 license](https://github.com/metaspace2020/metaspace/blob/master/LICENSE).


[badge-docs]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/docs.yml?label=documentation
[badge-pypi]: https://img.shields.io/pypi/v/metaspace-converter
[badge-tests]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/tests.yml?branch=master&label=tests
32 changes: 32 additions & 0 deletions docs/examples.rst
@@ -114,3 +114,35 @@ Here using a reversed colormap which better represents intense values on bright

.. image:: ./_static/img/example_img_sd.png
:alt: Visualization with SpatialData


Colocalization Analysis
-----------------------

A popular type of analysis for imaging mass spectrometry or spatial metabolomics data
is colocalization analysis.
In the `ColocML`_ publication,
the authors evaluated data processing approaches and colocalization metrics.
Their best method (median filtering, quantile thresholding, and cosine similarity)
can be executed with our package:

.. testcode::

from metaspace_converter import metaspace_to_anndata, colocalization

# Download data
adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1)

# Perform median filtering and quantile thresholding
# The processed data is saved as a layer `adata.layers["colocml_preprocessing"]`
# It has the same dimensions as `adata.X`
colocalization.coloc_ml_preprocessing(adata, layer="colocml_preprocessing")

# Compute the pairwise colocalization matrix between all ion images
# As an input, the processed data from `adata.layers["colocml_preprocessing"]` is used
# The colocalization matrix is saved in `adata.varp["colocalization"]`
colocalization.colocalization(adata, layer="colocml_preprocessing")
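
# Read the results back (shown for illustration; the keys follow the calls above)
coloc_matrix = adata.varp["colocalization"]

# The preprocessed ion images can also be extracted as an image array
from metaspace_converter import anndata_to_image_array
processed_images = anndata_to_image_array(adata, layer="colocml_preprocessing")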



.. _ColocML: https://doi.org/10.1093/bioinformatics/btaa085
8 changes: 7 additions & 1 deletion metaspace_converter/__init__.py
@@ -1,5 +1,11 @@
from metaspace_converter import colocalization
from metaspace_converter.anndata_to_array import anndata_to_image_array
from metaspace_converter.to_anndata import metaspace_to_anndata
from metaspace_converter.to_spatialdata import metaspace_to_spatialdata

__all__ = ["metaspace_to_anndata", "metaspace_to_spatialdata", "anndata_to_image_array"]
__all__ = [
"metaspace_to_anndata",
"metaspace_to_spatialdata",
"anndata_to_image_array",
"colocalization",
]
14 changes: 11 additions & 3 deletions metaspace_converter/anndata_to_array.py
@@ -1,3 +1,4 @@
from typing import Optional
import numpy as np
from anndata import AnnData

@@ -16,13 +17,15 @@ def _check_pixel_coordinates(adata: AnnData) -> bool:
return np.all(np.equal(pixel_list, required_pixels))


def anndata_to_image_array(adata: AnnData) -> np.ndarray:
def anndata_to_image_array(adata: AnnData, layer: Optional[str] = None) -> np.ndarray:
Member (author) commented:
Also added a `layer` argument to the `anndata_to_image_array` function. With this, processed images stored in layers can also be extracted, as sketched below.
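
A minimal sketch of how the new argument could be used (illustrative only, reusing the dataset from the docs example and the same layer name passed there):

```python
from metaspace_converter import anndata_to_image_array, colocalization, metaspace_to_anndata

# Download a dataset and preprocess it into a named layer (the layer name is just an example)
adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1)
colocalization.coloc_ml_preprocessing(adata, layer="colocml_preprocessing")

# Extract the processed ion images from that layer instead of `adata.X`
processed_images = anndata_to_image_array(adata, layer="colocml_preprocessing")
```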

"""
Extracts an array of ion images from an AnnData object
(that has been generated through the ``metaspace_to_anndata`` function).

Args:
adata: An AnnData object.
layer: Key of ``adata.layers`` that should be extracted to an image array.
Default is None, which means that ``adata.X`` will be used.

Returns:
A three-dimensional Numpy array in the following shape
@@ -36,8 +39,13 @@ def anndata_to_image_array(adata: AnnData) -> np.ndarray:
E.g. Pixel have been removed/added and the number of pixels
and their coordinates do not match the original image dimensions.
"""

pixel_array = adata.X.transpose().copy()
if layer is None:
pixel_array = np.array(adata.X).transpose().copy()
elif layer in adata.layers.keys():
pixel_array = np.array(adata.layers[layer]).transpose().copy()
else:
raise ValueError(f"Layer `{layer}` not found in adata.layers.")

img_size = adata.uns[METASPACE_KEY]["image_size"]

# Check if image dimensions are okay
117 changes: 117 additions & 0 deletions metaspace_converter/colocalization.py
@@ -0,0 +1,117 @@
from typing import Optional, Tuple

import numpy as np
from anndata import AnnData
from scipy import ndimage

from metaspace_converter.anndata_to_array import anndata_to_image_array
from metaspace_converter.constants import COLOCALIZATION


def coloc_ml_preprocessing(
adata: AnnData,
layer: Optional[str] = "coloc_ml_preprocessing",
median_filter_size: Tuple[int, int] = (3, 3),
quantile_threshold: float = 0.5,
):
"""
Preprocessing for colocalization analysis according to the colocML publication
(https://doi.org/10.1093/bioinformatics/btaa085).

In the publication the authors evaluated colocalization metrics and preprocessing approaches.
They found the best performance for
1) median filtering of ion images with a (3, 3) kernel size and
2) quantile thresholding at 50%, meaning all pixels with intensities below the 50%
quantile are set to 0.

This function performs the same preprocessing steps.
It is recommended to call this function before running the ``colocalization`` function.

Args:
adata: An AnnData object.
layer: Key for the ``adata.layers`` dict in which the processed data will be saved.
Default value is ``coloc_ml_preprocessing``.
If None, ``adata.X`` will be overwritten with the processed data.
median_filter_size: 2-dimensional filter size for the median filtering that
will be performed per ion image.
quantile_threshold: Float between 0 and 1. The function computes the quantile value per
ion image and all pixels below the quantile threshold will be set to 0.

Returns:
None. The processed data is saved in ``layer``. If ``layer`` is set to None, ``adata.X``
will be overwritten.

Raises:
ValueError: If no annotations are available in ``adata.X``.

"""

# Extract image array from anndata:
imarray = anndata_to_image_array(adata=adata)

if len(imarray) == 0:
raise ValueError("AnnData contains no annotations. ColocML preprocessing cannot be called.")

# Median filtering
imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1]))

# reshaping
imarray = imarray.reshape((imarray.shape[0], -1))

# Quantile thresholding
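# The percentile is computed per ion image (row-wise, axis=1), so each image is
# thresholded against its own quantile value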
mask = imarray < np.percentile(imarray, q=quantile_threshold * 100, axis=1)[:, np.newaxis]
imarray[mask] = 0

# Inserting of processed data into adata object
if layer is None:
adata.X = imarray.transpose()
else:
adata.layers[layer] = imarray.transpose()


def colocalization(adata: AnnData, layer: Optional[str] = "coloc_ml_preprocessing"):
"""
Colocalization of ion images using the cosine similarity metric.

In combination with the ``coloc_ml_preprocessing`` function, this metric performed best in the
colocML publication (https://doi.org/10.1093/bioinformatics/btaa085).

It is recommended to call the ``coloc_ml_preprocessing`` function beforehand.

Args:
adata: An AnnData object.
layer: Key for the ``adata.layers`` dict from which the ion image data is taken.
If ``None``, ``adata.X`` is used. By default, ``coloc_ml_preprocessing`` saves the
preprocessed data in ``adata.layers['coloc_ml_preprocessing']``.

Returns:
None. The processed data is saved in ``adata.varp['colocalization']``.

Raises:
ValueError: If layer is not found in adata.layers.
"""

# Select data
if layer is None:
data = np.array(adata.X).transpose()
elif layer in adata.layers.keys():
data = np.array(adata.layers[layer]).transpose()
else:
raise ValueError(f"Layer `{layer}` not found in adata.layers.")

# Compute colocalization
coloc = _pairwise_cosine_similarity(data)

# Save colocalization
adata.varp[COLOCALIZATION] = coloc


def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray:

# Divide image vectors by euclidean norm
norm_data = data / np.linalg.norm(data, axis=1, keepdims=True)

# Compute pairwise cosine similarity
similarity_matrix = np.dot(norm_data, norm_data.T)

return similarity_matrix
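
For reference, the cosine similarity computed by `_pairwise_cosine_similarity` reduces to dot products of L2-normalized image vectors; a minimal standalone sketch with toy values (not part of the package):

```python
import numpy as np

# Two flattened ion images; b is a scalar multiple of a, so their cosine similarity is 1
a = np.array([0.0, 1.0, 2.0, 0.0])
b = np.array([0.0, 2.0, 4.0, 0.0])

cos_sim = np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b))
print(round(cos_sim, 6))  # 1.0
```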
2 changes: 2 additions & 0 deletions metaspace_converter/constants.py
@@ -28,3 +28,5 @@ class COL:
XY: Final = (X, Y)
YX: Final = (Y, X)
YXC: Final = (Y, X, C)

COLOCALIZATION = "colocalization"
118 changes: 118 additions & 0 deletions metaspace_converter/tests/colocalization_test.py
@@ -0,0 +1,118 @@
from typing import TYPE_CHECKING, Tuple

import numpy as np
import pandas as pd
import pytest
from anndata import AnnData

if TYPE_CHECKING:
import _pytest.fixtures

from metaspace_converter import anndata_to_image_array
from metaspace_converter.colocalization import colocalization, coloc_ml_preprocessing
from metaspace_converter.constants import COL, COLOCALIZATION, METASPACE_KEY, X, Y
from metaspace_converter.to_anndata import all_image_pixel_coordinates


@pytest.fixture
def adata_dummy(request: "_pytest.fixtures.SubRequest") -> Tuple[AnnData, float]:
num_ions = request.param.get("num_ions", 0)
height = request.param.get("height", 0)
width = request.param.get("width", 0)
quantile_threshold = request.param.get("quantile_threshold", 0)
shape = (num_ions, height, width)
# Create a stack of ion images where every pixel has a different value
ion_images_stack = np.arange(np.prod(shape)).reshape(shape)
coordinates = all_image_pixel_coordinates(shape[1:])
# Create an AnnData with pixel coordinates
obs = pd.DataFrame(
{COL.ion_image_pixel_y: coordinates[:, 0], COL.ion_image_pixel_x: coordinates[:, 1]}
)
adata = AnnData(
X=ion_images_stack.reshape((height * width, num_ions)),
obs=obs,
uns={METASPACE_KEY: {"image_size": {X: width, Y: height}}},
)
return adata, quantile_threshold


@pytest.mark.parametrize(
"adata_dummy",
[
dict(num_ions=5, height=10, width=10, quantile_threshold=0),
dict(num_ions=0, height=10, width=10, quantile_threshold=0),
dict(num_ions=5, height=3, width=4, quantile_threshold=0),
dict(num_ions=5, height=3, width=4, quantile_threshold=0.5)
],
indirect=["adata_dummy"],
)
def test_colocml_preprocessing(adata_dummy):

COLOCML_LAYER = "coloc_ml_preprocessing"

adata, quantile_threshold = adata_dummy

if adata.X.shape[1] == 0:
with pytest.raises(ValueError) as e_info:
coloc_ml_preprocessing(
adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold,
layer=COLOCML_LAYER
)
else:

# Quantile thresholding and median filtering is tested separately
if quantile_threshold == 0:

# Test median filtering
coloc_ml_preprocessing(
adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold,
layer=COLOCML_LAYER
)

actual = anndata_to_image_array(adata)

# Layer exists
assert COLOCML_LAYER in adata.layers.keys()

# Layer sizes match
assert adata.X.shape == adata.layers[COLOCML_LAYER].shape

# median filter
# Probe a single pixel of the resulting array and compute the expected median filter.
x = 1
y = 1
width = adata.uns[METASPACE_KEY]["image_size"][X]
expected_pixel_features = np.median(actual[:, y - 1:y + 2, x - 1:x + 2], axis=[1, 2])
actual_pixel_features = adata.layers[COLOCML_LAYER][y * width + x, :]
np.testing.assert_allclose(actual_pixel_features, expected_pixel_features)

# Quantile thresholding
else:
coloc_ml_preprocessing(
adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold,
layer=COLOCML_LAYER
)
quantile_value = quantile_threshold * adata.X.shape[0]
actual_thresholded = np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0)
assert all(actual_thresholded <= quantile_value)





def test_colocalization():

arr = np.array([[0.0, 1.0, 0.0], [0.0, 2.0, 0.0], [1.0, 0.0, 0.0]]).transpose()
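# Ion image vectors are the columns of `adata.X`: [0, 1, 0], [0, 2, 0], and [1, 0, 0].
# The first two are scalar multiples (cosine similarity 1); the third is orthogonal to both.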

expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]])

adata = AnnData(X=arr)

colocalization(adata, layer=None)

assert COLOCALIZATION in adata.varp.keys()

coloc = adata.varp[COLOCALIZATION]

assert np.all(coloc == expected)
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "metaspace-converter"
version = "1.0.1"
version = "1.1.0"
authors = [
{name = "Tim Daniel Rose", email = "[email protected]"},
{name = "Andreas Eisenbarth", email = "[email protected]"},