diff --git a/README.md b/README.md index 6054664..e902323 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ of many packages of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) su [squidpy](https://squidpy.readthedocs.io/en/stable/index.html) for spatial omics analysis. Another supported format that is part of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) -is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. +is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. This enables users to easily align and integrate METASPACE datasets to other spatial omics modalities. +Additionally, the commonly used colocalization analysis for spatial metabolomics can be performed with the package. + If you encounter any bugs or have suggestions for new features, please open an issue in the [GitHub repository](https://github.com/metaspace2020/metaspace-converter). @@ -28,6 +30,7 @@ If you encounter any bugs or have suggestions for new features, please open an i Our package requires `python >= 3.9`. You can install the package directly from [PyPI](https://pypi.org/project/metaspace-converter/): + ```bash pip install metaspace-converter ``` @@ -161,7 +164,6 @@ sdata.points["maldi_points"] = sdata.transform_element_to_coordinate_system( Unless specified otherwise in file headers or LICENSE files present in subdirectories, all files are licensed under the [Apache 2.0 license](https://github.com/metaspace2020/metaspace/blob/master/LICENSE). - [badge-docs]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/docs.yml?label=documentation [badge-pypi]: https://img.shields.io/pypi/v/metaspace-converter [badge-tests]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/tests.yml?branch=master&label=tests diff --git a/docs/examples.rst b/docs/examples.rst index 2a4dfae..d07920e 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -114,3 +114,35 @@ Here using a reversed colormap which better represents intense values on bright .. image:: ./_static/img/example_img_sd.png :alt: Visualization with SpatialData + + +Colocalization Analysis +----------------------- + +A popular type of analysis for imaging Mass Spectrometry data or spatial metabolomics +is colocalization analysis. +In the `ColocML`_ publication, +the authors evaluated data processing and colocalization metrics. +Their best method (median filtering, quantile thresholding, and cosine similarity), +can be executed with our package: + +.. testcode:: + + from metaspace_converter import metaspace_to_anndata, colocalization + + # Download data + adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1) + + # Perform median filtering and quantile thresholding + # The processed data is saved as a layer `adata.layers["coloc_ml_preprocessing"]` + # It has the same dimensions as `adata.X` + colocalization.coloc_ml_preprocessing(adata, layer="colocml_preprocessing") + + # Compute the pairwise colocalization matrix between all ion images + # As an input, the processed data from `adata.layers["coloc_ml_preprocessing"]` is used + # The colocalization matrix is saved in `adata.varp["colocalization"]` + colocalization.colocalization(adata, layer="colocml_preprocessing") + + + +.. _ColocML: https://doi.org/10.1093/bioinformatics/btaa085 diff --git a/metaspace_converter/__init__.py b/metaspace_converter/__init__.py index 87c9805..9f5dabe 100644 --- a/metaspace_converter/__init__.py +++ b/metaspace_converter/__init__.py @@ -1,5 +1,11 @@ +from metaspace_converter import colocalization from metaspace_converter.anndata_to_array import anndata_to_image_array from metaspace_converter.to_anndata import metaspace_to_anndata from metaspace_converter.to_spatialdata import metaspace_to_spatialdata -__all__ = ["metaspace_to_anndata", "metaspace_to_spatialdata", "anndata_to_image_array"] +__all__ = [ + "metaspace_to_anndata", + "metaspace_to_spatialdata", + "anndata_to_image_array", + "colocalization", +] diff --git a/metaspace_converter/anndata_to_array.py b/metaspace_converter/anndata_to_array.py index 61d98a2..2399895 100644 --- a/metaspace_converter/anndata_to_array.py +++ b/metaspace_converter/anndata_to_array.py @@ -1,3 +1,4 @@ +from typing import Optional import numpy as np from anndata import AnnData @@ -16,13 +17,15 @@ def _check_pixel_coordinates(adata: AnnData) -> bool: return np.all(np.equal(pixel_list, required_pixels)) -def anndata_to_image_array(adata: AnnData) -> np.ndarray: +def anndata_to_image_array(adata: AnnData, layer: Optional[str]=None) -> np.ndarray: """ Extracts an array of ion images from an AnnData object (that has been generated through the ``metaspace_to_anndata`` function). Args: adata: An AnnData object. + layer: ``AnnData.layer`` that should be extracted to an image array. + Default is None, which means that ``adata.X`` will be used. Returns: A three-dimensional Numpy array in the following shape @@ -36,8 +39,13 @@ def anndata_to_image_array(adata: AnnData) -> np.ndarray: E.g. Pixel have been removed/added and the number of pixels and their coordinates do not match the original image dimensions. """ - - pixel_array = adata.X.transpose().copy() + if layer is None: + pixel_array = np.array(adata.X).transpose().copy() + elif layer in adata.layers.keys(): + pixel_array = np.array(adata.layers[layer]).transpose().copy() + else: + raise ValueError(f"Layer `{layer}` not found in adata.layers.") + img_size = adata.uns[METASPACE_KEY]["image_size"] # Check if image dimensions are okay diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py new file mode 100644 index 0000000..8230390 --- /dev/null +++ b/metaspace_converter/colocalization.py @@ -0,0 +1,117 @@ +from typing import Optional, Tuple + +import numpy as np +from anndata import AnnData +from scipy import ndimage + +from metaspace_converter.anndata_to_array import anndata_to_image_array +from metaspace_converter.constants import COLOCALIZATION + + +def coloc_ml_preprocessing( + adata: AnnData, + layer: Optional[str] = "coloc_ml_preprocessing", + median_filter_size: Tuple[int, int] = (3, 3), + quantile_threshold: float = 0.5, +): + """ + Preprocessing for colocalization analysis according to the colocML publication + (https://doi.org/10.1093/bioinformatics/btaa085). + + In the publication the authors evaluated colocalization metrics and preprocessing approaches. + They found the best performance for + 1) median filtering of ion images with a (3, 3) kernel size and + 2) quantile thresholding ad 50%, meaning all pixels with intensities below the 50% + quantile set to 0. + + This function performs the same preprocessing steps. + Recommended for call before running the ``colocalization`` function. + + Args: + adata: An AnnData object. + layer: Key for the adata.layers dict in which the processed data will be saved. + Default value is ``colocml_preprocessing``. + If None, ``adata.X`` will be overwritten with the processed data. + median_filter_size: 2-dimensional filter size for the median filtering that + will be performed per ion image. + quantile_threshold: Float between 0 and 1. The function computes the quantile value per + ion image and all pixels below the quantile threshold will be set to 0. + + Returns: + None. The processed data is saved in ``layer``. If layer is set to None, ``adata.X`` will + be overwritten + + Raises: + ValueError: If no annotations are available in ``adata.X``. + + """ + + # Extract image array from anndata: + imarray = anndata_to_image_array(adata=adata) + + if len(imarray) == 0: + raise ValueError("AnnData contains no annotations. ColocML preprocessing cannot be called.") + + # Median filtering + imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1])) + + # reshaping + imarray = imarray.reshape((imarray.shape[0], -1)) + + # Quantile thresholding + mask = imarray < np.percentile(imarray, q=quantile_threshold * 100, axis=1)[:, np.newaxis] + imarray[mask] = 0 + + # Inserting of processed data into adata object + if layer is None: + adata.X = imarray.transpose() + else: + adata.layers[layer] = imarray.transpose() + + +def colocalization(adata: AnnData, layer: Optional[str] = "coloc_ml_preprocessing"): + """ + Colocalization of ion images using the cosine similarity metric. + + In combination with the ``colocML_preprocessing`` function, this metric performed best in the + colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). + + It is recommended to call the the ``coloc_ml_preprocessing`` function beforehand. + + Args: + adata: An AnnData object. + layer: Key for ``adata.layer`` from which the ionimage_data for preprocessing taken. + If ``None``, ``adata.X`` is used. ``coloc_ml_preprocessing`` will save the preprocessed + data per default in ``adata.layer['coloc_ml_preprocessing']``. + + Returns: + None. The processed data is saved in ``adata.varp['colocalization']``. + + Raises: + ValueError: If layer is not found in adata.layers. + """ + + # Select data + if layer is None: + data = np.array(adata.X).transpose() + elif layer in adata.layers.keys(): + data = np.array(adata.layers[layer]).transpose() + else: + raise ValueError(f"Layer `{layer}` not found in adata.layers.") + + # Compute colocalization + coloc = _pairwise_cosine_similarity(data) + + # Save colocalization + adata.varp[COLOCALIZATION] = coloc + + +def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: + + # Divide image vectors by euclidean norm + norm_data = data / np.linalg.norm(data, axis=1, keepdims=True) + + # Compute pairwise cosine similarity + similarity_matrix = np.dot(norm_data, norm_data.T) + + return similarity_matrix diff --git a/metaspace_converter/constants.py b/metaspace_converter/constants.py index 99bf75e..1d4f4f9 100644 --- a/metaspace_converter/constants.py +++ b/metaspace_converter/constants.py @@ -28,3 +28,5 @@ class COL: XY: Final = (X, Y) YX: Final = (Y, X) YXC: Final = (Y, X, C) + +COLOCALIZATION = "colocalization" diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py new file mode 100644 index 0000000..fca5fd5 --- /dev/null +++ b/metaspace_converter/tests/colocalization_test.py @@ -0,0 +1,118 @@ +from typing import TYPE_CHECKING, Tuple + +import numpy as np +import pandas as pd +import pytest +from anndata import AnnData + +if TYPE_CHECKING: + import _pytest.fixtures + +from metaspace_converter import anndata_to_image_array +from metaspace_converter.colocalization import colocalization, coloc_ml_preprocessing +from metaspace_converter.constants import COL, COLOCALIZATION, METASPACE_KEY, X, Y +from metaspace_converter.to_anndata import all_image_pixel_coordinates + + +@pytest.fixture +def adata_dummy(request: "_pytest.fixtures.SubRequest") -> Tuple[AnnData, float]: + num_ions = request.param.get("num_ions", 0) + height = request.param.get("height", 0) + width = request.param.get("width", 0) + quantile_threshold = request.param.get("quantile_threshold", 0) + shape = (num_ions, height, width) + # Create a stack of ion images where every pixel has a different value + ion_images_stack = np.arange(np.prod(shape)).reshape(shape) + coordinates = all_image_pixel_coordinates(shape[1:]) + # Create an AnnData with pixel coordinates + obs = pd.DataFrame( + {COL.ion_image_pixel_y: coordinates[:, 0], COL.ion_image_pixel_x: coordinates[:, 1]} + ) + adata = AnnData( + X=ion_images_stack.reshape((height * width, num_ions)), + obs=obs, + uns={METASPACE_KEY: {"image_size": {X: width, Y: height}}}, + ) + return adata, quantile_threshold + + +@pytest.mark.parametrize( + "adata_dummy", + [ + dict(num_ions=5, height=10, width=10, quantile_threshold=0), + dict(num_ions=0, height=10, width=10, quantile_threshold=0), + dict(num_ions=5, height=3, width=4, quantile_threshold=0), + dict(num_ions=5, height=3, width=4, quantile_threshold=0.5) + ], + indirect=["adata_dummy"], +) +def test_colocml_preprocessing(adata_dummy): + + COLOCML_LAYER = "coloc_ml_preprocessing" + + adata, quantile_threshold = adata_dummy + + if adata.X.shape[1] == 0: + with pytest.raises(ValueError) as e_info: + coloc_ml_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold, + layer=COLOCML_LAYER + ) + else: + + # Quantile thresholding and median filtering is tested separately + if quantile_threshold == 0: + + # Test median filtering + coloc_ml_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold, + layer=COLOCML_LAYER + ) + + actual = anndata_to_image_array(adata) + + # Layer exists + assert COLOCML_LAYER in adata.layers.keys() + + # Layer sizes match + assert adata.X.shape == adata.layers[COLOCML_LAYER].shape + + # median filter + # Probe a single pixel of the resulting array and compute the expected median filter. + x = 1 + y = 1 + width = adata.uns[METASPACE_KEY]["image_size"][X] + print(actual[:, y - 1:y + 2, x - 1:x + 2]) + expected_pixel_features = np.median(actual[:, y - 1:y + 2, x - 1:x + 2], axis=[1, 2]) + actual_pixel_features = adata.layers[COLOCML_LAYER][y * width + x, :] + np.testing.assert_allclose(actual_pixel_features, expected_pixel_features) + + # Quantile thresholding + else: + coloc_ml_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold, + layer=COLOCML_LAYER + ) + quantile_value = quantile_threshold * adata.X.shape[0] + actual_thresholded = np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) + assert all(actual_thresholded <= quantile_value) + + + + + +def test_colocalization(): + + arr = np.array([[0.0, 1.0, 0.0], [0.0, 2.0, 0.0], [1.0, 0.0, 0.0]]).transpose() + + expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + + adata = AnnData(X=arr) + + colocalization(adata, layer=None) + + assert COLOCALIZATION in adata.varp.keys() + + coloc = adata.varp[COLOCALIZATION] + + assert np.all(coloc == expected) diff --git a/pyproject.toml b/pyproject.toml index 8f1dbf6..860d151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "metaspace-converter" -version = "1.0.1" +version = "1.1.0" authors = [ {name = "Tim Daniel Rose", email = "tim.rose@embl.de"}, {name = "Andreas Eisenbarth", email = "andreas.eisenbarth@embl.de"},