From 3968687cea8a1a36f2f05a9e10eec9ea9f481395 Mon Sep 17 00:00:00 2001 From: tdrose Date: Tue, 19 Dec 2023 17:02:26 +0100 Subject: [PATCH] Documentation for coloc module --- README.md | 6 +- docs/examples.rst | 37 ++++++++++ metaspace_converter/colocalization.py | 23 ++++-- metaspace_converter/constants.py | 2 + .../tests/colocalization_test.py | 73 +++++++++++-------- 5 files changed, 102 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 6054664..e902323 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ of many packages of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) su [squidpy](https://squidpy.readthedocs.io/en/stable/index.html) for spatial omics analysis. Another supported format that is part of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) -is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. +is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. This enables users to easily align and integrate METASPACE datasets to other spatial omics modalities. +Additionally, the package can perform the colocalization analysis commonly used for spatial metabolomics. + If you encounter any bugs or have suggestions for new features, please open an issue in the [GitHub repository](https://github.com/metaspace2020/metaspace-converter). @@ -28,6 +30,7 @@ If you encounter any bugs or have suggestions for new features, please open an i Our package requires `python >= 3.9`. You can install the package directly from [PyPI](https://pypi.org/project/metaspace-converter/): + ```bash pip install metaspace-converter ``` @@ -161,7 +164,6 @@ sdata.points["maldi_points"] = sdata.transform_element_to_coordinate_system( Unless specified otherwise in file headers or LICENSE files present in subdirectories, all files are licensed under the [Apache 2.0 license](https://github.com/metaspace2020/metaspace/blob/master/LICENSE). - [badge-docs]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/docs.yml?label=documentation [badge-pypi]: https://img.shields.io/pypi/v/metaspace-converter [badge-tests]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/tests.yml?branch=master&label=tests diff --git a/docs/examples.rst b/docs/examples.rst index 2a4dfae..14a0cf4 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -114,3 +114,40 @@ Here using a reversed colormap which better represents intense values on bright .. image:: ./_static/img/example_img_sd.png :alt: Visualization with SpatialData + + +Colocalization Analysis +----------------------- + +A popular type of analysis for imaging mass spectrometry data or spatial metabolomics +is colocalization analysis. +In the `ColocML`_ publication, +the authors evaluated data processing and colocalization metrics. +Their best method (median filtering, quantile thresholding, and cosine similarity) +can be executed with our package: + +.. testcode::
+ + from metaspace_converter import metaspace_to_anndata, colocalization + + # Download data + adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1) + + # Perform median filtering and quantile thresholding + # The processed data is saved as a layer `adata.layers["colocml_preprocessing"]` + # It has the same dimensions as `adata.X` + colocalization.colocML_preprocessing(adata, layer="colocml_preprocessing") + + # Compute the pairwise colocalization matrix between all ion images + # As an input, the processed data from `adata.layers["colocml_preprocessing"]` is used + # The colocalization matrix is saved in `adata.uns["colocalization"]` + colocalization.colocalization(adata, layer="colocml_preprocessing") + +.. testoutput:: + :hide: + + ... + + + +.. _ColocML: https://doi.org/10.1093/bioinformatics/btaa085 diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py index b476413..56a7c9a 100644 --- a/metaspace_converter/colocalization.py +++ b/metaspace_converter/colocalization.py @@ -5,6 +5,7 @@ from scipy import ndimage from metaspace_converter.anndata_to_array import anndata_to_image_array +from metaspace_converter.constants import COLOCALIZATION def colocML_preprocessing( @@ -40,11 +41,17 @@ def colocML_preprocessing( None. The processed data is saved in ``layer``. If layer is set to None, ``adata.X`` will be overwritten + Raises: + ValueError: If no annotations are available in ``adata.X``. + """ # Extract image array from anndata: imarray = anndata_to_image_array(adata=adata) + if len(imarray) == 0: + raise ValueError("AnnData contains no annotations. ColocML preprocessing cannot be called.") + # Median filtering imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1])) @@ -66,10 +73,10 @@ def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing """ Colocalization of ion images using the cosine similarity metric. - In combination with ``colocML_preprocessing`` this metric performed best in the + In combination with the ``colocML_preprocessing`` function, this metric performed best in the colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). - We recommend to call the the ``colocML_preprocessing`` function beforehand. + It is recommended to call the ``colocML_preprocessing`` function beforehand. Args: adata: An AnnData object. @@ -78,21 +85,25 @@ def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing data per default in ``adata.layer['colocml_preprocessing']``. Returns: - None. The processed data is saved in ``adata.uns['colocalization]``. + None. The processed data is saved in ``adata.uns['colocalization']``.
""" + # Select data if layer == None or layer not in adata.layers.keys(): - data = adata.X.transpose() + data = np.array(adata.X).transpose() else: - data = adata.layers[layer].transpose() + data = np.array(adata.layers[layer]).transpose() + # Compute colocalization coloc = _pairwise_cosine_similarity(data) - adata.uns["colocalization"] = coloc + # Save colocalization + adata.uns[COLOCALIZATION] = coloc def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: + # Divide image vectors by euclidean norm norm_data = data / np.linalg.norm(data, axis=1, keepdims=True) # Compute pairwise cosine similarity diff --git a/metaspace_converter/constants.py b/metaspace_converter/constants.py index 99bf75e..1d4f4f9 100644 --- a/metaspace_converter/constants.py +++ b/metaspace_converter/constants.py @@ -28,3 +28,5 @@ class COL: XY: Final = (X, Y) YX: Final = (Y, X) YXC: Final = (Y, X, C) + +COLOCALIZATION = "colocalization" diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 2fd3e9e..408cb8a 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -10,7 +10,7 @@ from metaspace_converter import anndata_to_image_array from metaspace_converter.colocalization import colocalization, colocML_preprocessing -from metaspace_converter.constants import COL, METASPACE_KEY, X, Y +from metaspace_converter.constants import COL, COLOCALIZATION, METASPACE_KEY, X, Y from metaspace_converter.to_anndata import all_image_pixel_coordinates @@ -37,60 +37,71 @@ def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: @pytest.mark.parametrize( "adata_dummy", - [dict(num_ions=5, height=10, width=10), dict(num_ions=4, height=4, width=4)], + [ + dict(num_ions=5, height=10, width=10), + dict(num_ions=0, height=2, width=3), + dict(num_ions=4, height=4, width=4), + ], indirect=["adata_dummy"], ) def test_colocml_preprocessing(adata_dummy): + COLOCML_LAYER = "colocml_preprocessing" + adata = adata_dummy - # Test median filtering - colocML_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0, layer="colocml_preprocessing" - ) + if adata.X.shape[1] == 0: + with pytest.raises(ValueError) as e_info: + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + ) + else: - actual = anndata_to_image_array(adata) + # Test median filtering + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + ) - # median filter + actual = anndata_to_image_array(adata) - expected_preprocessing = np.median(actual[0][:3, :3]) - observed = adata.layers["colocml_preprocessing"][ - adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0 - ] - assert observed == expected_preprocessing + # median filter - expected_preprocessing = np.median(actual[1][:3, :3]) - observed = adata.layers["colocml_preprocessing"][ - adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1 - ] - assert observed == expected_preprocessing + expected_preprocessing = np.median(actual[0][:3, :3]) + observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] + assert observed == expected_preprocessing - # Quantile thresholding - adata.X[0].reshape(-1) + expected_preprocessing = np.median(actual[1][:3, :3]) + observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1] + assert observed == expected_preprocessing - colocML_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0.5, 
layer="colocml_preprocessing" - ) + # Quantile thresholding + adata.X[0].reshape(-1) + + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer=COLOCML_LAYER + ) - assert all(np.sum(adata.layers["colocml_preprocessing"] == 0, axis=0) <= 0.5 * adata.X.shape[0]) + assert all(np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) <= 0.5 * adata.X.shape[0]) - # Layer exists - assert "colocml_preprocessing" in adata.layers.keys() + # Layer exists + assert COLOCML_LAYER in adata.layers.keys() - # Layer sizes match - assert adata.X.shape == adata.layers["colocml_preprocessing"].shape + # Layer sizes match + assert adata.X.shape == adata.layers[COLOCML_LAYER].shape def test_colocalization(): arr = np.array([[0.0, 1.0, 0.0], [0.0, 2.0, 0.0], [1.0, 0.0, 0.0]]).transpose() + + expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + adata = AnnData(X=arr) colocalization(adata, layer=None) - assert "colocalization" in adata.uns.keys() + assert COLOCALIZATION in adata.uns.keys() - coloc = adata.uns["colocalization"] + coloc = adata.uns[COLOCALIZATION] - expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) assert np.all(coloc == expected)