Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix annotations mismatch for neutral loss #22

Merged
merged 4 commits into from
Mar 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions metaspace_converter/tests/to_anndata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,14 @@ def sm(metaspace_credentials) -> SMInstance:
@pytest.mark.parametrize(
("dataset_id", "database", "fdr", "metadata_as_obs", "add_optical_image"),
[
# Just downloading, metadata as uns
("2021-09-03_11h43m13s", ("CoreMetabolome", "v3"), 0.1, False, False),
aeisenbarth marked this conversation as resolved.
Show resolved Hide resolved
# Metadata as obs
("2021-09-03_11h43m13s", ("CoreMetabolome", "v3"), 0.1, True, False),
# Add optical image for SquidPy
("2021-09-03_11h43m13s", ("CoreMetabolome", "v3"), 0.1, False, True),
# Dataset with custom database, neutral losses
("2022-11-18_16h40m47s", ("AE_spacem_tests", "v1"), 0.5, False, False),
],
)
def test_metaspace_to_anndata(
Expand All @@ -56,6 +61,8 @@ def test_metaspace_to_anndata(
dataset = sm.dataset(id=dataset_id)
assert actual.n_obs == np.prod(get_ion_image_shape(dataset))
assert actual.n_vars == len(dataset.annotations(fdr=fdr, database=database))
assert actual.obs_names.is_unique
assert actual.var_names.is_unique
assert {
COL.ion_image_shape_y,
COL.ion_image_shape_x,
Expand Down
40 changes: 28 additions & 12 deletions metaspace_converter/to_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,13 @@ def metaspace_to_anndata(
database = DEFAULT_DATABASE

# Download annotations
annotations = dataset.results(database=database, fdr=fdr, **annotation_filter)
annotations = dataset.results(
database=database,
fdr=fdr,
include_chem_mods=True,
aeisenbarth marked this conversation as resolved.
Show resolved Hide resolved
include_neutral_losses=True,
**annotation_filter,
)
annotations = _add_annotations_index(annotations, index_name=VAR_INDEX_NAME)
annotations = _normalize_annotations_for_serialization(annotations)

Expand All @@ -111,8 +117,8 @@ def metaspace_to_anndata(
)
assert len(annotations) == len(isotope_images)

# Sort them matching the annotations.
isotope_images = _sort_isotope_images_like(isotope_images, annotations.index)
# Sort isotope images to match the annotations.
isotope_images = _sort_isotope_images_like(isotope_images, annotations)

# Create X matrix (all ion pixels flattened to primary axis)
shape = get_ion_image_shape(dataset)
Expand Down Expand Up @@ -146,13 +152,20 @@ def metaspace_to_anndata(
return adata


def create_annotation_id(
    formula: str, adduct: str, chem_mod: str = "", neutral_loss: str = ""
) -> str:
    """
    Build the identifier of an annotation from its components.

    The parts are concatenated without a separator in the order formula, adduct,
    chemical modification, neutral loss. The last two default to the empty string, so
    a call passing only formula and adduct yields ``f"{formula}{adduct}"``.

    Args:
        formula: Sum formula of the annotated molecule.
        adduct: Adduct of the annotation.
        chem_mod: Chemical modification, empty if there is none.
        neutral_loss: Neutral loss, empty if there is none.

    Returns:
        The concatenated annotation identifier.
    """
    return f"{formula}{adduct}{chem_mod}{neutral_loss}"


def _add_annotations_index(df: pd.DataFrame, index_name: str = VAR_INDEX_NAME) -> pd.DataFrame:
    """
    Return the annotations table re-indexed by an annotation identifier.

    The previous index is first moved into columns with ``reset_index()``. The new index
    value of each row is ``create_annotation_id(formula, adduct, chemMod, neutralLoss)``.

    Args:
        df: Annotations table. After ``reset_index()`` it must provide the columns
            ``formula`` and ``adduct``; ``chemMod`` and ``neutralLoss`` are optional.
        index_name: Name of the column that becomes the new index.

    Returns:
        A new data frame indexed by ``index_name``; the argument is not modified because
        ``reset_index()`` returns a new object.
    """
    df = df.reset_index()

    def column_or_blank(name: str):
        # A table without this column contributes an empty string to every ID, which
        # leaves the ID equal to the parts that are present.
        return df[name] if name in df.columns else [""] * len(df)

    # Iterate the needed columns in lockstep rather than using a row-wise
    # `df.apply(..., axis=1)`, which constructs a Series for every row.
    df[index_name] = [
        create_annotation_id(formula, adduct, chem_mod, neutral_loss)
        for formula, adduct, chem_mod, neutral_loss in zip(
            df["formula"],
            df["adduct"],
            column_or_blank("chemMod"),
            column_or_blank("neutralLoss"),
        )
    ]
    return df.set_index(index_name)


Expand Down Expand Up @@ -201,14 +214,17 @@ def get_ion_image_shape(


def _sort_isotope_images_like(
    isotope_images: list[IsotopeImages], df: pd.DataFrame
) -> list[IsotopeImages]:
    """
    Reorder isotope images so that they follow the row order of an annotations table.

    Images and rows are matched on the tuple (formula, adduct, chemical modification,
    neutral loss). On the image side these are the attributes ``formula``, ``adduct``,
    ``chem_mod`` and ``neutral_loss``; on the table side the columns ``formula``,
    ``adduct``, ``chemMod`` and ``neutralLoss``.

    Args:
        isotope_images: Images to reorder.
        df: Annotations table defining the requested order, one row per image.

    Returns:
        The images in the order of the rows of ``df``.

    Raises:
        KeyError: If a row of ``df`` has no image with the same key.
    """
    images_by_key = {
        (image.formula, image.adduct, image.chem_mod, image.neutral_loss): image
        for image in isotope_images
    }
    # Return them in the requested order.
    # Note: pd.DataFrame.itertuples yields NamedTuple and is faster than iterrows.
    # chemMod/neutralLoss fall back to "" when the table lacks those columns, the same
    # fallback _add_annotations_index applies when it builds the annotation IDs.
    return [
        images_by_key[
            (
                row.formula,
                row.adduct,
                getattr(row, "chemMod", ""),
                getattr(row, "neutralLoss", ""),
            )
        ]
        for row in df.itertuples(index=False)
    ]


def _create_anndata_x(isotope_images: list[IsotopeImages], shape: Shape2d) -> np.ndarray:
Expand Down
Loading