Modification of io/basic.py and addition of tl/add_shapes.py #3

Merged 2 commits on Aug 7, 2024
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -22,13 +22,13 @@ dependencies = [
"scvi-tools",
"pydeseq2",
"decoupler",
"squidpy",
"spatialdata",
"spatialdata-io",
"spatialdata-plot",
#"napari-spatialdata",
"squidpy",
"adjustText",
#"torch==1.13.1",
#"adjustText",

# for debug logging (referenced from the issue template)
"session-info"
70 changes: 44 additions & 26 deletions src/scispy/io/basic.py
@@ -9,6 +9,7 @@ def load_merscope(
vpt_outputs: str = None,
region_name: str = "region_0",
z_layers: int = 2,
feature_key: str = "gene",
) -> sd.SpatialData:
"""Load vizgen merscope data as SpatialData object

@@ -24,22 +25,25 @@
slide_name id.
z_layers
z layers to load.

feature_key
default column for feature name in transcripts.

Returns
-------
SpatialData object
"""
sdata = spatialdata_io.merscope(
path=path, vpt_outputs=vpt_outputs, region_name=region_name, slide_name=slide_name, z_layers=z_layers
path=path, vpt_outputs=vpt_outputs, region_name=region_name,
slide_name=slide_name, z_layers=z_layers
)

sdata.table.obs_names.name = None
sdata['table'].obs_names.name = None
sdata['table'].layers["counts"] = sdata['table'].X.copy()
sdata['table'].uns["spatialdata_attrs"]["feature_key"] = feature_key

# if not sdata.locate_element(sdata.table.uns["spatialdata_attrs"]["region"]) == []:
# sdata[sdata.table.uns["spatialdata_attrs"]["region"]].index.name = None

sdata.table.uns["spatialdata_attrs"]["feature_key"] = "gene"


# Transform coordinates to mosaic pixel coordinates
# transformation_matrix = pd.read_csv(
# path + "/images/micron_to_mosaic_pixel_transform.csv", header=None, sep=" "
@@ -56,7 +60,6 @@
# coord_microns = sdata.table[["center_x", "center_y"]].to_numpy()
# sdata.table.obsm={"microns": coord_microns, "pixels": coord_pixels},

sdata.table.layers["counts"] = sdata.table.X.copy()
# percent_in_cell = sdata.table.obs.n_Counts.sum(axis=0) * 100 / len(sdata[key + '_transcripts'])
# print("\n" + slide_name)
# print("total cells=", sdata.table.obs.shape[0])
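A minimal usage sketch of the updated loader; the path and slide name below are placeholders, and load_merscope is imported from its module directly:

from scispy.io.basic import load_merscope

sdata = load_merscope(
    path="/data/merscope_run",   # placeholder MERSCOPE output folder
    vpt_outputs=None,
    slide_name="slide_1",        # placeholder slide id
    region_name="region_0",
    z_layers=2,
    feature_key="gene",
)
# raw counts are preserved in sdata['table'].layers["counts"]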
@@ -70,33 +73,40 @@

def load_xenium(
path: str,
index_table: bool = True,
region: str = "cell_boundaries",
feature_key: str = "feature_name",
n_jobs: int = 1,
) -> sd.SpatialData:
"""Load xenium data as SpatialData object

Parameters
----------
path
path to folder.
index_table
        rename the index of table.obs using the cell_id values
region
default shape element for region in table.obs.

feature_key
default column for feature name in transcripts.
n_jobs
        number of jobs used to load the Xenium object
Returns
-------
SpatialData object
"""
sdata = spatialdata_io.xenium(path)
sdata.table.layers["counts"] = sdata.table.X.copy()
df = pd.DataFrame(sdata.table.obsm["spatial"])
df.columns = ["center_x", "center_y"]
sdata.table.obs[["center_x", "center_y"]] = df

sdata.table.obs.region = region
sdata.table.uns["spatialdata_attrs"]["region"] = region
sdata.table.uns["spatialdata_attrs"]["feature_key"] = "feature_name"

# sdata.table.obs = sdata.table.obs.set_index(sdata.table.obs.cell_id)
# sdata.table.obs.index.name= None
    sdata = spatialdata_io.xenium(path, n_jobs=n_jobs)
sdata['table'].layers["counts"] = sdata['table'].X.copy()
sdata['table'].obs[["center_x", "center_y"]] = sdata['table'].obsm["spatial"]

sdata['table'].obs["region"] = region
sdata['table'].uns["spatialdata_attrs"]["region"] = region
sdata['table'].uns["spatialdata_attrs"]["feature_key"] = feature_key

if index_table:
sdata['table'].obs.index = sdata['table'].obs['cell_id']
sdata['table'].obs.index.name = None
# sdata.table.obs_names.name = None
# sdata['cell_circles'].index.name = None
# sdata['cell_boundaries'].index.name = None
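A similar sketch for the reworked Xenium loader; the path is a placeholder:

from scispy.io.basic import load_xenium

sdata = load_xenium(
    path="/data/xenium_run",     # placeholder Xenium output bundle
    index_table=True,            # re-index table.obs by cell_id
    region="cell_boundaries",
    feature_key="feature_name",
    n_jobs=4,
)
# sdata['table'].obs now carries center_x / center_y and is indexed by cell_id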
@@ -108,6 +118,7 @@
def load_cosmx(
path: str,
dataset_id: str = "R5941_ColonTMA",
feature_key: str = "target",
) -> sd.SpatialData:
"""Load cosmx data as SpatialData object

@@ -117,17 +128,24 @@
path to folder.
dataset_id
dataset_id.
feature_key
default column for feature name in transcripts.

Returns
-------
SpatialData object
"""
sdata = spatialdata_io.cosmx(path, dataset_id=dataset_id, transcripts=True)

sdata.table.layers["counts"] = sdata.table.X.copy()
df = pd.DataFrame(sdata.table.obsm["spatial"])
df.columns = ["center_x", "center_y"]
sdata.table.obs[["center_x", "center_y"]] = df
sdata.table.uns["spatialdata_attrs"]["feature_key"] = "target"
sdata['table'].layers["counts"] = sdata['table'].X.copy()
    sdata['table'].obs[["center_x", "center_y"]] = sdata['table'].obsm["spatial"]
sdata['table'].uns["spatialdata_attrs"]["feature_key"] = feature_key
# sdata.table.uns["spatialdata_attrs"]["region"] = region

return sdata


# make a class load_data
# parameter -> techno = xenium merscope or cosmx
# xenium = {"function" : spatialdata_io.xenium(path, n_jobs = n_jobs),
# "region" : region,
# "feature_key" : "feature_name"}
8 changes: 7 additions & 1 deletion src/scispy/tl/__init__.py
@@ -1,14 +1,19 @@
from .basic import (
add_shapes_from_hdf5,
add_to_points,
add_to_shapes,
# add_to_shapes,
get_sdata_polygon,
prep_pseudobulk,
run_pseudobulk,
sdata_querybox,
sdata_rotate,
)

from .shapes import (
add_to_shapes,
shapes_of_cell_type
)

__all__ = [
"add_shapes_from_hdf5",
"add_to_points",
@@ -18,4 +23,5 @@
"run_pseudobulk",
"sdata_rotate",
"sdata_querybox",
"shapes_of_cell_type",
]
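With this change the new shapes helpers are exposed from scispy.tl next to the existing tools, so downstream code can simply do:

from scispy.tl import add_to_shapes, shapes_of_cell_type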
130 changes: 130 additions & 0 deletions src/scispy/tl/shapes.py
@@ -0,0 +1,130 @@
import spatialdata as sd
import pandas as pd
import geopandas as gpd
from spatialdata.models import PointsModel, ShapesModel
from shapely.geometry import Polygon
from shapely import count_coordinates, get_coordinates, affinity
from spatialdata.transformations import Identity
import shapely

def add_to_shapes(
sdata: sd.SpatialData,
shape_file: str,
shape_key: str = "myshapes",
    scale_factor: float = 0.50825,  # default if the shapes come from the Xenium Explorer
target_coordinates: str = "microns",
**kwargs,
):
"""Add shape element to SpatialData.

Parameters
----------
sdata
SpatialData object.
shape_file
        coordinates.csv file exported from the Xenium Explorer (region = "normal_1").
        Remove the first 2 '#' header lines (e.g. with vi) and convert line endings
        with dos2unix before loading.
    shape_key
        key of the shape element
    scale_factor
        scale factor applied to the x and y coordinates to convert them to real micron coordinates
    target_coordinates
        target coordinate system

"""
if shape_key in list(sdata.shapes.keys()):
print(f'Shape "{shape_key}" is already present in the object.')
return

d = {"geometry": [], "name": []}
df = pd.read_csv(shape_file, **kwargs)
# if target_coordinates == 'global':
# print(f'Convert shape in micron to pixels with a pixel size of : {pixel_size}')
# df[['X', 'Y']] = df[['X', 'Y']] / pixel_size

for name, group in df.groupby("Selection"):
if len(group) >= 3:
poly = shapely.Polygon(zip(group.X, group.Y))
d["geometry"].append(poly)
d["name"].append(name)
else:
            print(f"Shape '{name}' has fewer than 3 points, skipping it")

gdf = gpd.GeoDataFrame(d)

    # scale because the coordinates come from the Xenium Explorer
gdf.geometry = gdf.geometry.scale(
xfact=scale_factor, yfact=scale_factor, origin=(0, 0)
)

    # subtract the initial image offset (x, y)
image_object_key = list(sdata.images.keys())[0]
matrix = sd.transformations.get_transformation(
sdata[image_object_key], target_coordinates
).to_affine_matrix(input_axes=["x", "y"], output_axes=["x", "y"])
x_translation = matrix[0][2]
y_translation = matrix[1][2]
gdf.geometry = gdf.geometry.apply(
affinity.translate, xoff=x_translation, yoff=y_translation
)

    sdata.shapes[shape_key] = ShapesModel.parse(
        gdf, transformations={target_coordinates: Identity()}
    )
    print(f"New shape added: '{shape_key}'")
return
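A usage sketch for add_to_shapes, assuming a coordinates.csv exported from the Xenium Explorer with its two '#' header lines removed as the docstring suggests; the file path is a placeholder and sdata is a SpatialData object returned by one of the loaders above:

from scispy.tl import add_to_shapes

add_to_shapes(
    sdata,
    shape_file="coordinates.csv",   # placeholder path to the Xenium Explorer export
    shape_key="myshapes",
    scale_factor=0.50825,           # default for Xenium Explorer exports, per the docstring
    target_coordinates="microns",
)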




def shapes_of_cell_type(
sdata: sd.SpatialData,
celltype: str,
obs_key: str = "celltype_spatial",
shape_key: str = "myshapes",
) -> list:
"""Extract shapes from a celltype. First step for the mean shape.

Parameters
----------
sdata
SpatialData object.
celltype
name of the cell type we want to extract
    obs_key
        name of the column in sdata.table.obs where the cell type annotation is stored
    shape_key
        key of the shape element

Returns
-------
List of boundary coordinates for each cell.
"""
# Extract cell shapes of the defined cell type
idx_cells = sdata.table.obs[sdata.table.obs[obs_key] == celltype].index
gdf_shapes_cells = sdata[shape_key].loc[idx_cells]

if len(gdf_shapes_cells["geometry"].geom_type.unique()) != 1:
        print("Geometry type is not unique!")

# Extract the x and y coordinates of each shape
shapes_coordinates = (
gdf_shapes_cells["geometry"].apply(lambda x: get_coordinates(x)).to_list()
)

# OLD extract
# shapes_coordinates = []
# for shape in gdf_shapes_cells["geometry"]:
# if shape.geom_type == "Polygon":
# # coordinates = get_coordinates(shape).tolist()
# coordinates = list(shape.exterior.coords)
# shapes_coordinates.append(coordinates)
# elif shape.geom_type == "MultiPolygon":
# print("MultiPolygon")
# for polygon in shape:
# # coordinates = shapely.get_coordinates(polygon).tolist()
# coordinates = list(polygon.exterior.coords)
# shapes_coordinates.append(coordinates)

return shapes_coordinates
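And a sketch for shapes_of_cell_type; the cell-type label is a placeholder, obs_key keeps the default annotation column, and shape_key is assumed to point at the per-cell boundary element of the object:

from scispy.tl import shapes_of_cell_type

coords = shapes_of_cell_type(
    sdata,
    celltype="Fibroblast",          # placeholder cell-type label
    obs_key="celltype_spatial",
    shape_key="cell_boundaries",    # assumed name of the per-cell boundary shapes element
)
# coords holds one (n_points, 2) coordinate array per cell of that type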