Modification of io/basic.py and addition of tl/add_shapes.py #3

Merged 2 commits on Aug 7, 2024
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -22,13 +22,13 @@ dependencies = [
"scvi-tools",
"pydeseq2",
"decoupler",
"squidpy",
"spatialdata",
"spatialdata-io",
"spatialdata-plot",
#"napari-spatialdata",
"squidpy",
"adjustText",
#"torch==1.13.1",
#"adjustText",

# for debug logging (referenced from the issue template)
"session-info"
70 changes: 44 additions & 26 deletions src/scispy/io/basic.py
@@ -9,6 +9,7 @@ def load_merscope(
vpt_outputs: str = None,
region_name: str = "region_0",
z_layers: int = 2,
feature_key: str = "gene",
) -> sd.SpatialData:
"""Load vizgen merscope data as SpatialData object

@@ -24,22 +25,25 @@
slide_name id.
z_layers
z layers to load.

feature_key
default column for feature name in transcripts.

Returns
-------
SpatialData object
"""
sdata = spatialdata_io.merscope(
path=path, vpt_outputs=vpt_outputs, region_name=region_name, slide_name=slide_name, z_layers=z_layers
path=path, vpt_outputs=vpt_outputs, region_name=region_name,
slide_name=slide_name, z_layers=z_layers
)

sdata.table.obs_names.name = None
sdata['table'].obs_names.name = None
sdata['table'].layers["counts"] = sdata['table'].X.copy()
sdata['table'].uns["spatialdata_attrs"]["feature_key"] = feature_key

# if not sdata.locate_element(sdata.table.uns["spatialdata_attrs"]["region"]) == []:
# sdata[sdata.table.uns["spatialdata_attrs"]["region"]].index.name = None

sdata.table.uns["spatialdata_attrs"]["feature_key"] = "gene"


# Transform coordinates to mosaic pixel coordinates
# transformation_matrix = pd.read_csv(
# path + "/images/micron_to_mosaic_pixel_transform.csv", header=None, sep=" "
@@ -56,7 +60,6 @@
# coord_microns = sdata.table[["center_x", "center_y"]].to_numpy()
# sdata.table.obsm={"microns": coord_microns, "pixels": coord_pixels},

sdata.table.layers["counts"] = sdata.table.X.copy()
# percent_in_cell = sdata.table.obs.n_Counts.sum(axis=0) * 100 / len(sdata[key + '_transcripts'])
# print("\n" + slide_name)
# print("total cells=", sdata.table.obs.shape[0])
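A minimal usage sketch of the updated loader; the path and slide name below are placeholders, and load_merscope is imported from its module directly:

from scispy.io.basic import load_merscope

sdata = load_merscope(
    path="/data/merscope_run",   # placeholder MERSCOPE output folder
    vpt_outputs=None,
    slide_name="slide_1",        # placeholder slide id
    region_name="region_0",
    z_layers=2,
    feature_key="gene",
)
# raw counts are preserved in sdata['table'].layers["counts"]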
@@ -70,33 +73,40 @@

def load_xenium(
path: str,
index_table: bool = True,
region: str = "cell_boundaries",
feature_key: str = "feature_name",
n_jobs: int = 1,
) -> sd.SpatialData:
"""Load xenium data as SpatialData object

Parameters
----------
path
path to folder.
index_table
        rename the index of table.obs using the cell_id values
region
default shape element for region in table.obs.

feature_key
default column for feature name in transcripts.
n_jobs
        number of jobs used to load the Xenium object
Returns
-------
SpatialData object
"""
sdata = spatialdata_io.xenium(path)
sdata.table.layers["counts"] = sdata.table.X.copy()
df = pd.DataFrame(sdata.table.obsm["spatial"])
df.columns = ["center_x", "center_y"]
sdata.table.obs[["center_x", "center_y"]] = df

sdata.table.obs.region = region
sdata.table.uns["spatialdata_attrs"]["region"] = region
sdata.table.uns["spatialdata_attrs"]["feature_key"] = "feature_name"

# sdata.table.obs = sdata.table.obs.set_index(sdata.table.obs.cell_id)
# sdata.table.obs.index.name= None
    sdata = spatialdata_io.xenium(path, n_jobs=n_jobs)
sdata['table'].layers["counts"] = sdata['table'].X.copy()
sdata['table'].obs[["center_x", "center_y"]] = sdata['table'].obsm["spatial"]

sdata['table'].obs["region"] = region
sdata['table'].uns["spatialdata_attrs"]["region"] = region
sdata['table'].uns["spatialdata_attrs"]["feature_key"] = feature_key

if index_table:
sdata['table'].obs.index = sdata['table'].obs['cell_id']
sdata['table'].obs.index.name = None
# sdata.table.obs_names.name = None
# sdata['cell_circles'].index.name = None
# sdata['cell_boundaries'].index.name = None
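A similar sketch for the reworked Xenium loader; the path is a placeholder:

from scispy.io.basic import load_xenium

sdata = load_xenium(
    path="/data/xenium_run",     # placeholder Xenium output bundle
    index_table=True,            # re-index table.obs by cell_id
    region="cell_boundaries",
    feature_key="feature_name",
    n_jobs=4,
)
# sdata['table'].obs now carries center_x / center_y and is indexed by cell_id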
@@ -108,6 +118,7 @@
def load_cosmx(
path: str,
dataset_id: str = "R5941_ColonTMA",
feature_key: str = "target",
) -> sd.SpatialData:
"""Load cosmx data as SpatialData object

@@ -117,17 +128,24 @@
path to folder.
dataset_id
dataset_id.
feature_key
default column for feature name in transcripts.

Returns
-------
SpatialData object
"""
sdata = spatialdata_io.cosmx(path, dataset_id=dataset_id, transcripts=True)

sdata.table.layers["counts"] = sdata.table.X.copy()
df = pd.DataFrame(sdata.table.obsm["spatial"])
df.columns = ["center_x", "center_y"]
sdata.table.obs[["center_x", "center_y"]] = df
sdata.table.uns["spatialdata_attrs"]["feature_key"] = "target"
sdata['table'].layers["counts"] = sdata['table'].X.copy()
    sdata['table'].obs[["center_x", "center_y"]] = sdata['table'].obsm["spatial"]
sdata['table'].uns["spatialdata_attrs"]["feature_key"] = feature_key
# sdata.table.uns["spatialdata_attrs"]["region"] = region

return sdata


# make a class load_data
# parameter -> techno = xenium merscope or cosmx
# xenium = {"function" : spatialdata_io.xenium(path, n_jobs = n_jobs),
# "region" : region,
# "feature_key" : "feature_name"}
8 changes: 7 additions & 1 deletion src/scispy/tl/__init__.py
@@ -1,14 +1,19 @@
from .basic import (
add_shapes_from_hdf5,
add_to_points,
add_to_shapes,
# add_to_shapes,
get_sdata_polygon,
prep_pseudobulk,
run_pseudobulk,
sdata_querybox,
sdata_rotate,
)

from .shapes import (
add_to_shapes,
shapes_of_cell_type
)

__all__ = [
"add_shapes_from_hdf5",
"add_to_points",
@@ -18,4 +23,5 @@
"run_pseudobulk",
"sdata_rotate",
"sdata_querybox",
"shapes_of_cell_type",
]
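With this change the new shapes helpers are exposed from scispy.tl next to the existing tools, so downstream code can simply do:

from scispy.tl import add_to_shapes, shapes_of_cell_type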
130 changes: 130 additions & 0 deletions src/scispy/tl/shapes.py
@@ -0,0 +1,130 @@
import spatialdata as sd
import pandas as pd
import geopandas as gpd
from spatialdata.models import PointsModel, ShapesModel
from shapely.geometry import Polygon
from shapely import count_coordinates, get_coordinates, affinity
from spatialdata.transformations import Identity
import shapely

def add_to_shapes(
sdata: sd.SpatialData,
shape_file: str,
shape_key: str = "myshapes",
    scale_factor: float = 0.50825,  # default if the shapes come from the Xenium Explorer
target_coordinates: str = "microns",
**kwargs,
):
"""Add shape element to SpatialData.

Parameters
----------
sdata
SpatialData object.
shape_file
        coordinates.csv file exported from the Xenium Explorer (region = "normal_1").
        Remove the first 2 '#' header lines (e.g. with vi) and convert line endings
        with dos2unix before loading.
    shape_key
        key of the shape element
    scale_factor
        scale factor applied to the x and y coordinates to convert them to real micron coordinates
    target_coordinates
        target coordinate system

"""
if shape_key in list(sdata.shapes.keys()):
print(f'Shape "{shape_key}" is already present in the object.')
return

d = {"geometry": [], "name": []}
df = pd.read_csv(shape_file, **kwargs)
# if target_coordinates == 'global':
# print(f'Convert shape in micron to pixels with a pixel size of : {pixel_size}')
# df[['X', 'Y']] = df[['X', 'Y']] / pixel_size

for name, group in df.groupby("Selection"):
if len(group) >= 3:
poly = shapely.Polygon(zip(group.X, group.Y))
d["geometry"].append(poly)
d["name"].append(name)
else:
            print(f"Shape '{name}' has fewer than 3 points, skipping it")

gdf = gpd.GeoDataFrame(d)

    # scale because the coordinates come from the Xenium Explorer
gdf.geometry = gdf.geometry.scale(
xfact=scale_factor, yfact=scale_factor, origin=(0, 0)
)

    # subtract the initial image offset (x, y)
image_object_key = list(sdata.images.keys())[0]
matrix = sd.transformations.get_transformation(
sdata[image_object_key], target_coordinates
).to_affine_matrix(input_axes=["x", "y"], output_axes=["x", "y"])
x_translation = matrix[0][2]
y_translation = matrix[1][2]
gdf.geometry = gdf.geometry.apply(
affinity.translate, xoff=x_translation, yoff=y_translation
)

    sdata.shapes[shape_key] = ShapesModel.parse(
        gdf, transformations={target_coordinates: Identity()}
    )
    print(f"New shape added: '{shape_key}'")
return
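A usage sketch for add_to_shapes, assuming a coordinates.csv exported from the Xenium Explorer with its two '#' header lines removed as the docstring suggests; the file path is a placeholder and sdata is a SpatialData object returned by one of the loaders above:

from scispy.tl import add_to_shapes

add_to_shapes(
    sdata,
    shape_file="coordinates.csv",   # placeholder path to the Xenium Explorer export
    shape_key="myshapes",
    scale_factor=0.50825,           # default for Xenium Explorer exports, per the docstring
    target_coordinates="microns",
)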




def shapes_of_cell_type(
sdata: sd.SpatialData,
celltype: str,
obs_key: str = "celltype_spatial",
shape_key: str = "myshapes",
) -> list:
"""Extract shapes from a celltype. First step for the mean shape.

Parameters
----------
sdata
SpatialData object.
celltype
name of the cell type we want to extract
    obs_key
        name of the column in sdata.table.obs where the cell type annotation is stored
    shape_key
        key of the shape element

Returns
-------
List of boundary coordinates for each cell.
"""
# Extract cell shapes of the defined cell type
idx_cells = sdata.table.obs[sdata.table.obs[obs_key] == celltype].index
gdf_shapes_cells = sdata[shape_key].loc[idx_cells]

if len(gdf_shapes_cells["geometry"].geom_type.unique()) != 1:
        print("Geometry type is not unique!")

# Extract the x and y coordinates of each shape
shapes_coordinates = (
gdf_shapes_cells["geometry"].apply(lambda x: get_coordinates(x)).to_list()
)

# OLD extract
# shapes_coordinates = []
# for shape in gdf_shapes_cells["geometry"]:
# if shape.geom_type == "Polygon":
# # coordinates = get_coordinates(shape).tolist()
# coordinates = list(shape.exterior.coords)
# shapes_coordinates.append(coordinates)
# elif shape.geom_type == "MultiPolygon":
# print("MultiPolygon")
# for polygon in shape:
# # coordinates = shapely.get_coordinates(polygon).tolist()
# coordinates = list(polygon.exterior.coords)
# shapes_coordinates.append(coordinates)

return shapes_coordinates
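And a sketch for shapes_of_cell_type; the cell-type label is a placeholder, obs_key keeps the default annotation column, and shape_key is assumed to point at the per-cell boundary element of the object:

from scispy.tl import shapes_of_cell_type

coords = shapes_of_cell_type(
    sdata,
    celltype="Fibroblast",          # placeholder cell-type label
    obs_key="celltype_spatial",
    shape_key="cell_boundaries",    # assumed name of the per-cell boundary shapes element
)
# coords holds one (n_points, 2) coordinate array per cell of that type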