From eac8ca2d2fe956fb8d87821950d56ccbf2a86d7b Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 16 Dec 2024 16:38:50 +0100 Subject: [PATCH 1/4] passing tests and pre-commits --- .github/ISSUE_TEMPLATE/bug_report.md | 4 +- .mypy.ini | 2 +- CHANGELOG.md | 238 +++++++++--------- README.md | 28 +-- pyproject.toml | 2 +- src/spatialdata/_core/operations/rasterize.py | 23 +- src/spatialdata/_core/operations/transform.py | 14 +- .../_core/query/relational_query.py | 8 +- src/spatialdata/_core/query/spatial_query.py | 4 +- src/spatialdata/_io/io_points.py | 2 +- src/spatialdata/_types.py | 14 +- src/spatialdata/_utils.py | 4 +- src/spatialdata/dataloader/datasets.py | 2 +- src/spatialdata/datasets.py | 2 +- src/spatialdata/models/models.py | 12 +- .../ngff/ngff_transformations.py | 24 +- tests/core/operations/test_rasterize.py | 49 +++- 17 files changed, 245 insertions(+), 187 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 87466cfa..8e6d6ff6 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -46,8 +46,8 @@ If applicable, add screenshots to help explain your problem. **Desktop (optional):** -- OS: [e.g. macOS, Windows, Linux] -- Version [e.g. 22] +- OS: [e.g. macOS, Windows, Linux] +- Version [e.g. 22] **Additional context** Add any other context about the problem here. diff --git a/.mypy.ini b/.mypy.ini index 77bf7465..f658d6f6 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -5,7 +5,7 @@ plugins = numpy.typing.mypy_plugin ignore_errors = False warn_redundant_casts = True warn_unused_configs = True -warn_unused_ignores = False +warn_unused_ignores = True disallow_untyped_calls = False disallow_untyped_defs = True diff --git a/CHANGELOG.md b/CHANGELOG.md index c91afcfe..52bed3be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,127 +12,127 @@ and this project adheres to [Semantic Versioning][]. 
### Major -- Added attributes at the SpatialData object level (`.attrs`) +- Added attributes at the SpatialData object level (`.attrs`) ## [0.2.6] - 2024-11-26 ### Added -- Added `set_channel_names` method to `SpatialData` to change the channel names of an - image element in `SpatialData` #786 -- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk - without overwriting the image array itself. #786 +- Added `set_channel_names` method to `SpatialData` to change the channel names of an + image element in `SpatialData` #786 +- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk + without overwriting the image array itself. #786 ### Changed -- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 -- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced - by `get_channel_names` #786 -- Updated dependency of `multiscale-spatial-image` #792 -- Adjust to new version of `xarray` with `DataTree` # 752 +- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 +- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced + by `get_channel_names` #786 +- Updated dependency of `multiscale-spatial-image` #792 +- Adjust to new version of `xarray` with `DataTree` # 752 ### Fixed -- Updated deprecated default stages of `pre-commit` #771 -- Preserve points `feature_key` during queries #794 +- Updated deprecated default stages of `pre-commit` #771 +- Preserve points `feature_key` during queries #794 ## [0.2.5] - 2024-11-06 ### Fixed -- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 +- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 ## [0.2.4] - 2024-11-06 ### Major -- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. #699 +- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. 
#699 ### Minor -- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 -- Added `get_pyramid_levels()` utils API #719 -- Improved ergonomics of `concatenate()` when element names are non-unique #720 -- Improved performance of writing images with multiscales #577 +- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 +- Added `get_pyramid_levels()` utils API #719 +- Improved ergonomics of `concatenate()` when element names are non-unique #720 +- Improved performance of writing images with multiscales #577 ## [0.2.3] - 2024-09-25 ### Minor -- Added `clip: bool = False` parameter to `polygon_query()` #670 -- Add `sort` parameter to `PointsModel.parse()` #672 +- Added `clip: bool = False` parameter to `polygon_query()` #670 +- Add `sort` parameter to `PointsModel.parse()` #672 ### Fixed -- Fix interpolation artifact multiscale computation for labels #697 +- Fix interpolation artifact multiscale computation for labels #697 ## [0.2.2] - 2024-08-07 ### Major -- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 +- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 ### Minor -- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 -- Ability to save data using older disk formats #542 +- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 +- Ability to save data using older disk formats #542 ### Fixed -- Circles validation now checks for inf or nan radii #653 -- Bug with table name in torch dataset #654 @LLehner +- Circles validation now checks for inf or nan radii #653 +- Bug with table name in torch dataset #654 @LLehner ## [0.2.1] - 2024-07-04 ### Minor -- Relaxing `spatial-image` package requirement #616 +- Relaxing `spatial-image` package requirement #616 ## [0.2.0] - 2024-07-03 ### Changed -- Using `DataArray` directly instead of the subclass `SpatialImage` 
(removed install constraint for the `spatial_image` package) #587 -- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 -- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 +- Using `DataArray` directly instead of the subclass `SpatialImage` (removed install constraint for the `spatial_image` package) #587 +- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 +- Changed `element` parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 ### Major -- Added operation: `to_polygons()` @quentinblampey #560 -- Extended `rasterize()` to support all the data types @quentinblampey #566 -- Added operation: `rasterize_bins()` @quentinblampey #578 -- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 +- Added operation: `to_polygons()` @quentinblampey #560 +- Extended `rasterize()` to support all the data types @quentinblampey #566 +- Added operation: `rasterize_bins()` @quentinblampey #578 +- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 ### Minor -- Removed `pygeos` dependency @omsai #545 -- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 -- Added `datasets` module -- Extended `get_values()` to `AnnData` tables #579 -- Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 -- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 +- Removed `pygeos` dependency @omsai #545 +- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 +- Added `datasets` module +- Extended `get_values()` to `AnnData` tables #579 +-
Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 +- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 ### Fixed -- Preserve channel names of multi-scale images in `transform` (#379) -- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) +- Preserve channel names of multi-scale images in `transform` (#379) +- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) ## [0.1.2] - 2024-03-30 ### Minor -- Made `get_channels()` public. -- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). +- Made `get_channels()` public. +- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). ## [0.1.1] - 2024-03-28 ### Added -- Added method `update_annotated_regions_metadata() which updates the `region`value automatically from the`region_key` columns +- Added method `update_annotated_regions_metadata()` which updates the `region` value automatically from the `region_key` columns ### Changed -- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. +- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. ## [0.1.0] - 2024-03-24 @@ -140,70 +140,70 @@ and this project adheres to [Semantic Versioning][]. #### Major -- Implemented support in `SpatialData` for storing multiple tables. -- These tables can annotate a `SpatialElement` but now not necessarily so. -- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. +- Implemented support in `SpatialData` for storing multiple tables. +- These tables can annotate a `SpatialElement` but now not necessarily so.
+- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. -- Added join operations -- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. -- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. +- Added join operations +- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. +- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. -- Incremental IO of data and metadata: -- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. -- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. 
-- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` -- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` -- deprecated `save_transformations()` -- improved `__repr__()` with information on Zarr storage and Dask-backed files -- new utils `is_self_contained()`, `describe_elements_are_self_contained()` -- new utils `element_paths_in_memory()`, `element_paths_on_disk()` +- Incremental IO of data and metadata: +- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. +- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. +- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` +- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` +- deprecated `save_transformations()` +- improved `__repr__()` with information on Zarr storage and Dask-backed files +- new utils `is_self_contained()`, `describe_elements_are_self_contained()` +- new utils `element_paths_in_memory()`, `element_paths_on_disk()` #### Minor -- Multiple table helper functions -- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. -- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation - metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. -- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. -- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. -- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. 
-- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. -- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. -- Added `table_name` parameter to the `get_values()` function. - -- Utils -- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. -- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. -- added `SpatialData.subset()` API -- added `SpatialData.locate_element()` API -- added utils function: `get_centroids()` -- added utils function: `deepcopy()` -- added operation: `to_circles()` -- documented previously-added `get_channels()` to retrieve the channel names of a raster element indepently of it being single or multi-scale - -- Transformations-related - - - added utils function: `transform_to_data_extent()` - - added utils function: `are_extents_equal()` - - added utils function: `postpone_transformation()` - - added utils function: `remove_transformations_to_coordinate_system()` - -- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` +- Multiple table helper functions +- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. +- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation + metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. 
+- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. +- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. +- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. +- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. +- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. +- Added `table_name` parameter to the `get_values()` function. + +- Utils +- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. +- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. 
+- added `SpatialData.subset()` API +- added `SpatialData.locate_element()` API +- added utils function: `get_centroids()` +- added utils function: `deepcopy()` +- added operation: `to_circles()` +- documented previously-added `get_channels()` to retrieve the channel names of a raster element independently of it being single or multi-scale + +- Transformations-related + + - added utils function: `transform_to_data_extent()` + - added utils function: `are_extents_equal()` + - added utils function: `postpone_transformation()` + - added utils function: `remove_transformations_to_coordinate_system()` + +- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` ### Changed/fixed #### Major -- refactored data loader for deep learning -- refactored `SpatialData.write()` to be more robust -- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 +- refactored data loader for deep learning +- refactored `SpatialData.write()` to be more robust +- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 #### Minor -- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. -- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth -- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` -- fixed wrong order of points after spatial queries +- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. +- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth +- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` +- fixed wrong order of points after spatial queries ## [0.0.14] - 2023-10-11 @@ -211,105 +211,105 @@ and this project adheres to [Semantic Versioning][].
#### Minor -- new API: sdata.rename_coordinate_systems() +- new API: sdata.rename_coordinate_systems() #### Technical -- decompose affine transformation into simpler transformations -- remove padding for blobs() +- decompose affine transformation into simpler transformations +- remove padding for blobs() #### Major -- get_extent() function to compute bounding box of the data +- get_extent() function to compute bounding box of the data #### Minor -- testing against pre-release packages +- testing against pre-release packages ### Fixed -- Fixed bug with get_values(): ignoring background channel in labels +- Fixed bug with get_values(): ignoring background channel in labels ## [0.0.13] - 2023-10-02 ### Added -- polygon_query() support for images #358 +- polygon_query() support for images #358 ### Fixed -- Fix missing c_coords argument in blobs multiscale #342 -- Replaced hardcoded string with instance_key #346 +- Fix missing c_coords argument in blobs multiscale #342 +- Replaced hardcoded string with instance_key #346 ## [0.0.12] - 2023-06-24 ### Added -- Add multichannel blobs sample data (by @melonora) +- Add multichannel blobs sample data (by @melonora) ## [0.0.11] - 2023-06-21 ### Improved -- Aggregation APIs. +- Aggregation APIs. ## [0.0.10] - 2023-06-06 ### Fixed -- Fix blobs (#282) +- Fix blobs (#282) ## [0.0.9] - 2023-05-23 ### Updated -- Update napari-spatialdata pin (#279) -- pin typing-extensions +- Update napari-spatialdata pin (#279) +- pin typing-extensions ## [0.0.8] - 2023-05-22 ### Merged -- Merge pull request #271 from scverse/fix/aggregation +- Merge pull request #271 from scverse/fix/aggregation ## [0.0.7] - 2023-05-20 ### Updated -- Update readme +- Update readme ## [0.0.6] - 2023-05-10 ### Added -- This release adds polygon spatial query. +- This release adds polygon spatial query. 
## [0.0.5] - 2023-05-05 ### Fixed -- fix tests badge (#242) +- fix tests badge (#242) ## [0.0.4] - 2023-05-04 ### Tested -- This release tests distribution via pypi +- This release tests distribution via pypi ## [0.0.3] - 2023-05-02 ### Added -- This is an alpha release to test the release process. +- This is an alpha release to test the release process. ## [0.0.2] - 2023-05-02 ### Added -- make version dynamic +- make version dynamic ## [0.0.1.dev1] - 2023-03-25 ### Added -- Dev version, not official release yet +- Dev version, not official release yet diff --git a/README.md b/README.md index 8b5acd0c..7637ee7c 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for performant access, alignment, and processing of uni- and multi-modal spatial omics datasets. This repository contains the core spatialdata library. See the links below to learn more about other packages in the SpatialData ecosystem. -- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. -- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. -- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. +- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. +- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. +- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. [//]: # "numfocus-fiscal-sponsor-attribution" @@ -32,16 +32,16 @@ The spatialdata project also received support by the Chan Zuckerberg Initiative. 
![SpatialDataOverview](https://github.com/scverse/spatialdata/assets/1120672/cb91071f-12a7-4b8e-9430-2b3a0f65e52f) -- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. -- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. +- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. +- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. ## Getting started Please refer to the [documentation][link-docs]. In particular: -- [API documentation][link-api]. -- [Design doc][link-design-doc]. -- [Example notebooks][link-notebooks]. +- [API documentation][link-api]. +- [Design doc][link-design-doc]. +- [Example notebooks][link-notebooks]. Another useful resource to get started is the source code of the [`spatialdata-io`](https://github.com/scverse/spatialdata-io) package, which shows example of how to read data from common technologies. @@ -61,20 +61,20 @@ mamba install -c conda-forge spatialdata napari-spatialdata spatialdata-io spati ## Limitations -- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. +- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. ## Contact To get involved in the discussion, or if you need help to get started, you are welcome to use the following options. 
-- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). -- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). -- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. -- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). +- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). +- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). +- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. +- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). Finally, especially relevant for for developers that are building a library upon `spatialdata`, please follow this channel for: -- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). +- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). ## Citation diff --git a/pyproject.toml b/pyproject.toml index 7e432679..6de3916a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "anndata>=0.9.1", "click", "dask-image", - "dask>=2024.4.1", + "dask>=2024.4.1,<=2024.11.2", "fsspec", "geopandas>=0.14", "multiscale_spatial_image>=2.0.2", diff --git a/src/spatialdata/_core/operations/rasterize.py b/src/spatialdata/_core/operations/rasterize.py index b7bdb929..969e685a 100644 --- a/src/spatialdata/_core/operations/rasterize.py +++ b/src/spatialdata/_core/operations/rasterize.py @@ -217,9 +217,9 @@ def rasterize( The table optionally containing the `value_key` and the name of the table in the returned `SpatialData` object. 
Must be `None` when `data` is a `SpatialData` object, otherwise it assumes the default value of `'table'`. return_regions_as_labels - By default, single-scale images of shape `(c, y, x)` are returned. If `True`, returns labels and shapes as - labels of shape `(y, x)` as opposed to an image of shape `(c, y, x)`. Points and images are always returned - as images, and multiscale raster data is always returned as single-scale data. + By default, single-scale images of shape `(c, y, x)` are returned. If `True`, returns labels, shapes and points + as labels of shape `(y, x)` as opposed to an image of shape `(c, y, x)`. Images are always returned as images, + and multiscale raster data is always returned as single-scale data. agg_func Available only when rasterizing points and shapes. A reduction function from datashader (its name, or a `Callable`). See the notes for more details on the default behavior. @@ -234,6 +234,11 @@ def rasterize( into a `DataArray` (not a `DataTree`). So if a `SpatialData` object with elements is passed, a `SpatialData` object with single-scale images and labels will be returned. + When `return_regions_as_labels` is `True`, the returned `DataArray` object will have an attribute called + `label_index_to_category` that maps the label index to the category name. You can access it via + `returned_data.attrs["label_index_to_category"]`. The returned labels will start from 1 (0 is reserved for the + background), and will be contiguous. 
+ Notes ----- For images and labels, the parameters `value_key`, `table_name`, `agg_func`, and `return_single_channel` are not @@ -587,7 +592,7 @@ def rasterize_images_labels( ) assert isinstance(transformed_dask, DaskArray) channels = xdata.coords["c"].values if schema in (Image2DModel, Image3DModel) else None - transformed_data = schema.parse(transformed_dask, dims=xdata.dims, c_coords=channels) # type: ignore[call-arg,arg-type] + transformed_data = schema.parse(transformed_dask, dims=xdata.dims, c_coords=channels) # type: ignore[call-arg] if target_coordinate_system != "global": remove_transformation(transformed_data, "global") @@ -650,7 +655,7 @@ def rasterize_shapes_points( if value_key is not None: kwargs = {"sdata": sdata, "element_name": element_name} if element_name is not None else {"element": data} data[VALUES_COLUMN] = get_values(value_key, table_name=table_name, **kwargs).iloc[:, 0] # type: ignore[arg-type, union-attr] - elif isinstance(data, GeoDataFrame): + elif isinstance(data, GeoDataFrame) or isinstance(data, DaskDataFrame) and return_regions_as_labels is True: value_key = VALUES_COLUMN data[VALUES_COLUMN] = data.index.astype("category") else: @@ -706,6 +711,14 @@ def rasterize_shapes_points( agg = agg.fillna(0) if return_regions_as_labels: + if label_index_to_category is not None: + max_label = next(iter(reversed(label_index_to_category.keys()))) + else: + max_label = int(agg.max().values) + max_uint16 = np.iinfo(np.uint16).max + if max_label > max_uint16: + raise ValueError(f"Maximum label index is {max_label}. 
Values higher than {max_uint16} are not supported.") + agg = agg.astype(np.uint16) return Labels2DModel.parse(agg, transformations=transformations) agg = agg.expand_dims(dim={"c": 1}).transpose("c", "y", "x") diff --git a/src/spatialdata/_core/operations/transform.py b/src/spatialdata/_core/operations/transform.py index 769e19d2..b92b7757 100644 --- a/src/spatialdata/_core/operations/transform.py +++ b/src/spatialdata/_core/operations/transform.py @@ -52,7 +52,7 @@ def _transform_raster( c_shape: tuple[int, ...] c_shape = (data.shape[0],) if "c" in axes else () new_spatial_shape = tuple( - int(np.max(new_v[:, i]) - np.min(new_v[:, i])) for i in range(len(c_shape), n_spatial_dims + len(c_shape)) # type: ignore[operator] + int(np.max(new_v[:, i]) - np.min(new_v[:, i])) for i in range(len(c_shape), n_spatial_dims + len(c_shape)) ) output_shape = c_shape + new_spatial_shape translation_vector = np.min(new_v[:, :-1], axis=0) @@ -86,8 +86,8 @@ def _transform_raster( # min_y_inverse = np.min(new_v_inverse[:, 1]) if "c" in axes: - plt.imshow(da.moveaxis(transformed_dask, 0, 2), origin="lower", alpha=0.5) # type: ignore[attr-defined] - plt.imshow(da.moveaxis(im, 0, 2), origin="lower", alpha=0.5) # type: ignore[attr-defined] + plt.imshow(da.moveaxis(transformed_dask, 0, 2), origin="lower", alpha=0.5) + plt.imshow(da.moveaxis(im, 0, 2), origin="lower", alpha=0.5) else: plt.imshow(transformed_dask, origin="lower", alpha=0.5) plt.imshow(im, origin="lower", alpha=0.5) @@ -322,7 +322,7 @@ def _( ) c_coords = data.indexes["c"].values if "c" in data.indexes else None # mypy thinks that schema could be ShapesModel, PointsModel, ... 
- transformed_data = schema.parse(transformed_dask, dims=axes, c_coords=c_coords) # type: ignore[call-arg,arg-type] + transformed_data = schema.parse(transformed_dask, dims=axes, c_coords=c_coords) # type: ignore[call-arg] assert isinstance(transformed_data, DataArray) old_transformations = get_transformation(data, get_all=True) assert isinstance(old_transformations, dict) @@ -448,7 +448,7 @@ def _( for ax in axes: indices = xtransformed["dim"] == ax new_ax = xtransformed[:, indices] - transformed[ax] = new_ax.data.flatten() # type: ignore[attr-defined] + transformed[ax] = new_ax.data.flatten() old_transformations = get_transformation(data, get_all=True) assert isinstance(old_transformations, dict) @@ -481,9 +481,9 @@ def _( ) # TODO: nitpick, mypy expects a listof literals and here we have a list of strings. # I ignored but we may want to fix this - affine = transformation.to_affine(axes, axes) # type: ignore[arg-type] + affine = transformation.to_affine(axes, axes) matrix = affine.matrix - shapely_notation = matrix[:-1, :-1].ravel().tolist() + matrix[:-1, -1].tolist() + shapely_notation = matrix[:-1, :-1].ravel().tolist() + matrix[:-1, -1].tolist() # type: ignore[operator] transformed_geometry = data.geometry.affine_transform(shapely_notation) transformed_data = data.copy(deep=True) transformed_data.attrs[TRANSFORM_KEY] = {DEFAULT_COORDINATE_SYSTEM: Identity()} diff --git a/src/spatialdata/_core/query/relational_query.py b/src/spatialdata/_core/query/relational_query.py index c29a22a0..1ccb998a 100644 --- a/src/spatialdata/_core/query/relational_query.py +++ b/src/spatialdata/_core/query/relational_query.py @@ -214,7 +214,7 @@ def _filter_table_by_elements( # some instances have not a corresponding row in the table instances = np.setdiff1d(instances, n0) assert np.sum(to_keep) == len(instances) - assert sorted(set(instances.tolist())) == sorted(set(table.obs[instance_key].tolist())) + assert sorted(set(instances.tolist())) == 
sorted(set(table.obs[instance_key].tolist())) # type: ignore[type-var] table_df = pd.DataFrame({instance_key: table.obs[instance_key], "position": np.arange(len(instances))}) merged = pd.merge(table_df, pd.DataFrame(index=instances), left_on=instance_key, right_index=True, how="right") matched_positions = merged["position"].to_numpy() @@ -467,7 +467,11 @@ def _left_join_spatialelement_table( ) continue - joined_indices = joined_indices.dropna() if joined_indices is not None else None + if joined_indices is not None: + joined_indices = joined_indices.dropna() + # if nan were present, the dtype would have been changed to float + if joined_indices.dtype == float: + joined_indices = joined_indices.astype(int) joined_table = table[joined_indices, :].copy() if joined_indices is not None else None _inplace_fix_subset_categorical_obs(subset_adata=joined_table, original_adata=table) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index ecba815e..b6b78bb2 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -700,8 +700,8 @@ def _( bounding_box_mask = _bounding_box_mask_points( points=points_query_coordinate_system, axes=axes, - min_coordinate=min_c, - max_coordinate=max_c, + min_coordinate=min_c, # type: ignore[arg-type] + max_coordinate=max_c, # type: ignore[arg-type] ) if len(bounding_box_mask) == 1: bounding_box_mask = bounding_box_mask[0] diff --git a/src/spatialdata/_io/io_points.py b/src/spatialdata/_io/io_points.py index d4113fb3..3106c847 100644 --- a/src/spatialdata/_io/io_points.py +++ b/src/spatialdata/_io/io_points.py @@ -3,7 +3,7 @@ from pathlib import Path import zarr -from dask.dataframe import DataFrame as DaskDataFrame # type: ignore[attr-defined] +from dask.dataframe import DataFrame as DaskDataFrame from dask.dataframe import read_parquet from ome_zarr.format import Format diff --git a/src/spatialdata/_types.py b/src/spatialdata/_types.py 
index 3c527163..30d623a5 100644 --- a/src/spatialdata/_types.py +++ b/src/spatialdata/_types.py @@ -1,18 +1,14 @@ +from typing import Any + import numpy as np from xarray import DataArray, DataTree __all__ = ["ArrayLike", "ColorLike", "DTypeLike", "Raster_T"] -try: - from numpy.typing import DTypeLike, NDArray - - ArrayLike = NDArray[np.float64] - IntArrayLike = NDArray[np.int64] # or any np.integer +from numpy.typing import DTypeLike, NDArray -except (ImportError, TypeError): - ArrayLike = np.ndarray # type: ignore[misc] - IntArrayLike = np.ndarray # type: ignore[misc] - DTypeLike = np.dtype # type: ignore[misc, assignment] +ArrayLike = NDArray[np.floating[Any]] +IntArrayLike = NDArray[np.integer[Any]] Raster_T = DataArray | DataTree ColorLike = tuple[float, ...] | str diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py index 72faa51e..53e61b8a 100644 --- a/src/spatialdata/_utils.py +++ b/src/spatialdata/_utils.py @@ -57,7 +57,7 @@ def _affine_matrix_multiplication(matrix: ArrayLike, data: ArrayLike) -> ArrayLi offset_part = matrix[:-1, -1] result = data @ vector_part.T + offset_part assert result.shape[0] == data.shape[0] - return result # type: ignore[no-any-return] + return result def unpad_raster(raster: DataArray | DataTree) -> DataArray | DataTree: @@ -80,7 +80,7 @@ def _compute_paddings(data: DataArray, axis: str) -> tuple[int, int]: others = list(data.dims) others.remove(axis) # mypy (luca's pycharm config) can't see the isclose method of dask array - s = da.isclose(data.sum(dim=others), 0) # type: ignore[attr-defined] + s = da.isclose(data.sum(dim=others), 0) # TODO: rewrite this to use dask array; can't get it to work with it x = s.compute() non_zero = np.where(x == 0)[0] diff --git a/src/spatialdata/dataloader/datasets.py b/src/spatialdata/dataloader/datasets.py index 0b3e7e18..e4aee42c 100644 --- a/src/spatialdata/dataloader/datasets.py +++ b/src/spatialdata/dataloader/datasets.py @@ -144,7 +144,7 @@ def __init__( 
**dict(rasterize_kwargs), ) if rasterize - else bounding_box_query # type: ignore[assignment] + else bounding_box_query ) self._return = self._get_return(return_annotations, table_name) self.transform = transform diff --git a/src/spatialdata/datasets.py b/src/spatialdata/datasets.py index 203e33a7..160fbe82 100644 --- a/src/spatialdata/datasets.py +++ b/src/spatialdata/datasets.py @@ -182,7 +182,7 @@ def _image_blobs( masks = [] for i in range(n_channels): mask = self._generate_blobs(length=length, seed=i) - mask = (mask - mask.min()) / np.ptp(mask) # type: ignore[attr-defined] + mask = (mask - mask.min()) / np.ptp(mask) masks.append(mask) x = np.stack(masks, axis=0) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index a3239a7b..1777c7c0 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -179,7 +179,7 @@ def parse( else: if len(set(dims).symmetric_difference(cls.dims.dims)) > 0: raise ValueError(f"Wrong `dims`: {dims}. 
Expected {cls.dims.dims}.") - _reindex = lambda d: dims.index(d) # type: ignore[union-attr] + _reindex = lambda d: dims.index(d) else: raise ValueError(f"Unsupported data type: {type(data)}.") @@ -717,7 +717,7 @@ def _( stacklevel=2, ) if isinstance(data, pd.DataFrame): - table: DaskDataFrame = dd.from_pandas( # type: ignore[attr-defined] + table: DaskDataFrame = dd.from_pandas( pd.DataFrame(data[[coordinates[ax] for ax in axes]].to_numpy(), columns=axes, index=data.index), # we need to pass sort=True also when the index is sorted to ensure that the divisions are computed sort=sort, @@ -731,9 +731,9 @@ def _( data[feature_key].astype(str).astype("category"), sort=sort, **kwargs, - ) # type: ignore[attr-defined] + ) table[feature_key] = feature_categ - elif isinstance(data, dd.DataFrame): # type: ignore[attr-defined] + elif isinstance(data, dd.DataFrame): table = data[[coordinates[ax] for ax in axes]] table.columns = axes if feature_key is not None: @@ -774,7 +774,7 @@ def _add_metadata_and_validate( instance_key: str | None = None, transformations: MappingToCoordinateSystem_t | None = None, ) -> DaskDataFrame: - assert isinstance(data, dd.DataFrame) # type: ignore[attr-defined] + assert isinstance(data, dd.DataFrame) if feature_key is not None or instance_key is not None: data.attrs[ATTRS_KEY] = {} if feature_key is not None: @@ -797,7 +797,7 @@ def _add_metadata_and_validate( _parse_transformations(data, transformations) cls.validate(data) # false positive with the PyCharm mypy plugin - return data # type: ignore[no-any-return] + return data class TableModel: diff --git a/src/spatialdata/transformations/ngff/ngff_transformations.py b/src/spatialdata/transformations/ngff/ngff_transformations.py index fc8e0634..12556eec 100644 --- a/src/spatialdata/transformations/ngff/ngff_transformations.py +++ b/src/spatialdata/transformations/ngff/ngff_transformations.py @@ -303,7 +303,7 @@ def __init__( self.affine = self._parse_list_into_array(affine) @classmethod - def 
_from_dict(cls, d: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: assert isinstance(d["affine"], list) last_row = [[0.0] * (len(d["affine"][0]) - 1) + [1.0]] return cls(d["affine"] + last_row) @@ -340,7 +340,7 @@ def transform_points(self, points: ArrayLike) -> ArrayLike: self._validate_transform_points_shapes(len(input_axes), points.shape) p = np.vstack([points.T, np.ones(points.shape[0])]) q = self.affine @ p - return q[: len(output_axes), :].T # type: ignore[no-any-return] + return q[: len(output_axes), :].T def to_affine(self) -> "NgffAffine": return NgffAffine( @@ -411,7 +411,7 @@ def __init__( # TODO: remove type: ignore[valid-type] when https://github.com/python/mypy/pull/14041 is merged @classmethod - def _from_dict(cls, _: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, _: Transformation_t) -> Self: return cls() def to_dict(self) -> Transformation_t: @@ -478,7 +478,7 @@ def __init__( self.map_axis = map_axis @classmethod - def _from_dict(cls, d: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: return cls(d["mapAxis"]) def to_dict(self) -> Transformation_t: @@ -569,7 +569,7 @@ def __init__( self.translation = self._parse_list_into_array(translation) @classmethod - def _from_dict(cls, d: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: return cls(d["translation"]) def to_dict(self) -> Transformation_t: @@ -636,7 +636,7 @@ def __init__( self.scale = self._parse_list_into_array(scale) @classmethod - def _from_dict(cls, d: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: return cls(d["scale"]) def to_dict(self) -> Transformation_t: @@ -705,13 +705,13 @@ def __init__( self.rotation = self._parse_list_into_array(rotation) @classmethod - def _from_dict(cls, d: Transformation_t) -> Self: # type: 
ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: x = d["rotation"] n = len(x) r = math.sqrt(n) assert n == int(r * r) m = np.array(x).reshape((int(r), int(r))).tolist() - return cls(m) + return cls(m) # type: ignore[arg-type] def to_dict(self) -> Transformation_t: d = { @@ -802,7 +802,7 @@ def __init__( self.output_coordinate_system = cs @classmethod - def _from_dict(cls, d: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: return cls([NgffBaseTransformation.from_dict(t) for t in d["transformations"]]) def to_dict(self) -> Transformation_t: @@ -941,7 +941,7 @@ def to_affine(self) -> NgffAffine: for t in self.transformations: latest_output_cs, input_cs, output_cs = NgffSequence._inferring_cs_pre_action(t, latest_output_cs) a = t.to_affine() - composed = a.affine @ composed + composed = a.affine @ composed # type: ignore[assignment] NgffSequence._inferring_cs_post_action(t, input_cs, output_cs) if output_axes != latest_output_cs.axes_names: raise ValueError( @@ -1074,7 +1074,7 @@ def __init__( self.transformations = transformations @classmethod - def _from_dict(cls, d: Transformation_t) -> Self: # type: ignore[valid-type] + def _from_dict(cls, d: Transformation_t) -> Self: return cls([NgffBaseTransformation.from_dict(t) for t in d["transformations"]]) def to_dict(self) -> Transformation_t: @@ -1133,7 +1133,7 @@ def transform_points(self, points: ArrayLike) -> ArrayLike: input_columns_stacked: ArrayLike = np.stack(input_columns, axis=1) output_columns_t = t.transform_points(input_columns_stacked) for ax, col in zip(t.output_coordinate_system.axes_names, output_columns_t.T, strict=True): - output_columns[ax] = col + output_columns[ax] = col # type: ignore[assignment] output: ArrayLike = np.stack([output_columns[ax] for ax in output_axes], axis=1) return output diff --git a/tests/core/operations/test_rasterize.py b/tests/core/operations/test_rasterize.py index ea8a9d63..aa2e7930 100644 --- 
a/tests/core/operations/test_rasterize.py +++ b/tests/core/operations/test_rasterize.py @@ -179,7 +179,7 @@ def test_rasterize_points_shapes_with_string_index(points, shapes): ) -def test_rasterize_shapes(): +def _rasterize_shapes_prepare_data() -> tuple[SpatialData, GeoDataFrame, str]: box_one = box(0, 10, 20, 40) box_two = box(5, 35, 15, 45) box_three = box(0, 0, 2, 2) @@ -205,7 +205,11 @@ def test_rasterize_shapes(): ) adata.obs["cat_values"] = adata.obs["cat_values"].astype("category") adata = TableModel.parse(adata, region=element_name, region_key="region", instance_key="instance_id") - sdata = SpatialData.init_from_elements({element_name: gdf[["geometry"]], "table": adata}) + return SpatialData.init_from_elements({element_name: gdf[["geometry"]], "table": adata}), gdf, element_name + + +def test_rasterize_shapes(): + sdata, gdf, element_name = _rasterize_shapes_prepare_data() def _rasterize(element: GeoDataFrame, **kwargs) -> SpatialImage: return _rasterize_test_alternative_calls(element=element, sdata=sdata, element_name=element_name, **kwargs) @@ -292,6 +296,26 @@ def _rasterize(element: GeoDataFrame, **kwargs) -> SpatialImage: assert res[0].max() == 2 assert res[1].max() == 1 + # test rasterize shapes to labels + res_xarray = _rasterize( + gdf, + axes=("x", "y"), + min_coordinate=[0, 0], + max_coordinate=[50, 40], + target_coordinate_system="global", + target_unit_to_pixels=1, + return_regions_as_labels=True, + ) + d = res_xarray.attrs["label_index_to_category"] + assert d == {1: 0, 2: 1, 3: 2} + res = res_xarray.data.compute() + + assert res.dtype == np.uint16 + assert res[0, 0] == 2 + assert res[30, 10] == 0 + assert res[10, 30] == 1 + assert res[10, 37] == 2 + def test_rasterize_points(): data = { @@ -392,6 +416,27 @@ def _rasterize(element: DaskDataFrame, **kwargs) -> SpatialImage: assert res[0, 0, 1] == 0.2 assert res[0, 1, 3] == 1.2 + # test rasterize points to labels + res_xarray = _rasterize( + ddf, + axes=("x", "y"), + min_coordinate=[0, 0], + 
max_coordinate=[5, 5], + target_coordinate_system="global", + target_unit_to_pixels=1.0, + return_regions_as_labels=True, + ) + d = res_xarray.attrs["label_index_to_category"] + res = res_xarray.data.compute() + + assert res[0, 0] == 1 + assert res[0, 1] == 2 + assert res[1, 2] == 5 + + assert d[res[0, 0]] == 0 + assert d[res[0, 1]] == 1 + assert d[res[1, 2]] == 4 + def test_rasterize_spatialdata(full_sdata): sdata = full_sdata.subset( From 2e09be78ae7ece35c250b2044f60a7c9e7f5152c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:46:12 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/ISSUE_TEMPLATE/bug_report.md | 4 +- CHANGELOG.md | 238 +++++++++++++-------------- README.md | 28 ++-- 3 files changed, 135 insertions(+), 135 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 8e6d6ff6..87466cfa 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -46,8 +46,8 @@ If applicable, add screenshots to help explain your problem. **Desktop (optional):** -- OS: [e.g. macOS, Windows, Linux] -- Version [e.g. 22] +- OS: [e.g. macOS, Windows, Linux] +- Version [e.g. 22] **Additional context** Add any other context about the problem here. diff --git a/CHANGELOG.md b/CHANGELOG.md index 52bed3be..c91afcfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,127 +12,127 @@ and this project adheres to [Semantic Versioning][]. 
### Major -- Added attributes at the SpatialData object level (`.attrs`) +- Added attributes at the SpatialData object level (`.attrs`) ## [0.2.6] - 2024-11-26 ### Added -- Added `set_channel_names` method to `SpatialData` to change the channel names of an - image element in `SpatialData` #786 -- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk - without overwriting the image array itself. #786 +- Added `set_channel_names` method to `SpatialData` to change the channel names of an + image element in `SpatialData` #786 +- Added `write_channel_names` method to `SpatialData` to overwrite channel metadata on disk + without overwriting the image array itself. #786 ### Changed -- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 -- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced - by `get_channel_names` #786 -- Updated dependency of `multiscale-spatial-image` #792 -- Adjust to new version of `xarray` with `DataTree` # 752 +- Argument `c_coords` is moved out of kwargs for the `ImageModel`s. #779 +- `get_channels` is marked for deprecation in `SpatialData` v0.3.0. Function is replaced + by `get_channel_names` #786 +- Updated dependency of `multiscale-spatial-image` #792 +- Adjust to new version of `xarray` with `DataTree` # 752 ### Fixed -- Updated deprecated default stages of `pre-commit` #771 -- Preserve points `feature_key` during queries #794 +- Updated deprecated default stages of `pre-commit` #771 +- Preserve points `feature_key` during queries #794 ## [0.2.5] - 2024-11-06 ### Fixed -- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 +- Incompatibility issues due to newest release of `multiscale-spatial-image` #760 ## [0.2.4] - 2024-11-06 ### Major -- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. #699 +- Enable vectorization of `bounding_box_query` for all `SpatialData` elements. 
#699 ### Minor -- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 -- Added `get_pyramid_levels()` utils API #719 -- Improved ergonomics of `concatenate()` when element names are non-unique #720 -- Improved performance of writing images with multiscales #577 +- Added `shortest_path` parameter to `get_transformation_between_coordinate_systems` #714 +- Added `get_pyramid_levels()` utils API #719 +- Improved ergonomics of `concatenate()` when element names are non-unique #720 +- Improved performance of writing images with multiscales #577 ## [0.2.3] - 2024-09-25 ### Minor -- Added `clip: bool = False` parameter to `polygon_query()` #670 -- Add `sort` parameter to `PointsModel.parse()` #672 +- Added `clip: bool = False` parameter to `polygon_query()` #670 +- Add `sort` parameter to `PointsModel.parse()` #672 ### Fixed -- Fix interpolation artifact multiscale computation for labels #697 +- Fix interpolation artifact multiscale computation for labels #697 ## [0.2.2] - 2024-08-07 ### Major -- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 +- New disk format for shapes using `GeoParquet` (the change is backward compatible) #542 ### Minor -- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 -- Ability to save data using older disk formats #542 +- Add `return_background` as argument to `get_centroids` and `get_element_instances` #621 +- Ability to save data using older disk formats #542 ### Fixed -- Circles validation now checks for inf or nan radii #653 -- Bug with table name in torch dataset #654 @LLehner +- Circles validation now checks for inf or nan radii #653 +- Bug with table name in torch dataset #654 @LLehner ## [0.2.1] - 2024-07-04 ### Minor -- Relaxing `spatial-image` package requirement #616 +- Relaxing `spatial-image` package requirement #616 ## [0.2.0] - 2024-07-03 ### Changed -- Using `DataArray` directly instead of the subclass `SpatialImage` 
(removed install constraint for the `spatial_image` package) #587 -- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 -- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 +- Using `DataArray` directly instead of the subclass `SpatialImage` (removed install constraint for the `spatial_image` package) #587 +- Using `DataTree` directly instead of the subclass `MultiscaleSpatialImage` (removed install constraint for the `multiscale_spatial_image` package) #587 +- Changed `element`parameter (deprecation in v0.3.0) of `transform_element_to_coordinate_system` to a string `element_name` #611 ### Major -- Added operation: `to_polygons()` @quentinblampey #560 -- Extended `rasterize()` to support all the data types @quentinblampey #566 -- Added operation: `rasterize_bins()` @quentinblampey #578 -- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 +- Added operation: `to_polygons()` @quentinblampey #560 +- Extended `rasterize()` to support all the data types @quentinblampey #566 +- Added operation: `rasterize_bins()` @quentinblampey #578 +- Added operation: `map_raster()` to apply functions block-wise to raster data @ArneDefauw #588 ### Minor -- Removed `pygeos` dependency @omsai #545 -- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 -- Added `datasets` module -- Extended `get_values()` to `AnnData` tables #579 -- Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 -- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 +- Removed `pygeos` dependency @omsai #545 +- Channel coordinate annotations on images now persist through `rasterize()` @clwgg #544 +- Added `datasets` module +- Extended `get_values()` to `AnnData` tables #579 +- 
Added `get_element_instances()` (replaces `_get_unique_label_values_as_index()`) #582 +- Added `get_element_annotators()`, retrieving the tables that annotate a particular SpatialElement #595 ### Fixed -- Preserve channel names of multi-scale images in `transform` (#379) -- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) +- Preserve channel names of multi-scale images in `transform` (#379) +- Fix `filter_by_coordinate_system` with SpatialData object having a table not annotating an element (#619) ## [0.1.2] - 2024-03-30 ### Minor -- Made `get_channels()` public. -- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). +- Made `get_channels()` public. +- Added utils `force_2d()` to force 3D shapes to 2D (this is a temporary solution until `.force_2d()` is available in `geopandas`). ## [0.1.1] - 2024-03-28 ### Added -- Added method `update_annotated_regions_metadata() which updates the `region`value automatically from the`region_key` columns +- Added method `update_annotated_regions_metadata() which updates the `region`value automatically from the`region_key` columns ### Changed -- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. +- Renamed `join_sdata_spatialelement_table` to `join_spatialelement_table`, and made it work also without `SpatialData` objects. ## [0.1.0] - 2024-03-24 @@ -140,70 +140,70 @@ and this project adheres to [Semantic Versioning][]. #### Major -- Implemented support in `SpatialData` for storing multiple tables. -- These tables can annotate a `SpatialElement` but now not necessarily so. -- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. +- Implemented support in `SpatialData` for storing multiple tables. +- These tables can annotate a `SpatialElement` but now not necessarily so. 
+- Deprecated `.table` attribute in favor of `.tables` dict-like accessor. -- Added join operations -- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. -- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. +- Added join operations +- Added SQL like joins that can be executed by calling one public function `join_sdata_spatialelement_table`. The following joins are supported: `left`, `left_exclusive`, `right`, `right_exclusive` and `inner`. The function has an option to match rows. For `left` only matching `left` is supported and for `right` join only `right` matching of rows is supported. Not all joins are supported for `Labels` elements. +- Added function `match_element_to_table` which allows the user to perform a right join of `SpatialElement`(s) with a table with rows matching the row order in the table. -- Incremental IO of data and metadata: -- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. -- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. 
-- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` -- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` -- deprecated `save_transformations()` -- improved `__repr__()` with information on Zarr storage and Dask-backed files -- new utils `is_self_contained()`, `describe_elements_are_self_contained()` -- new utils `element_paths_in_memory()`, `element_paths_on_disk()` +- Incremental IO of data and metadata: +- Increased in-memory vs on-disk control: changes performed in-memory (e.g. adding a new image) are not automatically performed on-disk. +- Deprecated `add_image()`, `add_labels()`, `add_shapes()`, `add_points()` in favor of `.images`, `.labels`, `.shapes`, `.points` dict-like accessors. +- new methods `write_element()`, `write_transformations()`, `write_metadata()`, `remove_element_from_disk()` +- new methods `write_consolidated_metadata()` and `has_consolidated_metadata()` +- deprecated `save_transformations()` +- improved `__repr__()` with information on Zarr storage and Dask-backed files +- new utils `is_self_contained()`, `describe_elements_are_self_contained()` +- new utils `element_paths_in_memory()`, `element_paths_on_disk()` #### Minor -- Multiple table helper functions -- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. -- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation - metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. -- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. -- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. -- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. 
-- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. -- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. -- Added `table_name` parameter to the `get_values()` function. - -- Utils -- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. -- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. -- added `SpatialData.subset()` API -- added `SpatialData.locate_element()` API -- added utils function: `get_centroids()` -- added utils function: `deepcopy()` -- added operation: `to_circles()` -- documented previously-added `get_channels()` to retrieve the channel names of a raster element indepently of it being single or multi-scale - -- Transformations-related - - - added utils function: `transform_to_data_extent()` - - added utils function: `are_extents_equal()` - - added utils function: `postpone_transformation()` - - added utils function: `remove_transformations_to_coordinate_system()` - -- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` +- Multiple table helper functions +- Added public helper function `get_table_keys()` in `spatialdata.models` to retrieve annotation information of a given table. +- Added public helper function `check_target_region_column_symmetry()` in `spatialdata.models` to check whether annotation + metadata in `table.uns['spatialdata_attrs']` corresponds with respective columns in `table.obs`. 
+- Added function `validate_table_in_spatialdata()` in SpatialData to validate the annotation target of a table being present in the `SpatialData` object. +- Added method `get_annotated_regions()` in `SpatialData` to get the regions annotated by a given table. +- Added method `get_region_key_column()` in `SpatialData` to get the region_key column in table.obs. +- Added method `get_instance_key_column()` in `SpatialData` to get the instance_key column in table.obs. +- Added method `set_table_annotates_spatialelement()` in `SpatialData` to either set or change the annotation metadata of a table in a given `SpatialData` object. - Added `table_name` parameter to the `aggregate()` function to allow users to give a custom table name to table resulting from aggregation. +- Added `table_name` parameter to the `get_values()` function. + +- Utils +- Added `gen_spatial_elements()` generator in SpatialData to generate the `SpatialElements` in a given `SpatialData` object. +- Added `gen_elements` generator in `SpatialData` to generate elements of a `SpatialData` object including tables. 
+- added `SpatialData.subset()` API +- added `SpatialData.locate_element()` API +- added utils function: `get_centroids()` +- added utils function: `deepcopy()` +- added operation: `to_circles()` +- documented previously-added `get_channels()` to retrieve the channel names of a raster element indepently of it being single or multi-scale + +- Transformations-related + + - added utils function: `transform_to_data_extent()` + - added utils function: `are_extents_equal()` + - added utils function: `postpone_transformation()` + - added utils function: `remove_transformations_to_coordinate_system()` + +- added testing utilities: `assert_spatial_data_objects_are_identical()`, `assert_elements_are_identical()`, `assert_elements_dict_are_identical()` ### Changed/fixed #### Major -- refactored data loader for deep learning -- refactored `SpatialData.write()` to be more robust -- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 +- refactored data loader for deep learning +- refactored `SpatialData.write()` to be more robust +- generalized spatial queries to any combination of 2D/3D data and 2D/3D query region #409 #### Minor -- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. -- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth -- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` -- fixed wrong order of points after spatial queries +- Changed the string representation of `SpatialData` to reflect the changes in regard to multiple tables and incremental IO. +- improved usability and robustness of `sdata.write()` when `overwrite=True` @aeisenbarth +- fixed warnings for categorical dtypes in tables in `TableModel` and `PointsModel` +- fixed wrong order of points after spatial queries ## [0.0.14] - 2023-10-11 @@ -211,105 +211,105 @@ and this project adheres to [Semantic Versioning][]. 
#### Minor -- new API: sdata.rename_coordinate_systems() +- new API: sdata.rename_coordinate_systems() #### Technical -- decompose affine transformation into simpler transformations -- remove padding for blobs() +- decompose affine transformation into simpler transformations +- remove padding for blobs() #### Major -- get_extent() function to compute bounding box of the data +- get_extent() function to compute bounding box of the data #### Minor -- testing against pre-release packages +- testing against pre-release packages ### Fixed -- Fixed bug with get_values(): ignoring background channel in labels +- Fixed bug with get_values(): ignoring background channel in labels ## [0.0.13] - 2023-10-02 ### Added -- polygon_query() support for images #358 +- polygon_query() support for images #358 ### Fixed -- Fix missing c_coords argument in blobs multiscale #342 -- Replaced hardcoded string with instance_key #346 +- Fix missing c_coords argument in blobs multiscale #342 +- Replaced hardcoded string with instance_key #346 ## [0.0.12] - 2023-06-24 ### Added -- Add multichannel blobs sample data (by @melonora) +- Add multichannel blobs sample data (by @melonora) ## [0.0.11] - 2023-06-21 ### Improved -- Aggregation APIs. +- Aggregation APIs. ## [0.0.10] - 2023-06-06 ### Fixed -- Fix blobs (#282) +- Fix blobs (#282) ## [0.0.9] - 2023-05-23 ### Updated -- Update napari-spatialdata pin (#279) -- pin typing-extensions +- Update napari-spatialdata pin (#279) +- pin typing-extensions ## [0.0.8] - 2023-05-22 ### Merged -- Merge pull request #271 from scverse/fix/aggregation +- Merge pull request #271 from scverse/fix/aggregation ## [0.0.7] - 2023-05-20 ### Updated -- Update readme +- Update readme ## [0.0.6] - 2023-05-10 ### Added -- This release adds polygon spatial query. +- This release adds polygon spatial query. 
## [0.0.5] - 2023-05-05 ### Fixed -- fix tests badge (#242) +- fix tests badge (#242) ## [0.0.4] - 2023-05-04 ### Tested -- This release tests distribution via pypi +- This release tests distribution via pypi ## [0.0.3] - 2023-05-02 ### Added -- This is an alpha release to test the release process. +- This is an alpha release to test the release process. ## [0.0.2] - 2023-05-02 ### Added -- make version dynamic +- make version dynamic ## [0.0.1.dev1] - 2023-03-25 ### Added -- Dev version, not official release yet +- Dev version, not official release yet diff --git a/README.md b/README.md index 7637ee7c..8b5acd0c 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ SpatialData is a data framework that comprises a FAIR storage format and a collection of python libraries for performant access, alignment, and processing of uni- and multi-modal spatial omics datasets. This repository contains the core spatialdata library. See the links below to learn more about other packages in the SpatialData ecosystem. -- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. -- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. -- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. +- [spatialdata-io](https://github.com/scverse/spatialdata-io): load data from common spatial omics technologies into spatialdata. +- [spatialdata-plot](https://github.com/scverse/spatialdata-plot): Static plotting library for spatialdata. +- [napari-spatialdata](https://github.com/scverse/napari-spatialdata): napari plugin for interactive exploration and annotation of spatial data. [//]: # "numfocus-fiscal-sponsor-attribution" @@ -32,16 +32,16 @@ The spatialdata project also received support by the Chan Zuckerberg Initiative. 
![SpatialDataOverview](https://github.com/scverse/spatialdata/assets/1120672/cb91071f-12a7-4b8e-9430-2b3a0f65e52f) -- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. -- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. +- **The library is currently under review.** We expect there to be changes as the community provides feedback. We have an announcement channel for communicating these changes, please see the contact section below. +- The SpatialData storage format is built on top of the [OME-NGFF](https://ngff.openmicroscopy.org/latest/) specification. ## Getting started Please refer to the [documentation][link-docs]. In particular: -- [API documentation][link-api]. -- [Design doc][link-design-doc]. -- [Example notebooks][link-notebooks]. +- [API documentation][link-api]. +- [Design doc][link-design-doc]. +- [Example notebooks][link-notebooks]. Another useful resource to get started is the source code of the [`spatialdata-io`](https://github.com/scverse/spatialdata-io) package, which shows example of how to read data from common technologies. @@ -61,20 +61,20 @@ mamba install -c conda-forge spatialdata napari-spatialdata spatialdata-io spati ## Limitations -- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. +- Code only manually tested for Windows machines. Currently the framework is being developed using Linux, macOS and Windows machines, but it is automatically tested only for Linux and macOS machines. ## Contact To get involved in the discussion, or if you need help to get started, you are welcome to use the following options. 
-- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). -- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). -- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. -- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). +- Chat via [`scverse` Zulip](https://scverse.zulipchat.com/#narrow/stream/315824-spatial) (public or 1 to 1). +- Forum post in the [scverse discourse forum](https://discourse.scverse.org/). +- Bug report/feature request via the [GitHub issue tracker][issue-tracker]. +- Zoom call as part of the SpatialData Community Meetings, held every 2 weeks on Thursday, [schedule here](https://hackmd.io/enWU826vRai-JYaL7TZaSw). Finally, especially relevant for for developers that are building a library upon `spatialdata`, please follow this channel for: -- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). +- Announcements on new features and important changes [Zulip](https://imagesc.zulipchat.com/#narrow/stream/329057-scverse/topic/spatialdata.20announcements). 
## Citation From 760bfb444f3760fa5197a2a864b327e6f778ec7e Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 16 Dec 2024 16:47:25 +0100 Subject: [PATCH 3/4] restore mypy not warning on unused ignores --- .mypy.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mypy.ini b/.mypy.ini index f658d6f6..77bf7465 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -5,7 +5,7 @@ plugins = numpy.typing.mypy_plugin ignore_errors = False warn_redundant_casts = True warn_unused_configs = True -warn_unused_ignores = True +warn_unused_ignores = False disallow_untyped_calls = False disallow_untyped_defs = True From 421183bf092bdbd241438224a27e335877c86149 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 16 Dec 2024 16:48:57 +0100 Subject: [PATCH 4/4] fix mypy --- src/spatialdata/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py index 53e61b8a..61f5a52c 100644 --- a/src/spatialdata/_utils.py +++ b/src/spatialdata/_utils.py @@ -57,7 +57,7 @@ def _affine_matrix_multiplication(matrix: ArrayLike, data: ArrayLike) -> ArrayLi offset_part = matrix[:-1, -1] result = data @ vector_part.T + offset_part assert result.shape[0] == data.shape[0] - return result + return result # type: ignore[no-any-return] def unpad_raster(raster: DataArray | DataTree) -> DataArray | DataTree: