Skip to content

Commit

Permalink
fix for to_polygons when using processes instead of threads in dask (#…
Browse files Browse the repository at this point in the history
…756)

vectorize fix
  • Loading branch information
ArneDefauw authored Dec 10, 2024
1 parent d3cdf69 commit eb1d713
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions src/spatialdata/_core/operations/vectorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ def to_polygons(data: SpatialElement, buffer_resolution: int | None = None) -> G
"""
Convert a set of geometries (2D labels, 2D shapes) to approximated 2D polygons/multipolygons.
+ For optimal performance when converting rasters (:class:`xarray.DataArray` or :class:`datatree.DataTree`)
+ to polygons, it is recommended to configure `Dask` to use 'processes' rather than 'threads'.
+ For example, you can set this configuration with:
+ >>> import dask
+ >>> dask.config.set(scheduler='processes')
Parameters
----------
data
Expand Down Expand Up @@ -194,23 +201,22 @@ def _(
else:
element_single_scale = element

- gdf_chunks = []
chunk_sizes = element_single_scale.data.chunks

- def _vectorize_chunk(chunk: np.ndarray, yoff: int, xoff: int) -> None: # type: ignore[type-arg]
+ def _vectorize_chunk(chunk: np.ndarray, yoff: int, xoff: int) -> GeoDataFrame: # type: ignore[type-arg]
gdf = _vectorize_mask(chunk)
gdf["chunk-location"] = f"({yoff}, {xoff})"
gdf.geometry = gdf.translate(xoff, yoff)
- gdf_chunks.append(gdf)
+ return gdf

tasks = [
dask.delayed(_vectorize_chunk)(chunk, sum(chunk_sizes[0][:iy]), sum(chunk_sizes[1][:ix]))
for iy, row in enumerate(element_single_scale.data.to_delayed())
for ix, chunk in enumerate(row)
]
- dask.compute(tasks)

- gdf = pd.concat(gdf_chunks)
+ results = dask.compute(*tasks)
+ gdf = pd.concat(results)
gdf = GeoDataFrame([_dissolve_on_overlaps(*item) for item in gdf.groupby("label")], columns=["label", "geometry"])
gdf.index = gdf["label"]

Expand Down

0 comments on commit eb1d713

Please sign in to comment.