From 35a8a02b4f1019a94c3af61e3b1d9b2484a66c51 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 19 Jan 2024 18:43:01 -0500 Subject: [PATCH] Add conversion between shapely and cf for polygons (#495) * Shapely polygons to and from cf * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix mypy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix mypy for real * Apply suggestions from code review Co-authored-by: Pascal Bourgault * Update the value in the dataset as well --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Pascal Bourgault --- cf_xarray/geometry.py | 170 ++++++++++++++++++++- cf_xarray/tests/test_geometry.py | 246 +++++++++++++++++++++++++++++-- 2 files changed, 397 insertions(+), 19 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index e06e2cf1..8794b876 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -75,9 +75,6 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None """ Convert a DataArray with shapely geometry objects into a CF-compliant dataset. - .. warning:: - Only point and line geometries are currently implemented. - Parameters ---------- geometries : sequence of shapely geometries or xarray.DataArray @@ -115,7 +112,7 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None elif types.issubset({"LineString", "MultiLineString"}): ds = lines_to_cf(geometries) elif types.issubset({"Polygon", "MultiPolygon"}): - raise NotImplementedError("Polygon geometry conversion is not implemented.") + ds = polygons_to_cf(geometries) else: raise ValueError( f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}" @@ -142,9 +139,6 @@ def cf_to_shapely(ds: xr.Dataset): """ Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable. - .. warning:: - Only point and line geometries are currently implemented. - Parameters ---------- ds : xr.Dataset @@ -168,7 +162,7 @@ def cf_to_shapely(ds: xr.Dataset): elif geom_type == "line": geometries = cf_to_lines(ds) elif geom_type == "polygon": - raise NotImplementedError("Polygon geometry conversion is not implemented.") + geometries = cf_to_polygons(ds) else: raise ValueError( f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}" @@ -430,3 +424,163 @@ def cf_to_lines(ds: xr.Dataset): geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) + + +def polygons_to_cf(polygons: xr.DataArray | Sequence): + """Convert an iterable of polygons (shapely.geometry.[Multi]Polygon) into a CF-compliant geometry dataset. + + Parameters + ---------- + polygons : sequence of shapely.geometry.Polygon or MultiPolygon + The sequence of [multi]polygons to translate to a CF dataset. + + Returns + ------- + xr.Dataset + A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container' + and optionally 'part_node_count'. + """ + from shapely import to_ragged_array + + if isinstance(polygons, xr.DataArray): + dim = polygons.dims[0] + coord = polygons[dim] if dim in polygons.coords else None + polygons_ = polygons.values + else: + dim = "index" + coord = None + polygons_ = np.array(polygons) + + _, arr, offsets = to_ragged_array(polygons_) + x = arr[:, 0] + y = arr[:, 1] + + part_node_count = np.diff(offsets[0]) + if len(offsets) == 1: + indices = offsets[0] + node_count = part_node_count + elif len(offsets) >= 2: + indices = np.take(offsets[0], offsets[1]) + interior_ring = np.isin(offsets[0], indices, invert=True)[:-1].astype(int) + + if len(offsets) == 3: + indices = np.take(indices, offsets[2]) + + node_count = np.diff(indices) + + geom_coords = arr.take(indices[:-1], 0) + crdX = geom_coords[:, 0] + crdY = geom_coords[:, 1] + + ds = xr.Dataset( + data_vars={ + "node_count": xr.DataArray(node_count, dims=(dim,)), + "interior_ring": xr.DataArray(interior_ring, dims=("part",)), + "part_node_count": xr.DataArray(part_node_count, dims=("part",)), + "geometry_container": xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "part_node_count": "part_node_count", + "interior_ring": "interior_ring", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + }, + coords={ + "x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}), + "y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}), + "crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}), + "crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}), + }, + ) + + if coord is not None: + ds = ds.assign_coords({dim: coord}) + + # Special case when we have no MultiPolygons and no holes + if len(ds.part_node_count) == len(ds.node_count): + ds = ds.drop_vars("part_node_count") + del ds.geometry_container.attrs["part_node_count"] + + # Special case when we have no holes + if (ds.interior_ring == 0).all(): + ds = ds.drop_vars("interior_ring") + del ds.geometry_container.attrs["interior_ring"] + return ds + + +def cf_to_polygons(ds: xr.Dataset): + """Convert polygon geometries stored in a CF-compliant way to shapely polygons stored in a single variable. + + Parameters + ---------- + ds : xr.Dataset + A dataset with CF-compliant polygon geometries. + Must have a "geometry_container" variable with at least a 'node_coordinates' attribute. + Must also have the two 1D variables listed by this attribute. + + Returns + ------- + geometry : xr.DataArray + A 1D array of shapely.geometry.[Multi]Polygon objects. + It has the same dimension as the ``part_node_count`` or the coordinates variables, or + ``'features'`` if those were not present in ``ds``. + """ + from shapely import GeometryType, from_ragged_array + + # Shorthand for convenience + geo = ds.geometry_container.attrs + + # The features dimension name, defaults to the one of 'part_node_count' + # or the dimension of the coordinates, if present. + feat_dim = None + if "coordinates" in geo: + xcoord_name, _ = geo["coordinates"].split(" ") + (feat_dim,) = ds[xcoord_name].dims + + x_name, y_name = geo["node_coordinates"].split(" ") + xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) + + node_count_name = geo.get("node_count") + part_node_count_name = geo.get("part_node_count", node_count_name) + interior_ring_name = geo.get("interior_ring") + + if node_count_name is None: + raise ValueError("'node_count' must be provided for polygon geometries") + else: + node_count = ds[node_count_name] + feat_dim = feat_dim or "index" + if feat_dim in ds.coords: + node_count = node_count.assign_coords({feat_dim: ds[feat_dim]}) + + # first get geometries for all the rings + part_node_count = ds[part_node_count_name] + offset1 = np.insert(np.cumsum(part_node_count.values), 0, 0) + + if interior_ring_name is None: + offset2 = np.array(list(range(len(offset1)))) + else: + interior_ring = ds[interior_ring_name] + if not interior_ring[0] == 0: + raise ValueError("coordinate array must start with an exterior ring") + offset2 = np.append(np.where(interior_ring == 0)[0], [len(part_node_count)]) + + polygons = from_ragged_array(GeometryType.POLYGON, xy, offsets=(offset1, offset2)) + + # get index of offset2 values that are edges for node_count + offset3 = np.nonzero( + np.isin( + offset2, + np.nonzero(np.isin(offset1, np.insert(np.cumsum(node_count), 0, 0)))[0], + ) + )[0] + multipolygons = from_ragged_array( + GeometryType.MULTIPOLYGON, xy, offsets=(offset1, offset2, offset3) + ) + + # get items from polygons or multipolygons depending on number of parts + geoms = np.where(np.diff(offset3) == 1, polygons[offset3[:-1]], multipolygons) + + return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index f6fdf515..458355e9 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -126,6 +126,154 @@ def geometry_line_without_multilines_ds(): return cf_ds, shp_da +@pytest.fixture +def geometry_polygon_without_holes_ds(): + from shapely.geometry import Polygon + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(2, dtype=object) + geoms[:] = [ + Polygon(([50, 0], [40, 15], [30, 0])), + Polygon(([70, 50], [60, 65], [50, 50])), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [50, 40, 30, 50, 70, 60, 50, 70], dims=("node",), attrs={"axis": "X"} + ), + y=xr.DataArray( + [0, 15, 0, 0, 50, 65, 50, 50], dims=("node",), attrs={"axis": "Y"} + ), + node_count=xr.DataArray([4, 4], dims=("index",)), + crd_x=xr.DataArray([50, 70], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0, 50], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + +@pytest.fixture +def geometry_polygon_without_multipolygons_ds(): + from shapely.geometry import Polygon + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(2, dtype=object) + geoms[:] = [ + Polygon(([50, 0], [40, 15], [30, 0])), + Polygon( + ([70, 50], [60, 65], [50, 50]), + [ + ([55, 55], [60, 60], [65, 55]), + ], + ), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [50, 40, 30, 50, 70, 60, 50, 70, 55, 60, 65, 55], + dims=("node",), + attrs={"axis": "X"}, + ), + y=xr.DataArray( + [0, 15, 0, 0, 50, 65, 50, 50, 55, 60, 55, 55], + dims=("node",), + attrs={"axis": "Y"}, + ), + node_count=xr.DataArray([4, 8], dims=("index",)), + part_node_count=xr.DataArray([4, 4, 4], dims=("part",)), + interior_ring=xr.DataArray([0, 0, 1], dims=("part",)), + crd_x=xr.DataArray([50, 70], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0, 50], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "part_node_count": "part_node_count", + "interior_ring": "interior_ring", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + +@pytest.fixture +def geometry_polygon_ds(): + from shapely.geometry import MultiPolygon, Polygon + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(2, dtype=object) + geoms[:] = [ + MultiPolygon( + [ + ( + ([20, 0], [10, 15], [0, 0]), + [ + ([5, 5], [10, 10], [15, 5]), + ], + ), + (([20, 20], [10, 35], [0, 20]),), + ] + ), + Polygon(([50, 0], [40, 15], [30, 0])), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [20, 10, 0, 20, 5, 10, 15, 5, 20, 10, 0, 20, 50, 40, 30, 50], + dims=("node",), + attrs={"axis": "X"}, + ), + y=xr.DataArray( + [0, 15, 0, 0, 5, 10, 5, 5, 20, 35, 20, 20, 0, 15, 0, 0], + dims=("node",), + attrs={"axis": "Y"}, + ), + node_count=xr.DataArray([12, 4], dims=("index",)), + part_node_count=xr.DataArray([4, 4, 4, 4], dims=("part",)), + interior_ring=xr.DataArray([0, 1, 0, 0], dims=("part",)), + crd_x=xr.DataArray([20, 50], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0, 0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "part_node_count": "part_node_count", + "interior_ring": "interior_ring", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + @requires_shapely def test_shapely_to_cf(geometry_ds): from shapely.geometry import Point @@ -192,6 +340,43 @@ def test_shapely_to_cf_for_lines_without_multilines( xr.testing.assert_identical(actual, expected) +@requires_shapely +def test_shapely_to_cf_for_polygons_as_da(geometry_polygon_ds): + expected, in_da = geometry_polygon_ds + + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + in_da = in_da.assign_coords(index=["a", "b"]) + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b"])) + + +@requires_shapely +def test_shapely_to_cf_for_polygons_as_sequence(geometry_polygon_ds): + expected, in_da = geometry_polygon_ds + actual = cfxr.shapely_to_cf(in_da.values) + xr.testing.assert_identical(actual, expected) + + +@requires_shapely +def test_shapely_to_cf_for_polygons_without_multipolygons( + geometry_polygon_without_multipolygons_ds, +): + expected, in_da = geometry_polygon_without_multipolygons_ds + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + +@requires_shapely +def test_shapely_to_cf_for_polygons_without_holes( + geometry_polygon_without_holes_ds, +): + expected, in_da = geometry_polygon_without_holes_ds + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + @requires_shapely def test_shapely_to_cf_errors(): from shapely.geometry import Point, Polygon @@ -199,13 +384,8 @@ def test_shapely_to_cf_errors(): geoms = [ Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]), Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]), + Point(1, 2), ] - with pytest.raises( - NotImplementedError, match="Polygon geometry conversion is not implemented" - ): - cfxr.shapely_to_cf(geoms) - - geoms.append(Point(1, 2)) with pytest.raises(ValueError, match="Mixed geometry types are not supported"): cfxr.shapely_to_cf(geoms) @@ -258,12 +438,51 @@ def test_cf_to_shapely_for_lines_without_multilines( @requires_shapely -def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds): - in_ds, _ = geometry_ds - in_ds.geometry_container.attrs["geometry_type"] = "polygon" - with pytest.raises(NotImplementedError, match="Polygon geometry"): - cfxr.cf_to_shapely(in_ds) +def test_cf_to_shapely_for_polygons(geometry_polygon_ds): + in_ds, expected = geometry_polygon_ds + + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + +@requires_shapely +def test_cf_to_shapely_for_polygons_without_multipolygons( + geometry_polygon_without_multipolygons_ds, +): + in_ds, expected = geometry_polygon_without_multipolygons_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + in_ds = in_ds.assign_coords(index=["b", "c"]) + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical( + actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"]) + ) + +@requires_shapely +def test_cf_to_shapely_for_polygons_without_holes( + geometry_polygon_without_holes_ds, +): + in_ds, expected = geometry_polygon_without_holes_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + in_ds = in_ds.assign_coords(index=["b", "c"]) + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical( + actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"]) + ) + + +@requires_shapely +def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds, geometry_polygon_ds): + in_ds, _ = geometry_ds in_ds.geometry_container.attrs["geometry_type"] = "punkt" with pytest.raises(ValueError, match="Valid CF geometry types are "): cfxr.cf_to_shapely(in_ds) @@ -273,6 +492,11 @@ def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds): with pytest.raises(ValueError, match="'node_count' must be provided"): cfxr.cf_to_shapely(in_ds) + in_ds, _ = geometry_polygon_ds + del in_ds.geometry_container.attrs["node_count"] + with pytest.raises(ValueError, match="'node_count' must be provided"): + cfxr.cf_to_shapely(in_ds) + @requires_shapely def test_reshape_unique_geometries(geometry_ds):