Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add conversion between shapely and cf for polygons #495

Merged
merged 7 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 162 additions & 8 deletions cf_xarray/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,6 @@
"""
Convert a DataArray with shapely geometry objects into a CF-compliant dataset.

.. warning::
Only point and line geometries are currently implemented.

Parameters
----------
geometries : sequence of shapely geometries or xarray.DataArray
Expand Down Expand Up @@ -115,7 +112,7 @@
elif types.issubset({"LineString", "MultiLineString"}):
ds = lines_to_cf(geometries)
elif types.issubset({"Polygon", "MultiPolygon"}):
raise NotImplementedError("Polygon geometry conversion is not implemented.")
ds = polygons_to_cf(geometries)
else:
raise ValueError(
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
Expand All @@ -142,9 +139,6 @@
"""
Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable.

.. warning::
Only point and line geometries are currently implemented.

Parameters
----------
ds : xr.Dataset
Expand All @@ -168,7 +162,7 @@
elif geom_type == "line":
geometries = cf_to_lines(ds)
elif geom_type == "polygon":
raise NotImplementedError("Polygon geometry conversion is not implemented.")
geometries = cf_to_polygons(ds)
else:
raise ValueError(
f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
Expand Down Expand Up @@ -430,3 +424,163 @@
geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines)

return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)


def polygons_to_cf(polygons: xr.DataArray | Sequence):
"""Convert an iterable of polygons (shapely.geometry.[Multi]Polygon) into a CF-compliant geometry dataset.

Parameters
----------
polygons : sequence of shapely.geometry.Polygon or MultiPolygon
The sequence of [multi]polygons to translate to a CF dataset.

Returns
-------
xr.Dataset
A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
and optionally 'part_node_count'.
"""
from shapely import to_ragged_array

if isinstance(polygons, xr.DataArray):
dim = polygons.dims[0]
coord = polygons[dim] if dim in polygons.coords else None
polygons_ = polygons.values
else:
dim = "index"
coord = None
polygons_ = np.array(polygons)

_, arr, offsets = to_ragged_array(polygons_)
x = arr[:, 0]
y = arr[:, 1]

part_node_count = np.diff(offsets[0])
if len(offsets) == 1:
indices = offsets[0]

Check warning on line 460 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L460

Added line #L460 was not covered by tests
node_count = part_node_count
elif len(offsets) >= 2:
indices = np.take(offsets[0], offsets[1])
interior_ring = np.isin(offsets[0], indices, invert=True)[:-1].astype(int)

if len(offsets) == 3:
indices = np.take(indices, offsets[2])

node_count = np.diff(indices)

geom_coords = arr.take(indices[:-1], 0)
crdX = geom_coords[:, 0]
crdY = geom_coords[:, 1]

ds = xr.Dataset(
data_vars={
"node_count": xr.DataArray(node_count, dims=(dim,)),
"interior_ring": xr.DataArray(interior_ring, dims=("part",)),
"part_node_count": xr.DataArray(part_node_count, dims=("part",)),

Check warning on line 479 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L477-L479

Added lines #L477 - L479 were not covered by tests
"geometry_container": xr.DataArray(
attrs={
"geometry_type": "polygon",
"node_count": "node_count",
"part_node_count": "part_node_count",
"interior_ring": "interior_ring",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
},
coords={
"x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}),
"y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}),
"crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}),
"crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}),

Check warning on line 495 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L492-L495

Added lines #L492 - L495 were not covered by tests
},
)

if coord is not None:
ds = ds.assign_coords({dim: coord})

# Special case when we have no MultiPolygons and no holes
if len(ds.part_node_count) == len(ds.node_count):
ds = ds.drop_vars("part_node_count")
del ds.geometry_container.attrs["part_node_count"]

# Special case when we have no holes
if (ds.interior_ring == 0).all():
ds = ds.drop_vars("interior_ring")
del ds.geometry_container.attrs["interior_ring"]
return ds


def cf_to_polygons(ds: xr.Dataset):
"""Convert polygon geometries stored in a CF-compliant way to shapely polygons stored in a single variable.

Parameters
----------
ds : xr.Dataset
A dataset with CF-compliant polygon geometries.
Must have a "geometry_container" variable with at least a 'node_coordinates' attribute.
Must also have the two 1D variables listed by this attribute.

Returns
-------
geometry : xr.DataArray
A 1D array of shapely.geometry.[Multi]Polygon objects.
It has the same dimension as the ``part_node_count`` or the coordinates variables, or
``'features'`` if those were not present in ``ds``.
"""
from shapely import GeometryType, from_ragged_array

# Shorthand for convenience
geo = ds.geometry_container.attrs

# The features dimension name, defaults to the one of 'part_node_count'
# or the dimension of the coordinates, if present.
feat_dim = None
if "coordinates" in geo:
xcoord_name, _ = geo["coordinates"].split(" ")
(feat_dim,) = ds[xcoord_name].dims

x_name, y_name = geo["node_coordinates"].split(" ")
xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)

node_count_name = geo.get("node_count")
part_node_count_name = geo.get("part_node_count", node_count_name)
interior_ring_name = geo.get("interior_ring")

if node_count_name is None:
raise ValueError("'node_count' must be provided for polygon geometries")
else:
node_count = ds[node_count_name]
feat_dim = feat_dim or "index"
if feat_dim in ds.coords:
node_count = node_count.assign_coords({feat_dim: ds[feat_dim]})

# first get geometries for all the rings
part_node_count = ds[part_node_count_name]
offset1 = np.insert(np.cumsum(part_node_count.values), 0, 0)

if interior_ring_name is None:
offset2 = np.array(list(range(len(offset1))))
else:
interior_ring = ds[interior_ring_name]
if not interior_ring[0] == 0:
raise ValueError("coordinate array must start with an exterior ring")
offset2 = np.append(np.where(interior_ring == 0)[0], [len(part_node_count)])

polygons = from_ragged_array(GeometryType.POLYGON, xy, offsets=(offset1, offset2))

# get index of offset2 values that are edges for node_count
offset3 = np.nonzero(
np.isin(
offset2,
np.nonzero(np.isin(offset1, np.insert(np.cumsum(node_count), 0, 0)))[0],
)
)[0]
multipolygons = from_ragged_array(
GeometryType.MULTIPOLYGON, xy, offsets=(offset1, offset2, offset3)

Check warning on line 580 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L580

Added line #L580 was not covered by tests
)

# get items from polygons or multipolygons depending on number of parts
geoms = np.where(np.diff(offset3) == 1, polygons[offset3[:-1]], multipolygons)

return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)
Loading