Handle chunks of all null values
fbunt committed May 19, 2022
1 parent 0c43040 commit de4ed5f
Showing 2 changed files with 35 additions and 7 deletions.
raster_tools/raster.py (14 additions, 5 deletions)
@@ -1024,7 +1024,9 @@ def to_vector(self):
             dd.from_delayed(_vectorize(*chunk), meta=meta) for chunk in chunks
         ]
         ddf = dd.concat(results)
-        return ddf
+        # Add astype() as a workaround for
+        # https://github.com/geopandas/dask-geopandas/issues/190
+        return ddf.astype({"band": int, "row": int, "col": int})
 
 
 _XY_OFFSET_REMAP = {"ul": "ll", "ll": "ul", "ur": "lr", "lr": "ur"}
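
For context on the astype() line above, here is a minimal, standalone sketch (made-up data; not code from this commit) of the dtype-pinning pattern it relies on: after the dask DataFrame is assembled, the band/row/col columns are cast to plain integers so every partition, including ones built from empty chunks, reports a stable integer dtype.

import dask.dataframe as dd
import pandas as pd

# Toy frame standing in for the concatenated per-chunk results.
pdf = pd.DataFrame(
    {"value": [3.0, 1.0], "band": [1.0, 1.0], "row": [0.0, 1.0], "col": [2.0, 3.0]}
)
ddf = dd.from_pandas(pdf, npartitions=2)
# Pin the index-like columns to int, mirroring the workaround above.
ddf = ddf.astype({"band": int, "row": int, "col": int})
print(ddf.dtypes)  # band/row/col are now int64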
@@ -1083,10 +1085,17 @@ def _extract_values(data, mask):
 @dask.delayed
 def _vectorize(data, mask, cx, cy, band, crs, affine):
     xpoints, ypoints = _extract_points(mask, cx, cy)
-    values = _extract_values(data, mask)
-    points = [Point(x, y) for x, y in zip(xpoints, ypoints)]
-    rows, cols = xy_to_rowcol(xpoints, ypoints, affine)
-    bands = [band] * len(values)
+    if len(xpoints):
+        values = _extract_values(data, mask)
+        points = [Point(x, y) for x, y in zip(xpoints, ypoints)]
+        rows, cols = xy_to_rowcol(xpoints, ypoints, affine)
+        bands = [band] * len(values)
+    else:
+        values = []
+        points = []
+        bands = []
+        rows = []
+        cols = []
     df = gpd.GeoDataFrame(
         {
             "value": values,
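
The if/else above is the heart of the fix: an all-null chunk yields no points, so the per-chunk GeoDataFrame is built from empty lists instead of failing. A standalone sketch of the same guard (helper and variable names here are illustrative, not the package's internals):

import geopandas as gpd
import numpy as np
from shapely.geometry import Point

def vectorize_chunk(data, mask, xs, ys, band=1):
    # mask is True where a cell is null; keep only the valid cells.
    valid = ~mask
    if valid.any():
        values = data[valid].tolist()
        points = [Point(x, y) for x, y in zip(xs[valid], ys[valid])]
        bands = [band] * len(values)
    else:
        # All-null chunk: contribute zero rows instead of raising.
        values, points, bands = [], [], []
    return gpd.GeoDataFrame({"value": values, "band": bands, "geometry": points})

# An entirely masked 2x2 chunk produces an empty, well-formed frame.
mask = np.ones((2, 2), dtype=bool)
data = np.zeros((2, 2))
ys, xs = np.meshgrid([1.5, 0.5], [0.5, 1.5], indexing="ij")
print(len(vectorize_chunk(data, mask, xs, ys)))  # 0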
tests/test_raster.py (21 additions, 2 deletions)
@@ -861,8 +861,6 @@ def test_burn_mask():
     assert rs.null_value == 999
     true_state = data > 15
     true_state = np.where(data >= 20, False, true_state)
-    print(rs.burn_mask()._values)
-    print(true_state)
     assert np.allclose(rs.burn_mask(), true_state)


@@ -965,6 +963,27 @@ def test_to_vector():
     assert np.all(ddf.columns == ["value", "band", "row", "col", "geometry"])
     _compare_raster_to_vectorized(rs, df)
 
+    # make sure that empty (all-null) chunks are handled
+    data = np.array(
+        [
+            [
+                [0, 0, 1, 2],
+                [0, 0, 2, 2],
+                [0, 0, 1, 0],
+                [0, 1, 3, 0],
+            ]
+        ]
+    )
+    count = np.sum(data > 0)
+    rs = Raster(data).set_null_value(0)
+    rs.xrs.data = dask.array.rechunk(rs.xrs.data, (1, 2, 2))
+    rs._mask = dask.array.rechunk(rs._mask, (1, 2, 2))
+    ddf = rs.to_vector()
+    df = ddf.compute()
+
+    assert len(df) == count
+    _compare_raster_to_vectorized(rs, df)
+
 
 if __name__ == "__main__":
     unittest.main()
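
As a quick check on why the new test hits the empty-chunk path (assuming the (1, 2, 2) chunking splits the single 4x4 band into four 2x2 blocks), the upper-left block of the test array is all zeros, so it becomes entirely null once 0 is set as the null value:

import numpy as np

data = np.array(
    [
        [0, 0, 1, 2],
        [0, 0, 2, 2],
        [0, 0, 1, 0],
        [0, 1, 3, 0],
    ]
)
print(data[:2, :2])               # upper-left chunk: all zeros
print((data[:2, :2] == 0).all())  # True -> that chunk is all null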
