From de4ed5faa269ed53d40c2b0346e9323aa33adcf4 Mon Sep 17 00:00:00 2001
From: Fred Bunt
Date: Thu, 19 May 2022 13:12:27 -0600
Subject: [PATCH] Handle chunks of all null values

---
 raster_tools/raster.py | 19 ++++++++++++++-----
 tests/test_raster.py   | 23 +++++++++++++++++++++--
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/raster_tools/raster.py b/raster_tools/raster.py
index 7c12323..60514a7 100644
--- a/raster_tools/raster.py
+++ b/raster_tools/raster.py
@@ -1024,7 +1024,9 @@ def to_vector(self):
             dd.from_delayed(_vectorize(*chunk), meta=meta) for chunk in chunks
         ]
         ddf = dd.concat(results)
-        return ddf
+        # Add astype() as a workaround for
+        # https://github.com/geopandas/dask-geopandas/issues/190
+        return ddf.astype({"band": int, "row": int, "col": int})
 
 
 _XY_OFFSET_REMAP = {"ul": "ll", "ll": "ul", "ur": "lr", "lr": "ur"}
@@ -1083,10 +1085,17 @@ def _extract_values(data, mask):
 @dask.delayed
 def _vectorize(data, mask, cx, cy, band, crs, affine):
     xpoints, ypoints = _extract_points(mask, cx, cy)
-    values = _extract_values(data, mask)
-    points = [Point(x, y) for x, y in zip(xpoints, ypoints)]
-    rows, cols = xy_to_rowcol(xpoints, ypoints, affine)
-    bands = [band] * len(values)
+    if len(xpoints):
+        values = _extract_values(data, mask)
+        points = [Point(x, y) for x, y in zip(xpoints, ypoints)]
+        rows, cols = xy_to_rowcol(xpoints, ypoints, affine)
+        bands = [band] * len(values)
+    else:
+        values = []
+        points = []
+        bands = []
+        rows = []
+        cols = []
     df = gpd.GeoDataFrame(
         {
             "value": values,
diff --git a/tests/test_raster.py b/tests/test_raster.py
index 38f95fe..7a7de9a 100644
--- a/tests/test_raster.py
+++ b/tests/test_raster.py
@@ -861,8 +861,6 @@ def test_burn_mask():
     assert rs.null_value == 999
     true_state = data > 15
     true_state = np.where(data >= 20, False, true_state)
-    print(rs.burn_mask()._values)
-    print(true_state)
     assert np.allclose(rs.burn_mask(), true_state)
 
 
@@ -965,6 +963,27 @@ def test_to_vector():
     assert np.all(ddf.columns == ["value", "band", "row", "col", "geometry"])
     _compare_raster_to_vectorized(rs, df)
 
+    # make sure that empty (all-null) chunks are handled
+    data = np.array(
+        [
+            [
+                [0, 0, 1, 2],
+                [0, 0, 2, 2],
+                [0, 0, 1, 0],
+                [0, 1, 3, 0],
+            ]
+        ]
+    )
+    count = np.sum(data > 0)
+    rs = Raster(data).set_null_value(0)
+    rs.xrs.data = dask.array.rechunk(rs.xrs.data, (1, 2, 2))
+    rs._mask = dask.array.rechunk(rs._mask, (1, 2, 2))
+    ddf = rs.to_vector()
+    df = ddf.compute()
+
+    assert len(df) == count
+    _compare_raster_to_vectorized(rs, df)
+
 
 if __name__ == "__main__":
     unittest.main()
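
Reviewer note, not part of the patch: below is a minimal sketch of the scenario this change handles, mirroring the new test case. It assumes the API shown in the diff (Raster, set_null_value, to_vector, the xrs/_mask attributes) and that Raster is importable as `from raster_tools import Raster`; the data values and chunk sizes are illustrative only.

import numpy as np
import dask.array

from raster_tools import Raster

# Single-band 4x4 raster whose upper-left 2x2 block is all 0 (the null
# value), so one spatial chunk ends up containing no valid cells.
data = np.array(
    [
        [
            [0, 0, 1, 2],
            [0, 0, 2, 2],
            [0, 0, 1, 0],
            [0, 1, 3, 0],
        ]
    ]
)
rs = Raster(data).set_null_value(0)

# Force 2x2 spatial chunks so the all-null block becomes its own chunk.
rs.xrs.data = dask.array.rechunk(rs.xrs.data, (1, 2, 2))
rs._mask = dask.array.rechunk(rs._mask, (1, 2, 2))

# With the len(xpoints) guard in _vectorize, the all-null chunk simply
# yields an empty partition, and only the 7 non-null cells show up in the
# vectorized result.
df = rs.to_vector().compute()
assert len(df) == np.sum(data > 0)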