Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve memory load efficiency for shape_availability calculation #243

Merged
merged 12 commits into from
Jun 14, 2022
Merged
2 changes: 2 additions & 0 deletions RELEASE_NOTES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ Release Notes
* Bugfix: Downsampling the availability matrix (high resolution to low resolution) failed. Only rasters with 0 or 1
were produced. Expected are also floats between 0 and 1 (GH Issue #238). Changing the rasterio version solved this.
See solution (https://github.com/PyPSA/atlite/pull/240).
* Breaking Change: Due to better performance and memory efficiency the method of matrix summation, as well as the matrix dtpyes within `shape_availability()` in `atlite.gis`, have been changed.
The returned object `masked` (numpy.array) is now dtype `bool` instead of `float64`. This can create broken workflows, if `masked` is not transformed ahead of certain operations (https://github.com/PyPSA/atlite/pull/243).
* Bugfix: Avoid NaN values into the hydro inflows

Version 0.2.7
Expand Down
38 changes: 22 additions & 16 deletions atlite/gis.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from collections import OrderedDict
from pathlib import Path
from warnings import warn
from warnings import warn, catch_warnings, simplefilter
from pyproj import CRS, Transformer
from shapely.ops import transform
from rasterio.warp import reproject, transform_bounds
Expand Down Expand Up @@ -426,15 +426,14 @@ def shape_availability(geometry, excluder):
Affine transform of the mask.

"""
exclusions = []
if not excluder.all_open:
excluder.open_files()
assert geometry.crs == excluder.crs

bounds = rio.features.bounds(geometry)
transform, shape = padded_transform_and_shape(bounds, res=excluder.res)
masked = geometry_mask(geometry, shape, transform).astype(int)
exclusions.append(masked)
masked = geometry_mask(geometry, shape, transform)
exclusions = masked

# For the following: 0 is eligible, 1 in excluded
raster = None
Expand All @@ -449,25 +448,28 @@ def shape_availability(geometry, excluder):
)
if d["codes"]:
if callable(d["codes"]):
masked_ = d["codes"](masked)
masked_ = d["codes"](masked).astype(bool)
else:
masked_ = isin(masked, d["codes"])
else:
masked_ = masked
masked_ = masked.astype(bool)

if d["invert"]:
masked_ = ~(masked_).astype(bool)
masked_ = ~masked_
if d["buffer"]:
iterations = int(d["buffer"] / excluder.res) + 1
masked_ = dilation(masked_, iterations=iterations).astype(int)
masked_ = dilation(masked_, iterations=iterations)

exclusions.append(masked_.astype(int))
exclusions = exclusions | masked_

for d in excluder.geometries:
masked = ~geometry_mask(d["geometry"], shape, transform, invert=d["invert"])
exclusions.append(masked.astype(int))
exclusions = exclusions | masked

return (sum(exclusions) == 0).astype(float), transform
warn(
"Output dtype of shape_availability changed from float to boolean.", UserWarning
)
return ~exclusions, transform
FabianHofmann marked this conversation as resolved.
Show resolved Hide resolved


def shape_availability_reprojected(
Expand Down Expand Up @@ -509,7 +511,7 @@ def shape_availability_reprojected(
masked, transform, dst_transform, excluder.crs, dst_crs
)
return rio.warp.reproject(
masked,
masked.astype(np.uint8),
empty(dst_shape),
resampling=rio.warp.Resampling.average,
src_transform=transform,
Expand All @@ -527,7 +529,9 @@ def _init_process(shapes_, excluder_, dst_transform_, dst_crs_, dst_shapes_):

def _process_func(i):
args = (excluder, dst_transform, dst_crs, dst_shapes)
return shape_availability_reprojected(shapes.loc[[i]], *args)[0]
with catch_warnings():
simplefilter("ignore")
return shape_availability_reprojected(shapes.loc[[i]], *args)[0]


def compute_availabilitymatrix(
Expand Down Expand Up @@ -586,9 +590,11 @@ def compute_availabilitymatrix(
desc="Compute availability matrix",
)
if nprocesses is None:
for i in tqdm(shapes.index, **tqdm_kwargs):
_ = shape_availability_reprojected(shapes.loc[[i]], *args)[0]
availability.append(_)
with catch_warnings():
simplefilter("ignore")
for i in tqdm(shapes.index, **tqdm_kwargs):
_ = shape_availability_reprojected(shapes.loc[[i]], *args)[0]
availability.append(_)
else:
assert (
excluder.all_closed
Expand Down