diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f7bd04bc..af06fcb3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ ci: autoupdate_schedule: quarterly repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v5.0.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -12,10 +12,14 @@ repos: - id: check-merge-conflict # Fix common spelling mistakes - repo: https://github.com/codespell-project/codespell - rev: v2.2.1 + rev: v2.3.0 hooks: - id: codespell - args: [--ignore-words-list=alos, --ignore-regex=\bnin\b] + args: [ + '--ignore-words-list', 'alos,inout,vor', + '--ignore-regex', '\bnin\b', + '--' + ] types_or: [python, rst, markdown] files: ^(geoutils|doc|tests)/ @@ -27,23 +31,24 @@ repos: # Format the code aggressively using black - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 24.10.0 hooks: - id: black args: [--line-length=120] # Lint the code using flake8 - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 7.1.1 hooks: - id: flake8 args: [ - --max-line-length=120, - --extend-ignore=E203, # flake8 disagrees with black, so this should be ignored. + '--max-line-length', '120', # we can write dicts however we want + '--extend-ignore', 'E203,B028', # flake8 disagrees with black, so this should be ignored. 
+ '--' ] additional_dependencies: - - flake8-comprehensions==3.1.0 - - flake8-bugbear==21.3.2 + - flake8-comprehensions + - flake8-bugbear files: ^(geoutils|tests) # Lint the code using mypy - repo: https://github.com/pre-commit/mirrors-mypy @@ -63,26 +68,26 @@ repos: --disable-error-code=var-annotated, --disable-error-code=no-any-return ] - additional_dependencies: [tokenize-rt==3.2.0, numpy==1.22] + additional_dependencies: [tokenize-rt==3.2.0, numpy==1.26] files: ^(geoutils|tests) # Sort imports using isort - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort args: [ "--profile", "black" ] # Automatically upgrade syntax to a minimum version - repo: https://github.com/asottile/pyupgrade - rev: v3.1.0 + rev: v3.19.0 hooks: - id: pyupgrade args: [--py37-plus] # Various formattings - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.9.0 + rev: v1.10.0 hooks: # Single backticks should apparently not be used - id: rst-backticks @@ -101,7 +106,7 @@ repos: # Add custom regex lints (see .relint.yml) - repo: https://github.com/codingjoe/relint - rev: 2.0.0 + rev: 3.3.1 hooks: - id: relint - repo: local diff --git a/doc/source/background.md b/doc/source/background.md index f357fc2c..1855ea0b 100644 --- a/doc/source/background.md +++ b/doc/source/background.md @@ -51,7 +51,7 @@ In details, those mean: - **Reproducibility:** all code is version-controlled and release-based, to ensure consistency of dependent packages and works; -- **Open-source:** all code is accessible and re-usable to anyone in the community, for transparency and open governance. +- **Open-source:** all code is accessible and reusable to anyone in the community, for transparency and open governance. 
```{note} :class: margin diff --git a/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py b/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py index 46de30d4..4a424662 100644 --- a/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py +++ b/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py @@ -35,6 +35,7 @@ ma2 = rast2.read(masked=True) ma_result = (1 + ma2) / (ma1_reproj) + # Equivalent of saving # (requires to define a logical # nodata for the data type) diff --git a/doc/source/conf.py b/doc/source/conf.py index cf8def54..c18f6b0b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -202,7 +202,7 @@ def setup(app): 'to update your code see here. ⚠️' "
Future changes will come with deprecation warnings! 🙂" ), - "show_toc_level": 3 + "show_toc_level": 3, # "logo_only": True, # "icon_links": [ # { diff --git a/doc/source/core_array_funcs.md b/doc/source/core_array_funcs.md index fba675ef..f982b3b9 100644 --- a/doc/source/core_array_funcs.md +++ b/doc/source/core_array_funcs.md @@ -33,7 +33,7 @@ matching georeferencing or shape, respectively. These functions inherently support the casting of different {attr}`~geoutils.Raster.dtype` and values masked by {attr}`~geoutils.Raster.nodata` in the {class}`~numpy.ma.MaskedArray`. -Below, we re-use the same example created in {ref}`core-py-ops`. +Below, we reuse the same example created in {ref}`core-py-ops`. ```{code-cell} ipython3 :tags: [hide-input, hide-output] diff --git a/doc/source/georeferencing.md b/doc/source/georeferencing.md index 08fd84e4..88841b86 100644 --- a/doc/source/georeferencing.md +++ b/doc/source/georeferencing.md @@ -124,7 +124,7 @@ vect.get_footprint_projected(vect.crs).plot() ### Grid (only for rasters) A raster's grid origin and resolution are defined by its geotransform attribute, {attr}`~geoutils.Raster.transform`. -Comined with the 2D shape of the data array {attr}`~geoutils.Raster.shape` (and independently of the number of +Combined with the 2D shape of the data array {attr}`~geoutils.Raster.shape` (and independently of the number of bands {attr}`~geoutils.Raster.bands`), these two attributes define the georeferenced grid of a raster. 
From it are derived the resolution {attr}`~geoutils.Raster.res`, and {attr}`~geoutils.Raster.height` and diff --git a/doc/source/sphinxext.py b/doc/source/sphinxext.py index 05cec930..53ff95a8 100644 --- a/doc/source/sphinxext.py +++ b/doc/source/sphinxext.py @@ -1,4 +1,6 @@ """Functions for documentation configuration only, importable by sphinx""" + + # To reset resolution setting for each sphinx-gallery example def reset_mpl(gallery_conf, fname): # To get a good resolution for displayed figures diff --git a/examples/analysis/array_numerics/numpy_interfacing.py b/examples/analysis/array_numerics/numpy_interfacing.py index 98adf3d2..fde9ecb8 100644 --- a/examples/analysis/array_numerics/numpy_interfacing.py +++ b/examples/analysis/array_numerics/numpy_interfacing.py @@ -4,6 +4,7 @@ This example demonstrates NumPy interfacing with rasters on :class:`Rasters`. See :ref:`core-array-funcs` for more details. """ + # %% # We open a raster. diff --git a/examples/analysis/array_numerics/python_arithmetic.py b/examples/analysis/array_numerics/python_arithmetic.py index 1ae11bcb..207460f8 100644 --- a/examples/analysis/array_numerics/python_arithmetic.py +++ b/examples/analysis/array_numerics/python_arithmetic.py @@ -4,6 +4,7 @@ This example demonstrates arithmetic operations using raster arithmetic on :class:`Rasters`. See :ref:`core-py-ops` for more details. """ + # %% # We open a raster diff --git a/examples/analysis/geospatial/buffer_voronoi.py b/examples/analysis/geospatial/buffer_voronoi.py index 554f43bf..343b1e13 100644 --- a/examples/analysis/geospatial/buffer_voronoi.py +++ b/examples/analysis/geospatial/buffer_voronoi.py @@ -4,6 +4,7 @@ This example demonstrates the metric buffering of a vector using :func:`~geoutils.Vector.buffer_metric` and :func:`~geoutils.Vector.buffer_without_overlap`. 
""" + # %% # We open an example vector diff --git a/examples/analysis/geospatial/proximity_metric.py b/examples/analysis/geospatial/proximity_metric.py index c73964be..98afbf8b 100644 --- a/examples/analysis/geospatial/proximity_metric.py +++ b/examples/analysis/geospatial/proximity_metric.py @@ -4,6 +4,7 @@ This example demonstrates the calculation of proximity distances to a raster or vector using :func:`~geoutils.Raster.proximity`. """ + # %% # We open an example raster, and a vector for which we select a single feature diff --git a/examples/analysis/point_extraction/interpolation.py b/examples/analysis/point_extraction/interpolation.py index 5682b717..05450692 100644 --- a/examples/analysis/point_extraction/interpolation.py +++ b/examples/analysis/point_extraction/interpolation.py @@ -4,6 +4,7 @@ This example demonstrates the 2D interpolation of raster values to points using :func:`~geoutils.Raster.interp_points`. """ + # %% # We open an example raster, a digital elevation model in South America. diff --git a/examples/analysis/point_extraction/reduction.py b/examples/analysis/point_extraction/reduction.py index 533c2d5d..c2ea4028 100644 --- a/examples/analysis/point_extraction/reduction.py +++ b/examples/analysis/point_extraction/reduction.py @@ -4,6 +4,7 @@ This example demonstrates the reduction of windowed raster values around a point using :func:`~geoutils.Raster.value_at_coords`. """ + # %% # We open an example raster, a digital elevation model in South America. diff --git a/examples/handling/georeferencing/crop_raster.py b/examples/handling/georeferencing/crop_raster.py index 369b8380..3c0efeb5 100644 --- a/examples/handling/georeferencing/crop_raster.py +++ b/examples/handling/georeferencing/crop_raster.py @@ -4,6 +4,7 @@ This example demonstrates the cropping of a raster using :func:`geoutils.Raster.crop`. """ + # %% # We open a raster and vector, and subset the latter. 
diff --git a/examples/handling/georeferencing/crop_vector.py b/examples/handling/georeferencing/crop_vector.py index 65587bb7..604c3074 100644 --- a/examples/handling/georeferencing/crop_vector.py +++ b/examples/handling/georeferencing/crop_vector.py @@ -4,6 +4,7 @@ This example demonstrates the cropping of a vector using :func:`geoutils.Vector.crop`. """ + # %% # We open a raster and vector. diff --git a/examples/handling/georeferencing/reproj_raster.py b/examples/handling/georeferencing/reproj_raster.py index 924ddb91..8cf4db25 100644 --- a/examples/handling/georeferencing/reproj_raster.py +++ b/examples/handling/georeferencing/reproj_raster.py @@ -4,6 +4,7 @@ This example demonstrates the reprojection of a raster using :func:`geoutils.Raster.reproject`. """ + # %% # We open two example rasters. diff --git a/examples/handling/georeferencing/reproj_vector.py b/examples/handling/georeferencing/reproj_vector.py index aca9782a..e529ab31 100644 --- a/examples/handling/georeferencing/reproj_vector.py +++ b/examples/handling/georeferencing/reproj_vector.py @@ -4,6 +4,7 @@ This example demonstrates the reprojection of a vector using :func:`geoutils.Vector.reproject`. """ + # %% # We open a raster and vector. diff --git a/examples/handling/interface/create_mask.py b/examples/handling/interface/create_mask.py index 8657009a..35495608 100644 --- a/examples/handling/interface/create_mask.py +++ b/examples/handling/interface/create_mask.py @@ -4,6 +4,7 @@ This example demonstrates the creation of a mask from a vector using :func:`geoutils.Vector.create_mask`. """ + # %% # We open a raster and vector. diff --git a/examples/handling/interface/polygonize.py b/examples/handling/interface/polygonize.py index ea8d1280..8f3b8f1b 100644 --- a/examples/handling/interface/polygonize.py +++ b/examples/handling/interface/polygonize.py @@ -4,6 +4,7 @@ This example demonstrates the polygonizing of a raster using :func:`geoutils.Raster.polygonize` and :func:`geoutils.Mask.polygonize`. 
""" + # %% # We open a raster. diff --git a/examples/handling/interface/rasterize.py b/examples/handling/interface/rasterize.py index a8bb1a91..b4517d96 100644 --- a/examples/handling/interface/rasterize.py +++ b/examples/handling/interface/rasterize.py @@ -4,6 +4,7 @@ This example demonstrates the rasterizing of a vector using :func:`geoutils.Vector.rasterize`. """ + # %% # We open a raster and vector. diff --git a/examples/handling/interface/topoints.py b/examples/handling/interface/topoints.py index fcf3df54..1b758584 100644 --- a/examples/handling/interface/topoints.py +++ b/examples/handling/interface/topoints.py @@ -4,6 +4,7 @@ This example demonstrates the conversion of a raster to point vector using :func:`geoutils.Raster.to_points`. """ + # %% # We open a raster. diff --git a/geoutils/_config.py b/geoutils/_config.py index 7ecb9830..8fe35ebc 100644 --- a/geoutils/_config.py +++ b/geoutils/_config.py @@ -1,4 +1,5 @@ """Setup of runtime-compile configuration of GeoUtils.""" + from __future__ import annotations import configparser diff --git a/geoutils/_typing.py b/geoutils/_typing.py index d8aacec9..ecf21256 100644 --- a/geoutils/_typing.py +++ b/geoutils/_typing.py @@ -1,4 +1,5 @@ """Typing aliases for internal use.""" + from __future__ import annotations import sys diff --git a/geoutils/examples.py b/geoutils/examples.py index 1662e53a..ed315608 100644 --- a/geoutils/examples.py +++ b/geoutils/examples.py @@ -1,4 +1,5 @@ """Utility functions to download and find example data.""" + import os import tarfile import tempfile diff --git a/geoutils/interface/__init__.py b/geoutils/interface/__init__.py new file mode 100644 index 00000000..78379c95 --- /dev/null +++ b/geoutils/interface/__init__.py @@ -0,0 +1,5 @@ +from geoutils.interface.distance import * # noqa +from geoutils.interface.gridding import * # noqa +from geoutils.interface.interpolate import * # noqa +from geoutils.interface.raster_point import * # noqa +from geoutils.interface.raster_vector import 
def _proximity_from_vector_or_raster(
    raster: gu.Raster,
    vector: gu.Vector | None = None,
    target_values: list[float] | None = None,
    geometry_type: str = "boundary",
    in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both",
    distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced",
) -> NDArrayNum:
    """
    Compute a grid of distances to target pixels of a Raster, or to a Vector geometry rasterized on that Raster.

    (This function is defined here as mostly raster-based, but used in a class method for both Raster and Vector)
    Proximity to a Raster's target values if no Vector is provided, otherwise to a Vector's geometry type
    rasterized on the Raster.

    :param raster: Raster to burn the proximity grid on.
    :param vector: Vector for which to compute the proximity to geometry,
        if not provided computed on the Raster target pixels.
    :param target_values: (Only with a Raster) List of target values to use for the proximity,
        defaults to all non-zero values.
    :param geometry_type: (Only with a Vector) Type of geometry to use for the proximity, defaults to 'boundary'.
    :param in_or_out: (Only with a Vector) Compute proximity only 'in' or 'out'-side the geometry, or 'both'.
    :param distance_unit: Distance unit, either 'georeferenced' or 'pixel'.

    :returns: Array of distances to the nearest target pixel (zero on targets), or an array full of NaNs
        if no pixel is a target.

    :raises ValueError: If the distance unit is not 'georeferenced' or 'pixel', or if a Vector is passed and
        ``in_or_out`` is not one of 'in', 'out' or 'both'.
    """

    # The warning filter installed for the vector case is scoped to this call: catch_warnings() restores the
    # caller's filters on exit, instead of permanently silencing the warning for the whole process
    with warnings.catch_warnings():

        # 1/ First, if there is a vector input, we rasterize the geometry type
        # (works with .boundary that is a LineString (.exterior exists, but is a LinearRing)
        if vector is not None:

            # TODO: Only when using centroid... Maybe we should leave this operation to the user anyway?
            warnings.filterwarnings("ignore", message="Geometry is in a geographic CRS.*")

            # We create a geodataframe with the geometry type (regular attribute lookup, not the dunder)
            boundary_shp = gpd.GeoDataFrame(geometry=getattr(vector.ds, geometry_type), crs=vector.crs)
            # We mask the pixels that make up the geometry type
            mask_boundary = gu.Vector(boundary_shp).create_mask(raster, as_array=True)

        else:
            # The array is fetched once and reused for every target value
            nanarray = raster.get_nanarray()
            # We mask target pixels
            if target_values is not None:
                mask_boundary = np.logical_or.reduce([nanarray == target_val for target_val in target_values])
            # Otherwise, all non-zero values are considered targets
            else:
                mask_boundary = nanarray.astype(bool)

        # 2/ Now, we compute the distance matrix relative to the masked geometry type
        unit = distance_unit.lower()
        if unit == "georeferenced":
            sampling: int | tuple[float | int, float | int] = raster.res
        elif unit == "pixel":
            sampling = 1
        else:
            raise ValueError('Distance unit must be either "georeferenced" or "pixel".')

        # If at least one pixel is a target, then we compute the distance to the nearest target
        nb_targets = np.count_nonzero(mask_boundary)
        if nb_targets > 0:
            proximity = distance_transform_edt(~mask_boundary, sampling=sampling)
        # Otherwise, pass an array full of nodata
        else:
            proximity = np.ones(np.shape(mask_boundary)) * np.nan

        # 3/ If there was a vector input, apply the in_or_out argument to optionally mask inside/outside
        if vector is not None:
            if in_or_out == "both":
                pass
            elif in_or_out in ["in", "out"]:
                mask_polygon = gu.Vector(vector.ds).create_mask(raster, as_array=True)
                if in_or_out == "in":
                    # Keep distances inside the geometry only
                    proximity[~mask_polygon] = 0
                else:
                    # Keep distances outside the geometry only
                    proximity[mask_polygon] = 0
            else:
                raise ValueError('The type of proximity must be one of "in", "out" or "both".')

    return proximity
a/geoutils/pointcloud.py +++ b/geoutils/interface/gridding.py @@ -1,4 +1,4 @@ -"""Module for point cloud manipulation.""" +"""Functionalities for gridding points (point cloud to raster).""" import warnings from typing import Literal diff --git a/geoutils/raster/interpolate.py b/geoutils/interface/interpolate.py similarity index 98% rename from geoutils/raster/interpolate.py rename to geoutils/interface/interpolate.py index 153d9955..2824bd39 100644 --- a/geoutils/raster/interpolate.py +++ b/geoutils/interface/interpolate.py @@ -1,3 +1,5 @@ +"""Functionalities for interpolating a regular grid at points (raster to point cloud).""" + from __future__ import annotations from typing import Any, Callable, Literal, overload @@ -195,8 +197,7 @@ def _interp_points( *, return_interpolator: Literal[False] = False, **kwargs: Any, -) -> NDArrayNum: - ... +) -> NDArrayNum: ... @overload @@ -212,8 +213,7 @@ def _interp_points( *, return_interpolator: Literal[True], **kwargs: Any, -) -> Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]: - ... +) -> Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]: ... @overload @@ -229,8 +229,7 @@ def _interp_points( *, return_interpolator: bool = False, **kwargs: Any, -) -> NDArrayNum | Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]: - ... +) -> NDArrayNum | Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]: ... 
def _regular_pointcloud_to_raster(
    pointcloud: gpd.GeoDataFrame,
    grid_coords: tuple[NDArrayNum, NDArrayNum] = None,
    transform: rio.transform.Affine = None,
    shape: tuple[int, int] = None,
    nodata: int | float | None = None,
    data_column_name: str = "b1",
    area_or_point: Literal["Area", "Point"] = "Point",
) -> tuple[NDArrayNum, affine.Affine, CRS, int | float | None, Literal["Area", "Point"]]:
    """
    Convert a regular point cloud to a raster. See Raster.from_pointcloud_regular() for details.

    The output grid is defined either by ``grid_coords``, or by both ``transform`` and ``shape``
    (``grid_coords`` takes precedence if all are passed).

    :param pointcloud: Point cloud with point geometries and a data column named ``data_column_name``.
    :param grid_coords: Tuple of 1D arrays of X and Y grid coordinates, each equally spaced.
    :param transform: Geotransform of the output raster (used with ``shape``).
    :param shape: Shape of the output raster (used with ``transform``).
    :param nodata: Nodata value of the output raster, defaults to the default nodata of the data column dtype.
    :param data_column_name: Name of the point cloud column holding the values to grid.
    :param area_or_point: Pixel interpretation of the output raster.

    :returns: Masked array (masked wherever no point falls), transform, CRS of the point cloud, nodata value
        and pixel interpretation.

    :raises TypeError: If the grid coordinates are not a tuple of two 1D arrays.
    :raises ValueError: If the grid coordinates are too short or irregular, if no grid definition is passed,
        or if some points do not fall exactly on the grid.
    """

    # Get transform and shape from input
    if grid_coords is not None:

        # Input checks
        if (
            not isinstance(grid_coords, tuple)
            or not (isinstance(grid_coords[0], np.ndarray) and grid_coords[0].ndim == 1)
            or not (isinstance(grid_coords[1], np.ndarray) and grid_coords[1].ndim == 1)
        ):
            raise TypeError("Input grid coordinates must be 1D arrays.")

        # A step can only be derived from at least two coordinates along each axis
        if len(grid_coords[0]) < 2 or len(grid_coords[1]) < 2:
            raise ValueError("Grid coordinates must contain at least two values along both X and Y.")

        diff_x = np.diff(grid_coords[0])
        diff_y = np.diff(grid_coords[1])

        # Both axes have to be regular: the negation applies to the conjunction, not to the X check only
        if not (np.all(diff_x == diff_x[0]) and np.all(diff_y == diff_y[0])):
            raise ValueError("Grid coordinates must be regular (equally spaced, independently along X and Y).")

        # Build transform from min X, max Y and step in both
        # NOTE(review): the steps are passed as-is, which presumably assumes coordinates sorted in
        # ascending order (positive steps) — TODO confirm against callers
        out_transform = rio.transform.from_origin(np.min(grid_coords[0]), np.max(grid_coords[1]), diff_x[0], diff_y[0])
        # Y is first axis, X is second axis
        out_shape = (len(grid_coords[1]), len(grid_coords[0]))

    elif transform is not None and shape is not None:

        out_transform = transform
        out_shape = shape

    else:
        raise ValueError("Either grid coordinates or both geotransform and shape must be provided.")

    # Create raster from inputs, with placeholder data for now
    dtype = pointcloud[data_column_name].dtype
    # Fall back on the default nodata of the data type only when the user did not pass one
    out_nodata = nodata if nodata is not None else _default_nodata(dtype)
    arr = np.ones(out_shape, dtype=dtype)

    # Get indexes of point cloud coordinates in the raster, forcing no shift
    i, j = _xy2ij(
        x=pointcloud.geometry.x.values,
        y=pointcloud.geometry.y.values,
        shift_area_or_point=False,
        transform=out_transform,
        area_or_point=area_or_point,
    )

    # If coordinates are not integer type (forced in xy2ij), then some points are not falling on exact coordinates
    # (both the row and the column indexes have to be checked)
    if not np.issubdtype(i.dtype, np.integer) or not np.issubdtype(j.dtype, np.integer):
        raise ValueError("Some point cloud coordinates differ from the grid coordinates.")

    # Set values: everything is masked except the pixels where a point falls
    mask = np.ones(np.shape(arr), dtype=bool)
    mask[i, j] = False
    arr[i, j] = pointcloud[data_column_name].values

    # Set output values
    raster_arr = np.ma.masked_array(data=arr, mask=mask)

    return raster_arr, out_transform, pointcloud.crs, out_nodata, area_or_point
+ ) + + # Ensure auxiliary column name is defined if auxiliary data bands is not None + if auxiliary_column_names is not None: + if not ( + isinstance(auxiliary_column_names, Iterable) and all(isinstance(b, str) for b in auxiliary_column_names) + ): + raise ValueError("Auxiliary column names must be an iterable containing only strings.") + if not len(auxiliary_column_names) == len(auxiliary_data_bands): + raise ValueError( + f"Length of auxiliary column name and data band numbers should be the same, " + f"found {len(auxiliary_column_names)} and {len(auxiliary_data_bands)} respectively." + ) + + else: + auxiliary_column_names = [f"b{i}" for i in auxiliary_data_bands] + + # Define bigger list with all bands and names + all_bands = [data_band] + auxiliary_data_bands + all_column_names = [data_column_name] + auxiliary_column_names + + else: + all_bands = [data_band] + all_column_names = [data_column_name] + + # If subsample is the entire array, load it to optimize speed + if subsample == 1 and not source_raster.is_loaded: + source_raster.load(bands=all_bands) + + # Band indexes in the array are band number minus one + all_indexes = [b - 1 for b in all_bands] + + # We do 2D subsampling on the data band only, regardless of valid masks on other bands + if skip_nodata: + if source_raster.is_loaded: + if source_raster.count == 1: + self_mask = _get_mask_from_array( + source_raster.data + ) # This is to avoid the case where the mask is just "False" + else: + self_mask = _get_mask_from_array( + source_raster.data[data_band - 1, :, :] + ) # This is to avoid the case where the mask is just "False" + valid_mask = ~self_mask + + # Load only mask of valid data from disk if array not loaded + else: + valid_mask = ~source_raster._load_only_mask(bands=data_band) + # If we are not skipping nodata values, valid mask is everywhere + else: + if source_raster.count == 1: + valid_mask = np.ones(source_raster.data.shape, dtype=bool) + else: + valid_mask = np.ones(source_raster.data[0, 
:].shape, dtype=bool) + + # Get subsample on valid mask + # Build a low memory boolean masked array with invalid values masked to pass to subsampling + ma_valid = np.ma.masked_array(data=np.ones(np.shape(valid_mask), dtype=bool), mask=~valid_mask) + # Take a subsample within the valid values + indices = subsample_array(array=ma_valid, subsample=subsample, random_state=random_state, return_indices=True) + + # If the Raster is loaded, pick from the data while ignoring the mask + if source_raster.is_loaded: + if source_raster.count == 1: + pixel_data = source_raster.data[indices[0], indices[1]] + else: + # TODO: Combining both indexes at once could reduce memory usage? + pixel_data = source_raster.data[all_indexes, :][:, indices[0], indices[1]] + + # Otherwise use rasterio.sample to load only requested pixels + else: + # Extract the coordinates at subsampled pixels with valid data + # To extract data, we always use "upper left" which rasterio interprets as the exact raster coordinates + # Further below we redefine output coordinates based on point interpretation + x_coords, y_coords = (np.array(a) for a in source_raster.ij2xy(indices[0], indices[1], force_offset="ul")) + + with rio.open(source_raster.filename) as raster: + # Rasterio uses indexes (starts at 1) + pixel_data = np.array(list(raster.sample(zip(x_coords, y_coords), indexes=all_bands))).T + + # At this point there should not be any nodata anymore, so we can transform everything to normal array + if np.ma.isMaskedArray(pixel_data): + pixel_data = pixel_data.data + + # If nodata values were not skipped, convert them to NaNs and change data type + if skip_nodata is False: + pixel_data = pixel_data.astype("float32") + pixel_data[pixel_data == source_raster.nodata] = np.nan + + # Now we force the coordinates we define for the point cloud, according to pixel interpretation + x_coords_2, y_coords_2 = ( + np.array(a) for a in source_raster.ij2xy(indices[0], indices[1], force_offset=force_pixel_offset) + ) + + if 
def _polygonize(
    source_raster: gu.Raster,
    target_values: Number | tuple[Number, Number] | list[Number] | NDArrayNum | Literal["all"],
    data_column_name: str,
) -> gu.Vector:
    """
    Polygonize a raster. See Raster.polygonize() for details.

    :param source_raster: Raster to polygonize.
    :param target_values: Values to polygonize: a single number, a tuple of (exclusive) lower and upper bounds,
        a list or array of values, or "all" for all unmasked values.
    :param data_column_name: Name of the output column holding a sequential index (starting at 0) of the polygons.

    :returns: Vector of the polygons, with the raster value of each polygon in the column "raster_value".

    :raises ValueError: If no pixel matches the target values, or if the target values have an unsupported type.
    """

    # Mask a unique value set by a number
    if isinstance(target_values, (int, float, np.integer, np.floating)):
        matches = source_raster.data == target_values
        if np.sum(matches) == 0:
            raise ValueError(f"no pixel with in_value {target_values}")

        bool_msk = np.array(matches).astype(np.uint8)

    # Mask values within boundaries set by a tuple
    elif isinstance(target_values, tuple):
        matches = (source_raster.data > target_values[0]) & (source_raster.data < target_values[1])
        if np.sum(matches) == 0:
            raise ValueError(f"no pixel with in_value between {target_values[0]} and {target_values[1]}")

        bool_msk = matches.astype(np.uint8)

    # Mask specific values set by a sequence
    elif isinstance(target_values, (list, np.ndarray)):
        matches = np.isin(source_raster.data, np.array(target_values))
        if np.sum(matches) == 0:
            raise ValueError("no pixel with in_value " + ", ".join(map("{}".format, target_values)))

        bool_msk = matches.astype("uint8")

    # Mask all valid values
    elif target_values == "all":
        # Using getmaskarray is necessary in case .data.mask is nomask (False)
        bool_msk = (~np.ma.getmaskarray(source_raster.data)).astype("uint8")

    else:
        raise ValueError("in_value must be a number, a tuple or a sequence")

    # Only certain dtypes are supported downstream, we find the best common dtype to optimize memory usage
    # TODO: this should be a function independent of polygonize, reused in several places
    gpd_dtypes = ["uint8", "uint16", "int16", "int32", "float32"]
    # Promote the raster dtype with each supported dtype, and keep the promotions that are themselves supported.
    # The index kept is that of the PROMOTED dtype (able to hold the raster values), not that of the candidate:
    # otherwise "uint8" would be selected for any raster whose dtype is supported (e.g. "int16" or "float32")
    promoted_dtypes = [str(np.promote_types(gpd_type, source_raster.dtype)) for gpd_type in gpd_dtypes]
    list_common_dtype_index = [gpd_dtypes.index(promoted) for promoted in promoted_dtypes if promoted in gpd_dtypes]
    if len(list_common_dtype_index) == 0:
        final_dtype = "float32"
    else:
        final_dtype = gpd_dtypes[min(list_common_dtype_index)]

    # One feature per polygon, storing the raster value it was derived from
    results = (
        {"properties": {"raster_value": v}, "geometry": s}
        for s, v in shapes(source_raster.data.astype(final_dtype), mask=bool_msk, transform=source_raster.transform)
    )

    gdf = gpd.GeoDataFrame.from_features(list(results))
    # Sequential polygon index as first column
    gdf.insert(0, data_column_name, range(len(gdf)))
    gdf = gdf.set_geometry(col="geometry")
    gdf = gdf.set_crs(source_raster.crs)

    return gu.Vector(gdf)
+ if crs is None: + crs = gdf.crs + + if raster is not None: + crs = raster.crs # type: ignore + + vect = gdf.to_crs(crs) + + # If no raster given, now use provided dimensions + if raster is None: + # At minimum, xres must be set + if xres is None: + raise ValueError("At least raster or xres must be set.") + if yres is None: + yres = xres + + # By default, use self's bounds + if bounds is None: + bounds = vect.total_bounds + + # Calculate raster shape + left, bottom, right, top = bounds + width = abs((right - left) / xres) + height = abs((top - bottom) / yres) + + if width % 1 != 0 or height % 1 != 0: + warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.") + + width = int(np.round(width)) + height = int(np.round(height)) + out_shape = (height, width) + + # Calculate raster transform + transform = rio.transform.from_bounds(left, bottom, right, top, width, height) + + # otherwise use directly raster's dimensions + else: + out_shape = raster.shape # type: ignore + transform = raster.transform # type: ignore + + # Set default burn value, index from 1 to len(self.ds) + if in_value is None: + in_value = gdf.index + 1 + + # Rasterize geometry + if isinstance(in_value, Iterable): + if len(in_value) != len(vect.geometry): # type: ignore + raise ValueError( + "in_value must have same length as self.ds.geometry, currently {} != {}".format( + len(in_value), len(vect.geometry) # type: ignore + ) + ) + + out_geom = ((geom, value) for geom, value in zip(vect.geometry, in_value)) + + mask = features.rasterize(shapes=out_geom, fill=out_value, out_shape=out_shape, transform=transform) + + elif isinstance(in_value, int | float | np.floating | np.integer): + mask = features.rasterize( + shapes=vect.geometry, fill=out_value, out_shape=out_shape, transform=transform, default_value=in_value + ) + else: + raise ValueError("in_value must be a single number or an iterable with same length as self.ds.geometry") + + # We return a mask if there is a single value to burn and 
def _create_mask(
    gdf: gpd.GeoDataFrame,
    raster: gu.Raster | None = None,
    crs: CRS | None = None,
    xres: float | None = None,
    yres: float | None = None,
    bounds: tuple[float, float, float, float] | None = None,
    buffer: int | float | np.integer[Any] | np.floating[Any] = 0,
    as_array: bool = False,
) -> tuple[NDArrayBool, affine.Affine, CRS]:
    """
    Rasterize vector geometries into a boolean mask, on a raster's grid or on a grid built from a resolution.

    :param gdf: Geodataframe holding the geometries to burn. It is copied, the input is left unmodified.
    :param raster: Raster whose shape, transform, CRS and bounds define the output grid. If None, the grid is
        built from ``xres``, ``yres``, ``bounds`` and ``crs``.
    :param crs: CRS of the output grid, only used if ``raster`` is None. Defaults to the CRS of ``gdf``.
    :param xres: Output resolution along X, mandatory if ``raster`` is None.
    :param yres: Output resolution along Y, only used if ``raster`` is None. Defaults to ``xres``.
    :param bounds: Output bounds as (left, bottom, right, top), only used if ``raster`` is None. Defaults to the
        total bounds of ``gdf``.
    :param buffer: Distance passed to the ``buffer`` method of each geometry, after reprojection to the output CRS.
    :param as_array: Not used by this function.

    :raises ValueError: If neither ``raster`` nor ``xres`` is provided.
    :raises TypeError: If ``raster`` is neither a Raster nor None, or if ``buffer`` is not a number.

    :returns: Boolean mask (True where a geometry was burned), transform and CRS of the output grid.
    """

    # If no raster given, use provided dimensions
    if raster is None:
        # At minimum, xres must be set
        if xres is None:
            raise ValueError("At least raster or xres must be set.")
        if yres is None:
            yres = xres

        # By default, use the vector's CRS and bounds
        if crs is None:
            crs = gdf.crs
        if bounds is None:
            bounds_shp = True
            bounds = gdf.total_bounds
        else:
            bounds_shp = False

        # Calculate raster shape: the width spans the X extent and the height the Y extent (same as _rasterize)
        left, bottom, right, top = bounds
        width = abs((right - left) / xres)
        height = abs((top - bottom) / yres)

        if width % 1 != 0 or height % 1 != 0:
            # Only warn if the bounds were provided, and not derived from the vector
            if not bounds_shp:
                warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.")

        width = int(np.round(width))
        height = int(np.round(height))
        out_shape = (height, width)

        # Calculate raster transform
        transform = rio.transform.from_bounds(left, bottom, right, top, width, height)

    # Otherwise use directly the raster's dimensions
    elif isinstance(raster, gu.Raster):
        out_shape = raster.shape
        transform = raster.transform
        crs = raster.crs
        bounds = raster.bounds
    else:
        raise TypeError("Raster must be a geoutils.Raster or None.")

    # Copying GeoPandas dataframe before applying changes
    gdf = gdf.copy()

    # Crop vector geometries to avoid issues when reprojecting: the output bounds are converted to the vector's
    # CRS, and only the geometries intersecting them are kept
    left, bottom, right, top = bounds  # type: ignore
    x1, y1, x2, y2 = warp.transform_bounds(crs, gdf.crs, left, bottom, right, top)
    gdf = gdf.cx[x1:x2, y1:y2]

    # Reproject vector into raster CRS
    gdf = gdf.to_crs(crs)

    # Create a buffer around the features (a buffer of zero leaves the geometries untouched)
    if not isinstance(buffer, (int, float, np.number)):
        raise TypeError(f"Buffer must be a number, currently set to {type(buffer).__name__}.")
    if buffer != 0:
        gdf.geometry = [geom.buffer(buffer) for geom in gdf.geometry]

    # Rasterize geometry: burn 1 inside the geometries over a background of 0, then convert to boolean
    mask = features.rasterize(
        shapes=gdf.geometry, fill=0, out_shape=out_shape, transform=transform, default_value=1, dtype="uint8"
    ).astype("bool")

    # Force output mask to be of same dimension as input raster
    if raster is not None:
        mask = mask.reshape((raster.count, raster.height, raster.width))  # type: ignore

    return mask, transform, crs
- else: - raise ValueError( - f"'{method_str}' is not a valid rasterio.enums.Resampling method. " - f"Valid methods: {[method.name for method in rio.enums.Resampling]}" - ) - return resampling_method - - def diff_environment_yml( fn_env: str | dict[str, Any], fn_devenv: str | dict[str, Any], print_dep: str = "both", input_dict: bool = False ) -> None: diff --git a/geoutils/pointcloud/__init__.py b/geoutils/pointcloud/__init__.py new file mode 100644 index 00000000..16393346 --- /dev/null +++ b/geoutils/pointcloud/__init__.py @@ -0,0 +1 @@ +from geoutils.pointcloud.pointcloud import * # noqa diff --git a/geoutils/pointcloud/pointcloud.py b/geoutils/pointcloud/pointcloud.py new file mode 100644 index 00000000..cf2f6f3c --- /dev/null +++ b/geoutils/pointcloud/pointcloud.py @@ -0,0 +1 @@ +"""Module for future PointCloud class.""" diff --git a/geoutils/projtools.py b/geoutils/projtools.py index 1f234a5d..c9cf73c8 100644 --- a/geoutils/projtools.py +++ b/geoutils/projtools.py @@ -1,12 +1,12 @@ """ -projtools provides a set of tools for dealing with different coordinate reference systems (CRS) and bounds. +Functionalities to manipulate metadata in different coordinate reference systems (CRS). """ + from __future__ import annotations import warnings -from collections import abc from math import ceil, floor -from typing import Literal +from typing import Iterable, Literal import geopandas as gpd import numpy as np @@ -154,7 +154,7 @@ def bounds2poly( def merge_bounds( - bounds_list: abc.Iterable[ + bounds_list: Iterable[ list[float] | tuple[float] | rio.coords.BoundingBox | rio.io.DatasetReader | gpd.GeoDataFrame ], resolution: float | None = None, @@ -270,7 +270,7 @@ def reproject_points( def reproject_to_latlon( points: list[list[float]] | list[float] | NDArrayNum, in_crs: CRS, round_: int = 8 -) -> tuple[list[float], list[float]]: +) -> NDArrayNum: """ Reproject a set of point from in_crs to lat/lon. 
@@ -281,13 +281,12 @@ def reproject_to_latlon( :returns: Reprojected points, of same shape as points. """ proj_points = reproject_points(points, in_crs, crs_4326) - proj_points = np.round(proj_points, round_) - return proj_points + return np.round(proj_points, round_) def reproject_from_latlon( points: list[list[float]] | tuple[list[float], list[float]] | NDArrayNum, out_crs: CRS, round_: int = 2 -) -> tuple[list[float], list[float]]: +) -> NDArrayNum: """ Reproject a set of point from lat/lon to out_crs. @@ -298,8 +297,7 @@ def reproject_from_latlon( :returns: Reprojected points, of same shape as points. """ proj_points = reproject_points(points, crs_4326, out_crs) - proj_points = np.round(proj_points, round_) - return proj_points + return np.round(proj_points, round_) def reproject_shape(inshape: BaseGeometry, in_crs: CRS, out_crs: CRS) -> BaseGeometry: diff --git a/geoutils/raster/__init__.py b/geoutils/raster/__init__.py index 5402aa46..35f981b7 100644 --- a/geoutils/raster/__init__.py +++ b/geoutils/raster/__init__.py @@ -1,5 +1,7 @@ from geoutils.raster.raster import Raster, RasterType, Mask, handled_array_funcs # noqa isort:skip from geoutils.raster.array import * # noqa +from geoutils.raster.georeferencing import * # noqa +from geoutils.raster.geotransformations import * # noqa from geoutils.raster.multiraster import * # noqa from geoutils.raster.sampling import * # noqa from geoutils.raster.satimg import SatelliteImage # noqa diff --git a/geoutils/raster/array.py b/geoutils/raster/array.py index e1df06c5..ed32b909 100644 --- a/geoutils/raster/array.py +++ b/geoutils/raster/array.py @@ -10,7 +10,7 @@ from geoutils._typing import MArrayNum, NDArrayBool, NDArrayNum -def get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayBool: +def _get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayBool: """ Return the mask of invalid values, whether array is a ndarray with NaNs or a np.ma.masked_array. 
@@ -22,7 +22,7 @@ def get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayB return mask.squeeze() -def get_array_and_mask( +def _get_array_and_mask( array: NDArrayNum | MArrayNum, check_shape: bool = True, copy: bool = True ) -> tuple[NDArrayNum, NDArrayBool]: """ @@ -59,19 +59,19 @@ def get_array_and_mask( array_data = np.array(array).squeeze() if copy else np.asarray(array).squeeze() # Get the mask of invalid pixels and set nans if it is occupied. - invalid_mask = get_mask_from_array(array) + invalid_mask = _get_mask_from_array(array) if np.any(invalid_mask): array_data[invalid_mask] = np.nan return array_data, invalid_mask -def get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, ...]: +def _get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, ...]: """ Return (rowmin, rowmax, colmin, colmax), the first/last row/column of array with valid pixels """ if not array.dtype == "bool": - valid_mask = ~get_mask_from_array(array) + valid_mask = ~_get_mask_from_array(array) else: # Not sure why Mypy is not recognizing that the type of the array can only be bool here valid_mask = array # type: ignore @@ -80,7 +80,7 @@ def get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, return rows_nonzero[0], rows_nonzero[-1], cols_nonzero[0], cols_nonzero[-1] -def get_xy_rotated(raster: gu.Raster, along_track_angle: float) -> tuple[NDArrayNum, NDArrayNum]: +def _get_xy_rotated(raster: gu.Raster, along_track_angle: float) -> tuple[NDArrayNum, NDArrayNum]: """ Rotate x, y axes of image to get along- and cross-track distances. :param raster: Raster to get x,y positions from. diff --git a/geoutils/raster/delayed.py b/geoutils/raster/delayed.py index af668bd1..5f08ddc4 100644 --- a/geoutils/raster/delayed.py +++ b/geoutils/raster/delayed.py @@ -157,7 +157,7 @@ def delayed_subsample( flattened chunk). 
For this reason, a loaded array will also have a different subsample due to its direct 1D indexing (per valid value for the entire flattened array). - To ensure you re-use a similar subsample of valid values for several arrays, call this function with + To ensure you reuse a similar subsample of valid values for several arrays, call this function with return_indices=True, then sample your arrays out-of-memory with .vindex[indices[0], indices[1]] (this assumes that these arrays have valid values at the same locations). @@ -736,7 +736,7 @@ def delayed_reproject( src_block_ids = np.array(src_geotiling.get_block_locations()) meta_params = [ ( - _combined_blocks_shape_transform(sub_block_ids=src_block_ids[sbid], src_geogrid=src_geogrid) + _combined_blocks_shape_transform(sub_block_ids=src_block_ids[sbid], src_geogrid=src_geogrid) # type: ignore if len(sbid) > 0 else ({}, []) ) diff --git a/geoutils/raster/georeferencing.py b/geoutils/raster/georeferencing.py index da02d927..c37d30b2 100644 --- a/geoutils/raster/georeferencing.py +++ b/geoutils/raster/georeferencing.py @@ -1,12 +1,17 @@ +""" +Functions for manipulating georeferencing of the raster objects. +""" + from __future__ import annotations +import warnings from typing import Iterable, Literal import numpy as np import rasterio as rio from geoutils._config import config -from geoutils._typing import ArrayLike, NDArrayNum +from geoutils._typing import ArrayLike, DTypeLike, NDArrayNum def _ij2xy( @@ -170,3 +175,94 @@ def _bounds(transform: rio.transform.Affine, shape: tuple[int, int]) -> rio.coor """See description of Raster.bounds.""" return rio.coords.BoundingBox(*rio.transform.array_bounds(height=shape[0], width=shape[1], transform=transform)) + + +def _cast_pixel_interpretation( + area_or_point1: Literal["Area", "Point"] | None, area_or_point2: Literal["Area", "Point"] | None +) -> Literal["Area", "Point"] | None: + """ + Cast two pixel interpretations and warn if not castable. 
+ + Casts to: + - "Area" if both are "Area", + - "Point" if both are "Point", + - None if any of the interpretation is None, or + - None if one is "Area" and the other "Point" (and raises a warning). + """ + + # If one is None, cast to None + if area_or_point1 is None or area_or_point2 is None: + area_or_point_out = None + # If both are equal and not None + elif area_or_point1 == area_or_point2: + area_or_point_out = area_or_point1 + else: + area_or_point_out = None + msg = ( + 'One raster has a pixel interpretation "Area" and the other "Point". To silence this warning, ' + "either correct the pixel interpretation of one raster, or deactivate " + 'warnings of pixel interpretation with geoutils.config["warn_area_or_point"]=False.' + ) + if config["warn_area_or_point"]: + warnings.warn(message=msg, category=UserWarning) + + return area_or_point_out + + +# Function to set the default nodata values for any given dtype +# Similar to GDAL for int types, but without absurdly long nodata values for floats. +# For unsigned types, the maximum value is chosen (with a max of 99999). +# For signed types, the minimum value is chosen (with a min of -99999). +def _default_nodata(dtype: DTypeLike) -> int: + """ + Set the default nodata value for any given dtype, when this is not provided. 
+ """ + default_nodata_lookup = { + "uint8": 255, + "int8": -128, + "uint16": 65535, + "int16": -32768, + "uint32": 99999, + "int32": -99999, + "float16": -99999, + "float32": -99999, + "float64": -99999, + "float128": -99999, + "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility + } + # Check argument dtype is as expected + if not isinstance(dtype, (str, np.dtype, type)): + raise TypeError(f"dtype {dtype} not understood.") + + # Convert numpy types to string + if isinstance(dtype, type): + dtype = np.dtype(dtype).name + + # Convert np.dtype to string + if isinstance(dtype, np.dtype): + dtype = dtype.name + + if dtype in default_nodata_lookup.keys(): + return default_nodata_lookup[dtype] + else: + raise NotImplementedError(f"No default nodata value set for dtype {dtype}.") + + +def _cast_nodata(out_dtype: DTypeLike, nodata: int | float | None) -> int | float | None: + """ + Cast nodata value for output data type to default nodata if incompatible. + + :param out_dtype: Dtype of output array. + :param nodata: Nodata value. + + :return: Cast nodata value. + """ + + if out_dtype == bool: + nodata = None + if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, out_dtype): + nodata = _default_nodata(out_dtype) + else: + nodata = nodata + + return nodata diff --git a/geoutils/raster/geotransformations.py b/geoutils/raster/geotransformations.py new file mode 100644 index 00000000..e43e51a5 --- /dev/null +++ b/geoutils/raster/geotransformations.py @@ -0,0 +1,546 @@ +""" +Functionalities for geotransformations of raster objects. 
def _resampling_method_from_str(method_str: str) -> rio.enums.Resampling:
    """
    Convert the name of a resampling method, e.g. "cubic_spline", into the matching rasterio enum member.

    :param method_str: Name of the resampling method.

    :raises ValueError: If no member of rasterio.enums.Resampling has this name.

    :returns: Matching member of rasterio.enums.Resampling.
    """
    # Collect the enum members whose name is exactly the requested string
    matches = [candidate for candidate in rio.enums.Resampling if candidate.name == method_str]

    # Without any match, report the valid names to the user
    if not matches:
        raise ValueError(
            f"'{method_str}' is not a valid rasterio.enums.Resampling method. "
            f"Valid methods: {[method.name for method in rio.enums.Resampling]}"
        )

    return matches[0]
Not both.") + # If none are provided, simply preserve the CRS + elif ref is None and crs is None: + crs = source_raster.crs + + # Set output dtype + if dtype is None: + # Warning: this will not work for multiple bands with different dtypes + dtype = source_raster.dtype + + # --- Set source nodata if provided -- # + if force_source_nodata is None: + src_nodata = source_raster.nodata + else: + src_nodata = force_source_nodata + # Raise warning if a different nodata value exists for this raster than the forced one (not None) + if source_raster.nodata is not None: + warnings.warn( + "Forcing source nodata value of {} despite an existing nodata value of {} in the raster. " + "To silence this warning, use self.set_nodata() before reprojection instead of forcing.".format( + force_source_nodata, source_raster.nodata + ) + ) + + # --- Set destination nodata if provided -- # + # This is needed in areas not covered by the input data. + # If None, will use GeoUtils' default, as rasterio's default is unknown, hence cannot be handled properly. + if nodata is None: + nodata = source_raster.nodata + if nodata is None: + nodata = _default_nodata(dtype) + # If nodata is already being used, raise a warning. + # TODO: for uint8, if all values are used, apply rio.warp to mask to identify invalid values + if not source_raster.is_loaded: + warnings.warn( + f"For reprojection, nodata must be set. Setting default nodata to {nodata}. You may " + f"set a different nodata with `nodata`." + ) + + elif nodata in source_raster.data: + warnings.warn( + f"For reprojection, nodata must be set. Default chosen value {nodata} exists in " + f"self.data. This may have unexpected consequences. Consider setting a different nodata with " + f"self.set_nodata()." 
def _get_target_georeferenced_grid(
    raster: gu.Raster,
    crs: CRS | str | int | None = None,
    grid_size: tuple[int, int] | None = None,
    res: int | float | Iterable[float] | None = None,
    bounds: dict[str, float] | rio.coords.BoundingBox | None = None,
) -> tuple[affine.Affine, tuple[int, int]]:
    """
    Derive the georeferencing parameters (transform, size) for the target grid.

    Needed to reproject a raster to a different grid (resolution or size, bounds) and/or
    coordinate reference system (CRS).

    If both bounds and a resolution are requested, the grid starts at the left/top bounds with exactly the
    requested resolution and a number of pixels rounded to an integer, so the right/bottom bounds may be adjusted.

    :param raster: Source raster, from which the source CRS, transform, shape, bounds and resolution are read.
    :param crs: Destination coordinate reference system as a string or EPSG. Defaults to this raster's CRS.
    :param grid_size: Destination size as (ncol, nrow). Mutually exclusive with ``res``.
    :param res: Destination resolution (pixel size) in units of destination CRS. Single value or (xres, yres).
        Mutually exclusive with ``grid_size``.
    :param bounds: Destination bounds as a Rasterio bounding box, or a dictionary containing left, bottom,
        right, top bounds in the destination CRS.

    :raises ValueError: If both ``grid_size`` and ``res`` are specified.

    :returns: Calculated transform and size as (ncol, nrow).
    """
    # --- Input sanity checks --- #
    # Check that grid size and resolution are not both set
    if (grid_size is not None) and (res is not None):
        raise ValueError("size and res both specified. Specify only one.")

    # Set CRS to input CRS by default
    if crs is None:
        crs = raster.crs

    if grid_size is None:
        width, height = None, None
    else:
        width, height = grid_size

    # Convert bounds to BoundingBox
    if bounds is not None:
        if not isinstance(bounds, rio.coords.BoundingBox):
            bounds = rio.coords.BoundingBox(
                bounds["left"],
                bounds["bottom"],
                bounds["right"],
                bounds["top"],
            )

    # If all georeferences are the same as input, skip calculating because of issue in
    # rio.warp.calculate_default_transform (https://github.com/rasterio/rasterio/issues/3010)
    if (
        (crs == raster.crs)
        & ((grid_size is None) | ((height == raster.shape[0]) & (width == raster.shape[1])))
        & ((res is None) | np.all(np.array(res) == raster.res))
        & ((bounds is None) | (bounds == raster.bounds))
    ):
        # The shape is (nrow, ncol), while the returned size is (ncol, nrow)
        return raster.transform, raster.shape[::-1]

    # --- First, calculate default transform ignoring any change in bounds --- #
    tmp_transform, tmp_width, tmp_height = rio.warp.calculate_default_transform(
        raster.crs,
        crs,
        raster.width,
        raster.height,
        left=raster.bounds.left,
        right=raster.bounds.right,
        top=raster.bounds.top,
        bottom=raster.bounds.bottom,
        resolution=res,
        dst_width=width,
        dst_height=height,
    )

    # If no bounds specified, can directly use output of rio.warp.calculate_default_transform
    if bounds is None:
        return tmp_transform, (tmp_width, tmp_height)

    # --- Second, crop to requested bounds --- #
    # If output size and bounds are known, can use rio.transform.from_bounds to get the transform
    if grid_size is not None:
        dst_transform = rio.transform.from_bounds(
            bounds.left, bounds.bottom, bounds.right, bounds.top, grid_size[0], grid_size[1]
        )
        return dst_transform, grid_size

    # Otherwise, need to calculate the new output size, rounded to nearest integer
    ref_win = rio.windows.from_bounds(*list(bounds), tmp_transform).round_lengths()
    dst_size = (int(ref_win.width), int(ref_win.height))

    if res is not None:
        # In this case, we force output resolution. Any sequence (tuple, list, array) is read as (xres, yres),
        # not only a tuple, and a scalar is used for both axes
        if np.ndim(res) > 0:
            xres, yres = res[0], res[1]  # type: ignore
        else:
            xres = yres = res
        dst_transform = rio.transform.from_origin(bounds.left, bounds.top, xres, yres)
    else:
        # In this case, we force output bounds
        dst_transform = rio.transform.from_bounds(
            bounds.left, bounds.bottom, bounds.right, bounds.top, dst_size[0], dst_size[1]
        )

    return dst_transform, dst_size
def _is_reproj_needed(src_shape: tuple[int, int], reproj_kwargs: dict[str, Any]) -> bool:
    """
    Compare the source and destination georeferencing stored in the reprojection parameters.

    Note that, despite the name of this function, the output is True when the destination transform, CRS, grid
    size and resolution are all identical to those of the source (or None).

    :param src_shape: Shape of the source raster as (height, width).
    :param reproj_kwargs: Reprojection parameters, of which the keys "src_transform", "dst_transform", "src_crs",
        "dst_crs" and "destination" are read.

    :returns: True if the destination grid and CRS are identical to the source ones.
    """

    source_transform = reproj_kwargs["src_transform"]
    target_transform = reproj_kwargs["dst_transform"]
    source_crs = reproj_kwargs["src_crs"]
    target_crs = reproj_kwargs["dst_crs"]

    # Drop the first axis of the destination array and flip the rest, to get a size as (width, height)
    target_size = reproj_kwargs["destination"].shape[1:][::-1]

    source_res = _res(source_transform)
    target_res = _res(target_transform)

    # One flag per georeferencing property (caution, a size is (width, height) while a shape is (height, width))
    same_transform = (target_transform == source_transform) or (target_transform is None)
    same_crs = (target_crs == source_crs) or (target_crs is None)
    same_size = (target_size == src_shape[::-1]) or (target_size is None)
    same_res = np.all(np.array(target_res) == source_res) or (target_res is None)

    return all((same_transform, same_crs, same_size, same_res))
+ """ + + # 1/ Process user input + crs, dtype, src_nodata, nodata, res, bounds = _user_input_reproject( + source_raster=source_raster, + ref=ref, + crs=crs, + bounds=bounds, + res=res, + nodata=nodata, + dtype=dtype, + force_source_nodata=force_source_nodata, + ) + + # 2/ Derive georeferencing parameters for reprojection (transform, grid size) + reproj_kwargs = _get_reproj_params( + source_raster=source_raster, + crs=crs, + res=res, + grid_size=grid_size, + bounds=bounds, + dtype=dtype, + src_nodata=src_nodata, + nodata=nodata, + resampling=resampling, + ) + + # 3/ Check if reprojection is needed, otherwise return source raster with warning + if _is_reproj_needed(src_shape=source_raster.shape, reproj_kwargs=reproj_kwargs): + if (nodata == src_nodata) or (nodata is None): + if not silent: + warnings.warn("Output projection, bounds and grid size are identical -> returning self (not a copy!)") + return True, None, None, None, None + + elif nodata is not None: + if not silent: + warnings.warn( + "Only nodata is different, consider using the 'set_nodata()' method instead'\ + ' -> returning self (not a copy!)" + ) + return True, None, None, None, None + + # 4/ Perform reprojection + + # --- Set the performance keywords --- # + if n_threads == 0: + # Default to cpu count minus one. 
If the cpu count is undefined, num_threads will be 1 + cpu_count = os.cpu_count() or 2 + num_threads = cpu_count - 1 + else: + num_threads = n_threads + reproj_kwargs.update({"num_threads": num_threads, "warp_mem_limit": memory_limit}) + + # --- Run the reprojection of data --- # + # If data is loaded, reproject the numpy array directly + if source_raster.is_loaded: + # All masked values must be set to a nodata value for rasterio's reproject to work properly + # TODO: another option is to apply rio.warp.reproject to the mask to identify invalid pixels + if src_nodata is None and np.sum(source_raster.data.mask) > 0: + raise ValueError( + "No nodata set, set one for the raster with self.set_nodata() or use a temporary one " + "with `force_source_nodata`." + ) + + # Mask not taken into account by rasterio, need to fill with src_nodata + data, transformed = rio.warp.reproject(source_raster.data.filled(src_nodata), **reproj_kwargs) + + # If not, uses the dataset instead + else: + data = [] # type: ignore + for k in range(source_raster.count): + with rio.open(source_raster.filename) as ds: + band = rio.band(ds, k + 1) + band, transformed = rio.warp.reproject(band, **reproj_kwargs) + data.append(band.squeeze()) + + data = np.array(data) + + # Enforce output type + data = np.ma.masked_array(data.astype(dtype), fill_value=nodata) + + if nodata is not None: + data.mask = data == nodata + + # Check for funny business. + if reproj_kwargs["dst_transform"] is not None: + assert reproj_kwargs["dst_transform"] == transformed + + return False, data, transformed, crs, nodata + + +######### +# 2/ CROP +######### + + +def _crop( + source_raster: gu.Raster, + crop_geom: gu.Raster | gu.Vector | list[float] | tuple[float, ...], + mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", +) -> tuple[MArrayNum, affine.Affine]: + """Crop raster. 
See details in Raster.crop().""" + + assert mode in [ + "match_extent", + "match_pixel", + ], "mode must be one of 'match_pixel', 'match_extent'" + + if isinstance(crop_geom, (gu.Raster, gu.Vector)): + # For another Vector or Raster, we reproject the bounding box in the same CRS as self + xmin, ymin, xmax, ymax = crop_geom.get_bounds_projected(out_crs=source_raster.crs) + if isinstance(crop_geom, gu.Raster): + # Raise a warning if the reference is a raster that has a different pixel interpretation + _cast_pixel_interpretation(source_raster.area_or_point, crop_geom.area_or_point) + elif isinstance(crop_geom, (list, tuple)): + xmin, ymin, xmax, ymax = crop_geom + else: + raise ValueError("cropGeom must be a Raster, Vector, or list of coordinates.") + + if mode == "match_pixel": + # Finding the intersection of requested bounds and original bounds, cropped to image shape + ref_win = rio.windows.from_bounds(xmin, ymin, xmax, ymax, transform=source_raster.transform) + self_win = rio.windows.from_bounds(*source_raster.bounds, transform=source_raster.transform).crop( + *source_raster.shape + ) + final_window = ref_win.intersection(self_win).round_lengths().round_offsets() + + # Update bounds and transform accordingly + new_xmin, new_ymin, new_xmax, new_ymax = rio.windows.bounds(final_window, transform=source_raster.transform) + tfm = rio.transform.from_origin(new_xmin, new_ymax, *source_raster.res) + + if source_raster.is_loaded: + # In case data is loaded on disk, can extract directly from np array + (rowmin, rowmax), (colmin, colmax) = final_window.toranges() + + if source_raster.count == 1: + crop_img = source_raster.data[rowmin:rowmax, colmin:colmax] + else: + crop_img = source_raster.data[:, rowmin:rowmax, colmin:colmax] + else: + + assert source_raster._disk_shape is not None # This should not be the case, sanity check to make mypy happy + + # If data was not loaded, and self's transform was updated (e.g. 
due to downsampling) need to + # get the Window corresponding to on disk data + ref_win_disk = rio.windows.from_bounds( + new_xmin, new_ymin, new_xmax, new_ymax, transform=source_raster._disk_transform + ) + self_win_disk = rio.windows.from_bounds( + *source_raster.bounds, transform=source_raster._disk_transform + ).crop(*source_raster._disk_shape[1:]) + final_window_disk = ref_win_disk.intersection(self_win_disk).round_lengths().round_offsets() + + # Round up to downsampling size, to match __init__ + final_window_disk = rio.windows.round_window_to_full_blocks( + final_window_disk, ((source_raster._downsample, source_raster._downsample),) + ) + + # Load data for "on_disk" window but out_shape matching in-memory transform -> enforce downsampling + # AD (24/04/24): Note that the same issue as #447 occurs here when final_window_disk extends beyond + # self's bounds. Using option `boundless=True` solves the issue but causes other tests to fail + # This should be fixed with #447 and previous line would be obsolete. + with rio.open(source_raster.filename) as raster: + crop_img = raster.read( + indexes=source_raster._bands, + masked=source_raster._masked, + window=final_window_disk, + out_shape=(final_window.height, final_window.width), + ) + + # Squeeze first axis for single-band + if crop_img.ndim == 3 and crop_img.shape[0] == 1: + crop_img = crop_img.squeeze(axis=0) + + else: + bbox = rio.coords.BoundingBox(left=xmin, bottom=ymin, right=xmax, top=ymax) + out_rst = source_raster.reproject(bounds=bbox) # should we instead raise an issue and point to reproject? + crop_img = out_rst.data + tfm = out_rst.transform + + return crop_img, tfm + + +############## +# 3/ TRANSLATE +############## + + +def _translate( + transform: affine.Affine, + xoff: float, + yoff: float, + distance_unit: Literal["georeferenced", "pixel"] = "georeferenced", +) -> affine.Affine: + """ + Translate geotransform horizontally, either in pixels or georeferenced units. 
+ + :param transform: Input geotransform. + :param xoff: Translation x offset. + :param yoff: Translation y offset. + :param distance_unit: Distance unit, either 'georeferenced' (default) or 'pixel'. + + :return: Translated transform. + """ + + if distance_unit not in ["georeferenced", "pixel"]: + raise ValueError("Argument 'distance_unit' should be either 'pixel' or 'georeferenced'.") + + # Get transform + dx, b, xmin, d, dy, ymax = list(transform)[:6] + + # Convert pixel offsets to georeferenced units + if distance_unit == "pixel": + xoff *= dx + yoff *= abs(dy) # dy is negative + + return rio.transform.Affine(dx, b, xmin + xoff, d, dy, ymax + yoff) diff --git a/geoutils/raster/multiraster.py b/geoutils/raster/multiraster.py index 838f332d..73851f9b 100644 --- a/geoutils/raster/multiraster.py +++ b/geoutils/raster/multiraster.py @@ -1,4 +1,5 @@ """Multiple rasters tools.""" + from __future__ import annotations import warnings @@ -11,9 +12,9 @@ import geoutils as gu from geoutils._typing import NDArrayNum -from geoutils.misc import resampling_method_from_str -from geoutils.raster import Raster, RasterType, get_array_and_mask -from geoutils.raster.raster import _default_nodata +from geoutils.raster.array import _get_array_and_mask +from geoutils.raster.geotransformations import _resampling_method_from_str +from geoutils.raster.raster import RasterType, _default_nodata def load_multiple_rasters( @@ -141,7 +142,7 @@ def stack_rasters( """ # Check resampling method if isinstance(resampling_method, str): - resampling_method = resampling_method_from_str(resampling_method) + resampling_method = _resampling_method_from_str(resampling_method) # Check raster has a single band if any(r.count > 1 for r in rasters): @@ -193,7 +194,7 @@ def stack_rasters( # Optionally calculate difference if diff: diff_to_ref = (reference_raster.data - reprojected_raster.data).squeeze() - diff_to_ref, _ = get_array_and_mask(diff_to_ref) + diff_to_ref, _ = _get_array_and_mask(diff_to_ref) 
data.append(diff_to_ref) else: # img_data, _ = get_array_and_mask(reprojected_raster.data.squeeze()) @@ -228,7 +229,7 @@ def stack_rasters( def merge_rasters( rasters: list[RasterType], - reference: int | Raster = 0, + reference: int | RasterType = 0, merge_algorithm: Callable | list[Callable] = np.nanmean, # type: ignore resampling_method: str | rio.enums.Resampling = "bilinear", use_ref_bounds: bool = False, diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py index 4c4fe501..69dfcd1b 100644 --- a/geoutils/raster/raster.py +++ b/geoutils/raster/raster.py @@ -1,16 +1,16 @@ """ -geoutils.raster provides a toolset for working with raster data. +Module for Raster class. """ + from __future__ import annotations import math -import os import pathlib import warnings from collections import abc from contextlib import ExitStack from math import floor -from typing import IO, Any, Callable, Iterable, TypeVar, overload +from typing import IO, Any, Callable, TypeVar, overload import affine import geopandas as gpd @@ -18,7 +18,6 @@ import matplotlib.pyplot as plt import numpy as np import rasterio as rio -import rasterio.warp import rasterio.windows import rioxarray import xarray as xr @@ -27,11 +26,8 @@ from packaging.version import Version from rasterio.crs import CRS from rasterio.enums import Resampling -from rasterio.features import shapes from rasterio.plot import show as rshow -from scipy.ndimage import distance_transform_edt -import geoutils.vector as gv from geoutils._config import config from geoutils._typing import ( ArrayLike, @@ -42,6 +38,13 @@ NDArrayNum, Number, ) +from geoutils.interface.distance import _proximity_from_vector_or_raster +from geoutils.interface.interpolate import _interp_points +from geoutils.interface.raster_point import ( + _raster_to_pointcloud, + _regular_pointcloud_to_raster, +) +from geoutils.interface.raster_vector import _polygonize from geoutils.misc import deprecate from geoutils.projtools import ( _get_bounds_projected, 
@@ -49,18 +52,20 @@ _get_utm_ups_crs, reproject_from_latlon, ) -from geoutils.raster.array import get_mask_from_array from geoutils.raster.georeferencing import ( _bounds, + _cast_nodata, + _cast_pixel_interpretation, _coords, + _default_nodata, _ij2xy, _outside_image, _res, _xy2ij, ) -from geoutils.raster.interpolate import _interp_points +from geoutils.raster.geotransformations import _crop, _reproject, _translate from geoutils.raster.sampling import subsample_array -from geoutils.vector import Vector +from geoutils.vector.vector import Vector # If python38 or above, Literal is builtin. Otherwise, use typing_extensions try: @@ -137,46 +142,6 @@ ] handled_array_funcs = _HANDLED_FUNCTIONS_1NIN + _HANDLED_FUNCTIONS_2NIN - -# Function to set the default nodata values for any given dtype -# Similar to GDAL for int types, but without absurdly long nodata values for floats. -# For unsigned types, the maximum value is chosen (with a max of 99999). -# For signed types, the minimum value is chosen (with a min of -99999). -def _default_nodata(dtype: DTypeLike) -> int: - """ - Set the default nodata value for any given dtype, when this is not provided. 
- """ - default_nodata_lookup = { - "uint8": 255, - "int8": -128, - "uint16": 65535, - "int16": -32768, - "uint32": 99999, - "int32": -99999, - "float16": -99999, - "float32": -99999, - "float64": -99999, - "float128": -99999, - "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility - } - # Check argument dtype is as expected - if not isinstance(dtype, (str, np.dtype, type)): - raise TypeError(f"dtype {dtype} not understood.") - - # Convert numpy types to string - if isinstance(dtype, type): - dtype = np.dtype(dtype).name - - # Convert np.dtype to string - if isinstance(dtype, np.dtype): - dtype = dtype.name - - if dtype in default_nodata_lookup.keys(): - return default_nodata_lookup[dtype] - else: - raise NotImplementedError(f"No default nodata value set for dtype {dtype}.") - - # Set default attributes to be kept from rasterio's DatasetReader _default_rio_attrs = [ "bounds", @@ -260,195 +225,6 @@ def _load_rio( return data -def _get_reproject_params( - raster: RasterType, - crs: CRS | str | int | None = None, - grid_size: tuple[int, int] | None = None, - res: int | float | abc.Iterable[float] | None = None, - bounds: dict[str, float] | rio.coords.BoundingBox | None = None, -) -> tuple[Affine, tuple[int, int]]: - """ - Returns the parameters (transform, size) needed to reproject a raster to a different grid (resolution or - size, bounds) and/or coordinate reference system (CRS). - - If requested bounds are incompatible with output resolution (would result in non integer number of pixels), - the bounds are rounded up to the nearest compatible value. - - :param crs: Destination coordinate reference system as a string or EPSG. Defaults to this raster's CRS. - :param grid_size: Destination size as (ncol, nrow). Mutually exclusive with ``res``. - :param res: Destination resolution (pixel size) in units of destination CRS. Single value or (xres, yres). - Mutually exclusive with ``size``. 
- :param bounds: Destination bounds as a Rasterio bounding box, or a dictionary containing left, bottom, - right, top bounds in the destination CRS. - - :returns: Calculated transform and size. - """ - # --- Input sanity checks --- # - # check size and res are not both set - if (grid_size is not None) and (res is not None): - raise ValueError("size and res both specified. Specify only one.") - - # Set CRS to input CRS by default - if crs is None: - crs = raster.crs - - if grid_size is None: - width, height = None, None - else: - width, height = grid_size - - # Convert bounds to BoundingBox - if bounds is not None: - if not isinstance(bounds, rio.coords.BoundingBox): - bounds = rio.coords.BoundingBox( - bounds["left"], - bounds["bottom"], - bounds["right"], - bounds["top"], - ) - - # If all georeferences are the same as input, skip calculating because of issue in - # rio.warp.calculate_default_transform (https://github.com/rasterio/rasterio/issues/3010) - if ( - (crs == raster.crs) - & ((grid_size is None) | ((height == raster.shape[0]) & (width == raster.shape[1]))) - & ((res is None) | np.all(np.array(res) == raster.res)) - & ((bounds is None) | (bounds == raster.bounds)) - ): - return raster.transform, raster.shape[::-1] - - # --- First, calculate default transform ignoring any change in bounds --- # - tmp_transform, tmp_width, tmp_height = rio.warp.calculate_default_transform( - raster.crs, - crs, - raster.width, - raster.height, - left=raster.bounds.left, - right=raster.bounds.right, - top=raster.bounds.top, - bottom=raster.bounds.bottom, - resolution=res, - dst_width=width, - dst_height=height, - ) - - # If no bounds specified, can directly use output of rio.warp.calculate_default_transform - if bounds is None: - dst_size = (tmp_width, tmp_height) - dst_transform = tmp_transform - - # --- Second, crop to requested bounds --- # - else: - # If output size and bounds are known, can use rio.transform.from_bounds to get dst_transform - if grid_size is not None: - 
dst_transform = rio.transform.from_bounds( - bounds.left, bounds.bottom, bounds.right, bounds.top, grid_size[0], grid_size[1] - ) - dst_size = grid_size - - else: - # Otherwise, need to calculate the new output size, rounded to nearest integer - ref_win = rio.windows.from_bounds(*list(bounds), tmp_transform).round_lengths() - dst_size = (int(ref_win.width), int(ref_win.height)) - - if res is not None: - # In this case, we force output resolution - if isinstance(res, tuple): - dst_transform = rio.transform.from_origin(bounds.left, bounds.top, res[0], res[1]) - else: - dst_transform = rio.transform.from_origin(bounds.left, bounds.top, res, res) - else: - # In this case, we force output bounds - dst_transform = rio.transform.from_bounds( - bounds.left, bounds.bottom, bounds.right, bounds.top, dst_size[0], dst_size[1] - ) - - return dst_transform, dst_size - - -def _cast_pixel_interpretation( - area_or_point1: Literal["Area", "Point"] | None, area_or_point2: Literal["Area", "Point"] | None -) -> Literal["Area", "Point"] | None: - """ - Cast two pixel interpretations and warn if not castable. - - Casts to: - - "Area" if both are "Area", - - "Point" if both are "Point", - - None if any of the interpretation is None, or - - None if one is "Area" and the other "Point" (and raises a warning). - """ - - # If one is None, cast to None - if area_or_point1 is None or area_or_point2 is None: - area_or_point_out = None - # If both are equal and not None - elif area_or_point1 == area_or_point2: - area_or_point_out = area_or_point1 - else: - area_or_point_out = None - msg = ( - 'One raster has a pixel interpretation "Area" and the other "Point". To silence this warning, ' - "either correct the pixel interpretation of one raster, or deactivate " - 'warnings of pixel interpretation with geoutils.config["warn_area_or_point"]=False.' 
- ) - if config["warn_area_or_point"]: - warnings.warn(message=msg, category=UserWarning) - - return area_or_point_out - - -def _cast_nodata(out_dtype: DTypeLike, nodata: int | float | None) -> int | float | None: - """ - Cast nodata value for output data type to default nodata if incompatible. - - :param out_dtype: Dtype of output array. - :param nodata: Nodata value. - - :return: Cast nodata value. - """ - - if out_dtype == bool: - nodata = None - if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, out_dtype): - nodata = _default_nodata(out_dtype) - else: - nodata = nodata - - return nodata - - -def _shift_transform( - transform: affine.Affine, - xoff: float, - yoff: float, - distance_unit: Literal["georeferenced", "pixel"] = "georeferenced", -) -> affine.Affine: - """ - Shift geotransform horizontally, either in pixels or georeferenced units. - - :param transform: Input geotransform. - :param xoff: Translation x offset. - :param yoff: Translation y offset. - :param distance_unit: Distance unit, either 'georeferenced' (default) or 'pixel'. - - :return: Shifted transform. 
- """ - - if distance_unit not in ["georeferenced", "pixel"]: - raise ValueError("Argument 'distance_unit' should be either 'pixel' or 'georeferenced'.") - - # Get transform - dx, b, xmin, d, dy, ymax = list(transform)[:6] - - # Convert pixel offsets to georeferenced units - if distance_unit == "pixel": - xoff *= dx - yoff *= abs(dy) # dy is negative - - return rio.transform.Affine(dx, b, xmin + xoff, d, dy, ymax + yoff) - - def _cast_numeric_array_raster( raster: RasterType, other: RasterType | NDArrayNum | Number, operation_name: str ) -> tuple[MArrayNum, MArrayNum | NDArrayNum | Number, float | int | None, Literal["Area", "Point"] | None]: @@ -567,12 +343,9 @@ class Raster: def __init__( self, - filename_or_dataset: str - | pathlib.Path - | RasterType - | rio.io.DatasetReader - | rio.io.MemoryFile - | dict[str, Any], + filename_or_dataset: ( + str | pathlib.Path | RasterType | rio.io.DatasetReader | rio.io.MemoryFile | dict[str, Any] + ), bands: int | list[int] | None = None, load_data: bool = False, downsample: Number = 1, @@ -1658,18 +1431,15 @@ def __ge__(self: RasterType, other: RasterType | NDArrayNum | Number) -> RasterT @overload def astype( self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: Literal[False] = False - ) -> RasterType: - ... + ) -> RasterType: ... @overload - def astype(self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: Literal[True]) -> None: - ... + def astype(self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: Literal[True]) -> None: ... @overload def astype( self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: bool = False - ) -> RasterType | None: - ... + ) -> RasterType | None: ... 
def astype( self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, inplace: bool = False @@ -2068,12 +1838,10 @@ def set_mask(self, mask: NDArrayBool | Mask) -> None: self.data[mask_arr > 0] = np.ma.masked @overload - def info(self, stats: bool = False, *, verbose: Literal[True] = ...) -> None: - ... + def info(self, stats: bool = False, *, verbose: Literal[True] = ...) -> None: ... @overload - def info(self, stats: bool = False, *, verbose: Literal[False]) -> str: - ... + def info(self, stats: bool = False, *, verbose: Literal[False]) -> str: ... def info(self, stats: bool = False, verbose: bool = True) -> None | str: """ @@ -2169,12 +1937,10 @@ def georeferenced_grid_equal(self: RasterType, raster: RasterType) -> bool: return all([self.shape == raster.shape, self.transform == raster.transform, self.crs == raster.crs]) @overload - def get_nanarray(self, return_mask: Literal[False] = False) -> NDArrayNum: - ... + def get_nanarray(self, return_mask: Literal[False] = False) -> NDArrayNum: ... @overload - def get_nanarray(self, return_mask: Literal[True]) -> tuple[NDArrayNum, NDArrayBool]: - ... + def get_nanarray(self, return_mask: Literal[True]) -> tuple[NDArrayNum, NDArrayBool]: ... def get_nanarray(self, return_mask: bool = False) -> NDArrayNum | tuple[NDArrayNum, NDArrayBool]: """ @@ -2417,8 +2183,7 @@ def crop( mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[False] = False, - ) -> RasterType: - ... + ) -> RasterType: ... @overload def crop( @@ -2427,8 +2192,7 @@ def crop( mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def crop( @@ -2437,8 +2201,7 @@ def crop( mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: bool = False, - ) -> RasterType | None: - ... + ) -> RasterType | None: ... 
def crop( self: RasterType, @@ -2464,80 +2227,8 @@ def crop( :returns: A new raster (or None if inplace). """ - assert mode in [ - "match_extent", - "match_pixel", - ], "mode must be one of 'match_pixel', 'match_extent'" - - if isinstance(crop_geom, (Raster, Vector)): - # For another Vector or Raster, we reproject the bounding box in the same CRS as self - xmin, ymin, xmax, ymax = crop_geom.get_bounds_projected(out_crs=self.crs) - if isinstance(crop_geom, Raster): - # Raise a warning if the reference is a raster that has a different pixel interpretation - _cast_pixel_interpretation(self.area_or_point, crop_geom.area_or_point) - elif isinstance(crop_geom, (list, tuple)): - xmin, ymin, xmax, ymax = crop_geom - else: - raise ValueError("cropGeom must be a Raster, Vector, or list of coordinates.") - - if mode == "match_pixel": - # Finding the intersection of requested bounds and original bounds, cropped to image shape - ref_win = rio.windows.from_bounds(xmin, ymin, xmax, ymax, transform=self.transform) - self_win = rio.windows.from_bounds(*self.bounds, transform=self.transform).crop(*self.shape) - final_window = ref_win.intersection(self_win).round_lengths().round_offsets() - - # Update bounds and transform accordingly - new_xmin, new_ymin, new_xmax, new_ymax = rio.windows.bounds(final_window, transform=self.transform) - tfm = rio.transform.from_origin(new_xmin, new_ymax, *self.res) - - if self.is_loaded: - # In case data is loaded on disk, can extract directly from np array - (rowmin, rowmax), (colmin, colmax) = final_window.toranges() - - if self.count == 1: - crop_img = self.data[rowmin:rowmax, colmin:colmax] - else: - crop_img = self.data[:, rowmin:rowmax, colmin:colmax] - else: - assert self._disk_shape is not None # This should not be the case, sanity check to make mypy happy - - # If data was not loaded, and self's transform was updated (e.g. 
due to downsampling) need to - # get the Window corresponding to on disk data - ref_win_disk = rio.windows.from_bounds( - new_xmin, new_ymin, new_xmax, new_ymax, transform=self._disk_transform - ) - self_win_disk = rio.windows.from_bounds(*self.bounds, transform=self._disk_transform).crop( - *self._disk_shape[1:] - ) - final_window_disk = ref_win_disk.intersection(self_win_disk).round_lengths().round_offsets() - - # Round up to downsampling size, to match __init__ - final_window_disk = rio.windows.round_window_to_full_blocks( - final_window_disk, ((self._downsample, self._downsample),) - ) - - # Load data for "on_disk" window but out_shape matching in-memory transform -> enforce downsampling - # AD (24/04/24): Note that the same issue as #447 occurs here when final_window_disk extends beyond - # self's bounds. Using option `boundless=True` solves the issue but causes other tests to fail - # This should be fixed with #447 and previous line would be obsolete. - with rio.open(self.filename) as raster: - crop_img = raster.read( - indexes=self._bands, - masked=self._masked, - window=final_window_disk, - out_shape=(final_window.height, final_window.width), - ) - - # Squeeze first axis for single-band - if crop_img.ndim == 3 and crop_img.shape[0] == 1: - crop_img = crop_img.squeeze(axis=0) - - else: - bbox = rio.coords.BoundingBox(left=xmin, bottom=ymin, right=xmax, top=ymax) - out_rst = self.reproject(bounds=bbox) # should we instead raise an issue and point to reproject? - crop_img = out_rst.data - tfm = out_rst.transform + crop_img, tfm = _crop(source_raster=self, crop_geom=crop_geom, mode=mode) if inplace: self._data = crop_img @@ -2564,8 +2255,7 @@ def reproject( silent: bool = False, n_threads: int = 0, memory_limit: int = 64, - ) -> RasterType: - ... + ) -> RasterType: ... @overload def reproject( @@ -2584,8 +2274,7 @@ def reproject( silent: bool = False, n_threads: int = 0, memory_limit: int = 64, - ) -> None: - ... + ) -> None: ... 
@overload def reproject( @@ -2604,8 +2293,7 @@ def reproject( silent: bool = False, n_threads: int = 0, memory_limit: int = 64, - ) -> RasterType | None: - ... + ) -> RasterType | None: ... def reproject( self: RasterType, @@ -2632,7 +2320,6 @@ def reproject( Any resampling algorithm implemented in Rasterio can be passed as a string. - :param ref: Reference raster to match resolution, bounds and CRS. :param crs: Destination coordinate reference system as a string or EPSG. If ``ref`` not set, defaults to this raster's CRS. @@ -2656,185 +2343,42 @@ def reproject( :returns: Reprojected raster (or None if inplace). """ - # --- Sanity checks on inputs and defaults -- # - # Check that either ref or crs is provided - if ref is not None and crs is not None: - raise ValueError("Either of `ref` or `crs` must be set. Not both.") - # If none are provided, simply preserve the CRS - elif ref is None and crs is None: - crs = self.crs - - # Set output dtype - if dtype is None: - # Warning: this will not work for multiple bands with different dtypes - dtype = self.dtype - - # --- Set source nodata if provided -- # - if force_source_nodata is None: - src_nodata = self.nodata - else: - src_nodata = force_source_nodata - # Raise warning if a different nodata value exists for this raster than the forced one (not None) - if self.nodata is not None: - warnings.warn( - "Forcing source nodata value of {} despite an existing nodata value of {} in the raster. " - "To silence this warning, use self.set_nodata() before reprojection instead of forcing.".format( - force_source_nodata, self.nodata - ) - ) - # --- Set destination nodata if provided -- # - # This is needed in areas not covered by the input data. - # If None, will use GeoUtils' default, as rasterio's default is unknown, hence cannot be handled properly. - if nodata is None: - nodata = self.nodata - if nodata is None: - nodata = _default_nodata(dtype) - # If nodata is already being used, raise a warning. 
- # TODO: for uint8, if all values are used, apply rio.warp to mask to identify invalid values - if not self.is_loaded: - warnings.warn( - f"For reprojection, nodata must be set. Setting default nodata to {nodata}. You may " - f"set a different nodata with `nodata`." - ) - - elif nodata in self.data: - warnings.warn( - f"For reprojection, nodata must be set. Default chosen value {nodata} exists in " - f"self.data. This may have unexpected consequences. Consider setting a different nodata with " - f"self.set_nodata()." - ) - - # Create a BoundingBox if required - if bounds is not None: - if not isinstance(bounds, rio.coords.BoundingBox): - bounds = rio.coords.BoundingBox( - bounds["left"], - bounds["bottom"], - bounds["right"], - bounds["top"], - ) + # Reproject + return_copy, data, transformed, crs, nodata = _reproject( + source_raster=self, + ref=ref, + crs=crs, + res=res, + grid_size=grid_size, + bounds=bounds, + nodata=nodata, + dtype=dtype, + resampling=resampling, + force_source_nodata=force_source_nodata, + silent=silent, + n_threads=n_threads, + memory_limit=memory_limit, + ) - from geoutils.misc import resampling_method_from_str - - # --- Basic reprojection options, needed in all cases. --- # - reproj_kwargs = { - "src_transform": self.transform, - "src_crs": self.crs, - "resampling": resampling if isinstance(resampling, Resampling) else resampling_method_from_str(resampling), - "src_nodata": src_nodata, - "dst_nodata": nodata, - } - - # --- Calculate output georeferences (transform, grid size) - - # Case a raster is provided as reference - if ref is not None: - # Check that ref type is either str, Raster or rasterio data set - # Preferably use Raster instance to avoid rasterio data set to remain open. 
See PR #45 - if isinstance(ref, Raster): - # Raise a warning if the reference is a raster that has a different pixel interpretation - _cast_pixel_interpretation(self.area_or_point, ref.area_or_point) - ds_ref = ref - elif isinstance(ref, str): - if not os.path.exists(ref): - raise ValueError("Reference raster does not exist.") - ds_ref = Raster(ref, load_data=False) + # If return copy is True (target georeferenced grid was the same as input) + if return_copy: + if inplace: + return None else: - raise TypeError("Type of ref not understood, must be path to file (str), Raster.") - - # Read reprojecting params from ref raster - crs = ds_ref.crs - res = ds_ref.res - bounds = ds_ref.bounds - else: - # Determine target CRS - crs = CRS.from_user_input(crs) - - # Determine target transform and grid size - transform, grid_size = _get_reproject_params(self, crs=crs, grid_size=grid_size, res=res, bounds=bounds) - - # Update reprojection options accordingly - reproj_kwargs.update({"dst_transform": transform}) - data = np.ones((self.count, grid_size[1], grid_size[0]), dtype=dtype) - reproj_kwargs.update({"destination": data}) - reproj_kwargs.update({"dst_crs": crs}) - - # --- Check that reprojection is actually needed --- # - # Caution, grid_size is (width, height) while shape is (height, width) - if all( - [ - (transform == self.transform) or (transform is None), - (crs == self.crs) or (crs is None), - (grid_size == self.shape[::-1]) or (grid_size is None), - np.all(np.array(res) == self.res) or (res is None), - ] - ): - if (nodata == self.nodata) or (nodata is None): - if not silent: - warnings.warn( - "Output projection, bounds and grid size are identical -> returning self (not a copy!)" - ) - return self - - elif nodata is not None: - if not silent: - warnings.warn( - "Only nodata is different, consider using the 'set_nodata()' method instead'\ - ' -> returning self (not a copy!)" - ) return self - # --- Set the performance keywords --- # - if n_threads == 0: - # Default to 
cpu count minus one. If the cpu count is undefined, num_threads will be 1 - cpu_count = os.cpu_count() or 2 - num_threads = cpu_count - 1 - else: - num_threads = n_threads - reproj_kwargs.update({"num_threads": num_threads, "warp_mem_limit": memory_limit}) - - # --- Run the reprojection of data --- # - # If data is loaded, reproject the numpy array directly - if self.is_loaded: - # All masked values must be set to a nodata value for rasterio's reproject to work properly - # TODO: another option is to apply rio.warp.reproject to the mask to identify invalid pixels - if src_nodata is None and np.sum(self.data.mask) > 0: - raise ValueError( - "No nodata set, set one for the raster with self.set_nodata() or use a temporary one " - "with `force_source_nodata`." - ) - - # Mask not taken into account by rasterio, need to fill with src_nodata - data, transformed = rio.warp.reproject(self.data.filled(src_nodata), **reproj_kwargs) - - # If not, uses the dataset instead - else: - data = [] # type: ignore - for k in range(self.count): - with rio.open(self.filename) as ds: - band = rio.band(ds, k + 1) - band, transformed = rio.warp.reproject(band, **reproj_kwargs) - data.append(band.squeeze()) - - data = np.array(data) - - # Enforce output type - data = np.ma.masked_array(data.astype(dtype), fill_value=nodata) - - if nodata is not None: - data.mask = data == nodata - - # Check for funny business. - if transform is not None: - assert transform == transformed + # To make MyPy happy without overload for _reproject (as it might re-structured soon anyway) + assert data is not None + assert transformed is not None + assert crs is not None # Write results to a new Raster. 
if inplace: # Order is important here, because calling self.data will use nodata to mask the array properly self._crs = crs self._nodata = nodata - self._transform = transform + self._transform = transformed # A little trick to force the right shape of data in, then update the mask properly through the data setter self._data = data.squeeze() self.data = data @@ -2850,8 +2394,7 @@ def translate( distance_unit: Literal["georeferenced"] | Literal["pixel"] = "georeferenced", *, inplace: Literal[False] = False, - ) -> RasterType: - ... + ) -> RasterType: ... @overload def translate( @@ -2861,8 +2404,7 @@ def translate( distance_unit: Literal["georeferenced"] | Literal["pixel"] = "georeferenced", *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def translate( @@ -2872,8 +2414,7 @@ def translate( distance_unit: Literal["georeferenced"] | Literal["pixel"] = "georeferenced", *, inplace: bool = False, - ) -> RasterType | None: - ... + ) -> RasterType | None: ... def translate( self: RasterType, @@ -2883,27 +2424,27 @@ def translate( inplace: bool = False, ) -> RasterType | None: """ - Shift a raster by a (x,y) offset. + Translate a raster by a (x,y) offset. - The shifting only updates the geotransform (no resampling is performed). + The translation only updates the geotransform (no resampling is performed). :param xoff: Translation x offset. :param yoff: Translation y offset. :param distance_unit: Distance unit, either 'georeferenced' (default) or 'pixel'. :param inplace: Whether to modify the raster in-place. - :returns: Shifted raster (or None if inplace). + :returns: Translated raster (or None if inplace). 
""" - shifted_transform = _shift_transform(self.transform, xoff=xoff, yoff=yoff, distance_unit=distance_unit) + translated_transform = _translate(self.transform, xoff=xoff, yoff=yoff, distance_unit=distance_unit) if inplace: - # Overwrite transform by shifted transform - self.transform = shifted_transform + # Overwrite transform by translated transform + self.transform = translated_transform return None else: raster_copy = self.copy() - raster_copy.transform = shifted_transform + raster_copy.transform = translated_transform return raster_copy def save( @@ -3256,10 +2797,10 @@ def plot( # Set colorbar min/max values (needed for ScalarMappable) if vmin is None: - vmin = np.nanmin(data) + vmin = float(np.nanmin(data)) if vmax is None: - vmax = np.nanmax(data) + vmax = float(np.nanmax(data)) # Make sure they are numbers, to avoid mpl error try: @@ -3725,8 +3266,7 @@ def to_pointcloud( as_array: Literal[False] = False, random_state: int | np.random.Generator | None = None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul", - ) -> NDArrayNum: - ... + ) -> NDArrayNum: ... @overload def to_pointcloud( @@ -3741,8 +3281,7 @@ def to_pointcloud( as_array: Literal[True], random_state: int | np.random.Generator | None = None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul", - ) -> Vector: - ... + ) -> Vector: ... @overload def to_pointcloud( @@ -3757,8 +3296,7 @@ def to_pointcloud( as_array: bool = False, random_state: int | np.random.Generator | None = None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul", - ) -> NDArrayNum | Vector: - ... + ) -> NDArrayNum | Vector: ... def to_pointcloud( self, @@ -3818,146 +3356,19 @@ def to_pointcloud( :returns: A point cloud, or array of the shape (N, 2 + count) where N is the sample count. 
""" - # Input checks - - # Main data column checks - if not isinstance(data_column_name, str): - raise ValueError("Data column name must be a string.") - if not (isinstance(data_band, int) and data_band >= 1 and data_band <= self.count): - raise ValueError( - f"Data band number must be an integer between 1 and the total number of bands ({self.count})." - ) - - # Rename data column if a different band is selected but the name is still default - if data_band != 1 and data_column_name == "b1": - data_column_name = "b" + str(data_band) - - # Auxiliary data columns checks - if auxiliary_column_names is not None and auxiliary_data_bands is None: - raise ValueError("Passing auxiliary column names requires passing auxiliary data band numbers as well.") - if auxiliary_data_bands is not None: - if not ( - isinstance(auxiliary_data_bands, Iterable) and all(isinstance(b, int) for b in auxiliary_data_bands) - ): - raise ValueError("Auxiliary data band number must be an iterable containing only integers.") - if any((1 > b or self.count < b) for b in auxiliary_data_bands): - raise ValueError( - f"Auxiliary data band numbers must be between 1 and the total number of bands ({self.count})." - ) - if data_band in auxiliary_data_bands: - raise ValueError( - f"Main data band {data_band} should not be listed in auxiliary data bands {auxiliary_data_bands}." - ) - - # Ensure auxiliary column name is defined if auxiliary data bands is not None - if auxiliary_column_names is not None: - if not ( - isinstance(auxiliary_column_names, Iterable) - and all(isinstance(b, str) for b in auxiliary_column_names) - ): - raise ValueError("Auxiliary column names must be an iterable containing only strings.") - if not len(auxiliary_column_names) == len(auxiliary_data_bands): - raise ValueError( - f"Length of auxiliary column name and data band numbers should be the same, " - f"found {len(auxiliary_column_names)} and {len(auxiliary_data_bands)} respectively." 
- ) - - else: - auxiliary_column_names = [f"b{i}" for i in auxiliary_data_bands] - - # Define bigger list with all bands and names - all_bands = [data_band] + auxiliary_data_bands - all_column_names = [data_column_name] + auxiliary_column_names - - else: - all_bands = [data_band] - all_column_names = [data_column_name] - - # If subsample is the entire array, load it to optimize speed - if subsample == 1 and not self.is_loaded: - self.load(bands=all_bands) - - # Band indexes in the array are band number minus one - all_indexes = [b - 1 for b in all_bands] - - # We do 2D subsampling on the data band only, regardless of valid masks on other bands - if skip_nodata: - if self.is_loaded: - if self.count == 1: - self_mask = get_mask_from_array( - self.data - ) # This is to avoid the case where the mask is just "False" - else: - self_mask = get_mask_from_array( - self.data[data_band - 1, :, :] - ) # This is to avoid the case where the mask is just "False" - valid_mask = ~self_mask - - # Load only mask of valid data from disk if array not loaded - else: - valid_mask = ~self._load_only_mask(bands=data_band) - # If we are not skipping nodata values, valid mask is everywhere - else: - if self.count == 1: - valid_mask = np.ones(self.data.shape, dtype=bool) - else: - valid_mask = np.ones(self.data[0, :].shape, dtype=bool) - - # Get subsample on valid mask - # Build a low memory boolean masked array with invalid values masked to pass to subsampling - ma_valid = np.ma.masked_array(data=np.ones(np.shape(valid_mask), dtype=bool), mask=~valid_mask) - # Take a subsample within the valid values - indices = subsample_array(array=ma_valid, subsample=subsample, random_state=random_state, return_indices=True) - - # If the Raster is loaded, pick from the data while ignoring the mask - if self.is_loaded: - if self.count == 1: - pixel_data = self.data[indices[0], indices[1]] - else: - # TODO: Combining both indexes at once could reduce memory usage? 
- pixel_data = self.data[all_indexes, :][:, indices[0], indices[1]] - - # Otherwise use rasterio.sample to load only requested pixels - else: - # Extract the coordinates at subsampled pixels with valid data - # To extract data, we always use "upper left" which rasterio interprets as the exact raster coordinates - # Further below we redefine output coordinates based on point interpretation - x_coords, y_coords = (np.array(a) for a in self.ij2xy(indices[0], indices[1], force_offset="ul")) - - with rio.open(self.filename) as raster: - # Rasterio uses indexes (starts at 1) - pixel_data = np.array(list(raster.sample(zip(x_coords, y_coords), indexes=all_bands))).T - - # At this point there should not be any nodata anymore, so we can transform everything to normal array - if np.ma.isMaskedArray(pixel_data): - pixel_data = pixel_data.data - - # If nodata values were not skipped, convert them to NaNs and change data type - if skip_nodata is False: - pixel_data = pixel_data.astype("float32") - pixel_data[pixel_data == self.nodata] = np.nan - - # Now we force the coordinates we define for the point cloud, according to pixel interpretation - x_coords_2, y_coords_2 = ( - np.array(a) for a in self.ij2xy(indices[0], indices[1], force_offset=force_pixel_offset) + return _raster_to_pointcloud( + source_raster=self, + data_column_name=data_column_name, + data_band=data_band, + auxiliary_data_bands=auxiliary_data_bands, + auxiliary_column_names=auxiliary_column_names, + subsample=subsample, + skip_nodata=skip_nodata, + as_array=as_array, + random_state=random_state, + force_pixel_offset=force_pixel_offset, ) - if not as_array: - points = Vector( - gpd.GeoDataFrame( - pixel_data.T, - columns=all_column_names, - geometry=gpd.points_from_xy(x_coords_2, y_coords_2), - crs=self.crs, - ) - ) - return points - else: - # Merge the coordinates and pixel data an array of N x K - # This has the downside of converting all the data to the same data type - points_arr = 
np.vstack((x_coords_2.reshape(1, -1), y_coords_2.reshape(1, -1), pixel_data)).T - return points_arr - @classmethod def from_pointcloud_regular( cls: type[RasterType], @@ -3986,62 +3397,17 @@ def from_pointcloud_regular( :param area_or_point: Whether to set the pixel interpretation of the raster to "Area" or "Point". """ - # Get transform and shape from input - if grid_coords is not None: - - # Input checks - if ( - not isinstance(grid_coords, tuple) - or not (isinstance(grid_coords[0], np.ndarray) and grid_coords[0].ndim == 1) - or not (isinstance(grid_coords[1], np.ndarray) and grid_coords[1].ndim == 1) - ): - raise TypeError("Input grid coordinates must be 1D arrays.") - - diff_x = np.diff(grid_coords[0]) - diff_y = np.diff(grid_coords[1]) - - if not all(diff_x == diff_x[0]) and all(diff_y == diff_y[0]): - raise ValueError("Grid coordinates must be regular (equally spaced, independently along X and Y).") - - # Build transform from min X, max Y and step in both - out_transform = rio.transform.from_origin( - np.min(grid_coords[0]), np.max(grid_coords[1]), diff_x[0], diff_y[0] - ) - # Y is first axis, X is second axis - out_shape = (len(grid_coords[1]), len(grid_coords[0])) - - elif transform is not None and shape is not None: - - out_transform = transform - out_shape = shape - - else: - raise ValueError("Either grid coordinates or both geotransform and shape must be provided.") - - # Create raster from inputs, with placeholder data for now - dtype = pointcloud[data_column_name].dtype - out_nodata = nodata if not None else _default_nodata(dtype) - arr = np.ones(out_shape, dtype=dtype) - raster_arr = cls.from_array( - data=arr, transform=out_transform, crs=pointcloud.crs, nodata=out_nodata, area_or_point=area_or_point - ) - - # Get indexes of point cloud coordinates in the raster, forcing no shift - i, j = raster_arr.xy2ij( - x=pointcloud.geometry.x.values, y=pointcloud.geometry.y.values, shift_area_or_point=False + arr, transform, crs, nodata, aop = 
_regular_pointcloud_to_raster( + pointcloud=pointcloud, + grid_coords=grid_coords, + transform=transform, + shape=shape, + nodata=nodata, + data_column_name=data_column_name, + area_or_point=area_or_point, ) - # If coordinates are not integer type (forced in xy2ij), then some points are not falling on exact coordinates - if not np.issubdtype(i.dtype, np.integer) or not np.issubdtype(i.dtype, np.integer): - raise ValueError("Some point cloud coordinates differ from the grid coordinates.") - - # Set values - mask = np.ones(np.shape(arr), dtype=bool) - mask[i, j] = False - arr[i, j] = pointcloud[data_column_name].values - raster_arr.data = np.ma.masked_array(data=arr, mask=mask) - - return raster_arr + return cls.from_array(data=arr, transform=transform, crs=crs, nodata=nodata, area_or_point=area_or_point) def polygonize( self, @@ -4059,60 +3425,7 @@ def polygonize( :returns: Vector containing the polygonized geometries associated to target values. """ - # Mask a unique value set by a number - if isinstance(target_values, (int, float, np.integer, np.floating)): - if np.sum(self.data == target_values) == 0: - raise ValueError(f"no pixel with in_value {target_values}") - - bool_msk = np.array(self.data == target_values).astype(np.uint8) - - # Mask values within boundaries set by a tuple - elif isinstance(target_values, tuple): - if np.sum((self.data > target_values[0]) & (self.data < target_values[1])) == 0: - raise ValueError(f"no pixel with in_value between {target_values[0]} and {target_values[1]}") - - bool_msk = ((self.data > target_values[0]) & (self.data < target_values[1])).astype(np.uint8) - - # Mask specific values set by a sequence - elif isinstance(target_values, list) or isinstance(target_values, np.ndarray): - if np.sum(np.isin(self.data, np.array(target_values))) == 0: - raise ValueError("no pixel with in_value " + ", ".join(map("{}".format, target_values))) - - bool_msk = np.isin(self.data, np.array(target_values)).astype("uint8") - - # Mask all valid 
values - elif target_values == "all": - # Using getmaskarray is necessary in case .data.mask is nomask (False) - bool_msk = (~np.ma.getmaskarray(self.data)).astype("uint8") - - else: - raise ValueError("in_value must be a number, a tuple or a sequence") - - # GeoPandas.from_features() only supports certain dtypes, we find the best common dtype to optimize memory usage - # TODO: this should be a function independent of polygonize, reused in several places - gpd_dtypes = ["uint8", "uint16", "int16", "int32", "float32"] - list_common_dtype_index = [] - for gpd_type in gpd_dtypes: - polygonize_dtype = np.promote_types(gpd_type, self.dtype) - if str(polygonize_dtype) in gpd_dtypes: - list_common_dtype_index.append(gpd_dtypes.index(gpd_type)) - if len(list_common_dtype_index) == 0: - final_dtype = "float32" - else: - final_dtype_index = min(list_common_dtype_index) - final_dtype = gpd_dtypes[final_dtype_index] - - results = ( - {"properties": {"raster_value": v}, "geometry": s} - for i, (s, v) in enumerate(shapes(self.data.astype(final_dtype), mask=bool_msk, transform=self.transform)) - ) - - gdf = gpd.GeoDataFrame.from_features(list(results)) - gdf.insert(0, data_column_name, range(0, 0 + len(gdf))) - gdf = gdf.set_geometry(col="geometry") - gdf = gdf.set_crs(self.crs) - - return gv.Vector(gdf) + return _polygonize(source_raster=self, target_values=target_values, data_column_name=data_column_name) def proximity( self, @@ -4143,7 +3456,7 @@ def proximity( :return: Proximity distances raster. """ - proximity = proximity_from_vector_or_raster( + proximity = _proximity_from_vector_or_raster( raster=self, vector=vector, target_values=target_values, @@ -4169,8 +3482,7 @@ def subsample( return_indices: Literal[False] = False, *, random_state: int | np.random.Generator | None = None, - ) -> NDArrayNum: - ... + ) -> NDArrayNum: ... 
@overload def subsample( @@ -4179,8 +3491,7 @@ def subsample( return_indices: Literal[True], *, random_state: int | np.random.Generator | None = None, - ) -> tuple[NDArrayNum, ...]: - ... + ) -> tuple[NDArrayNum, ...]: ... @overload def subsample( @@ -4188,8 +3499,7 @@ def subsample( subsample: float | int, return_indices: bool = False, random_state: int | np.random.Generator | None = None, - ) -> NDArrayNum | tuple[NDArrayNum, ...]: - ... + ) -> NDArrayNum | tuple[NDArrayNum, ...]: ... def subsample( self, @@ -4329,8 +3639,7 @@ def reproject( silent: bool = False, n_threads: int = 0, memory_limit: int = 64, - ) -> Mask: - ... + ) -> Mask: ... @overload def reproject( @@ -4349,8 +3658,7 @@ def reproject( silent: bool = False, n_threads: int = 0, memory_limit: int = 64, - ) -> None: - ... + ) -> None: ... @overload def reproject( @@ -4369,8 +3677,7 @@ def reproject( silent: bool = False, n_threads: int = 0, memory_limit: int = 64, - ) -> Mask | None: - ... + ) -> Mask | None: ... def reproject( self: Mask, @@ -4440,8 +3747,7 @@ def crop( mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[False] = False, - ) -> Mask: - ... + ) -> Mask: ... @overload def crop( @@ -4450,8 +3756,7 @@ def crop( mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def crop( @@ -4460,8 +3765,7 @@ def crop( mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: bool = False, - ) -> Mask | None: - ... + ) -> Mask | None: ... 
def crop( self: Mask, @@ -4590,83 +3894,3 @@ def __invert__(self: Mask) -> Mask: """Bitwise inversion of a mask.""" return self.copy(~self.data) - - -# ----------------------------------------- -# Additional stand-alone utility functions -# ----------------------------------------- - - -def proximity_from_vector_or_raster( - raster: Raster, - vector: Vector | None = None, - target_values: list[float] | None = None, - geometry_type: str = "boundary", - in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both", - distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced", -) -> NDArrayNum: - """ - (This function is defined here as mostly raster-based, but used in a class method for both Raster and Vector) - Proximity to a Raster's target values if no Vector is provided, otherwise to a Vector's geometry type - rasterized on the Raster. - - :param raster: Raster to burn the proximity grid on. - :param vector: Vector for which to compute the proximity to geometry, - if not provided computed on the Raster target pixels. - :param target_values: (Only with a Raster) List of target values to use for the proximity, - defaults to all non-zero values. - :param geometry_type: (Only with a Vector) Type of geometry to use for the proximity, defaults to 'boundary'. - :param in_or_out: (Only with a Vector) Compute proximity only 'in' or 'out'-side the geometry, or 'both'. - :param distance_unit: Distance unit, either 'georeferenced' or 'pixel'. - """ - - # 1/ First, if there is a vector input, we rasterize the geometry type - # (works with .boundary that is a LineString (.exterior exists, but is a LinearRing) - if vector is not None: - - # TODO: Only when using centroid... Maybe we should leave this operation to the user anyway? 
- warnings.filterwarnings("ignore", message="Geometry is in a geographic CRS.*") - - # We create a geodataframe with the geometry type - boundary_shp = gpd.GeoDataFrame(geometry=vector.ds.__getattr__(geometry_type), crs=vector.crs) - # We mask the pixels that make up the geometry type - mask_boundary = Vector(boundary_shp).create_mask(raster, as_array=True) - - else: - # We mask target pixels - if target_values is not None: - mask_boundary = np.logical_or.reduce([raster.get_nanarray() == target_val for target_val in target_values]) - # Otherwise, all non-zero values are considered targets - else: - mask_boundary = raster.get_nanarray().astype(bool) - - # 2/ Now, we compute the distance matrix relative to the masked geometry type - if distance_unit.lower() == "georeferenced": - sampling: int | tuple[float | int, float | int] = raster.res - elif distance_unit.lower() == "pixel": - sampling = 1 - else: - raise ValueError('Distance unit must be either "georeferenced" or "pixel".') - - # If not all pixels are targets, then we compute the distance - non_targets = np.count_nonzero(mask_boundary) - if non_targets > 0: - proximity = distance_transform_edt(~mask_boundary, sampling=sampling) - # Otherwise, pass an array full of nodata - else: - proximity = np.ones(np.shape(mask_boundary)) * np.nan - - # 3/ If there was a vector input, apply the in_and_out argument to optionally mask inside/outside - if vector is not None: - if in_or_out == "both": - pass - elif in_or_out in ["in", "out"]: - mask_polygon = Vector(vector.ds).create_mask(raster, as_array=True) - if in_or_out == "in": - proximity[~mask_polygon] = 0 - else: - proximity[mask_polygon] = 0 - else: - raise ValueError('The type of proximity must be one of "in", "out" or "both".') - - return proximity diff --git a/geoutils/raster/sampling.py b/geoutils/raster/sampling.py index 34aa557d..a4559403 100644 --- a/geoutils/raster/sampling.py +++ b/geoutils/raster/sampling.py @@ -7,7 +7,7 @@ import numpy as np from 
geoutils._typing import MArrayNum, NDArrayNum -from geoutils.raster.array import get_mask_from_array +from geoutils.raster.array import _get_mask_from_array @overload @@ -17,8 +17,7 @@ def subsample_array( return_indices: Literal[False] = False, *, random_state: int | np.random.Generator | None = None, -) -> NDArrayNum: - ... +) -> NDArrayNum: ... @overload @@ -28,8 +27,7 @@ def subsample_array( return_indices: Literal[True], *, random_state: int | np.random.Generator | None = None, -) -> tuple[NDArrayNum, ...]: - ... +) -> tuple[NDArrayNum, ...]: ... @overload @@ -38,8 +36,7 @@ def subsample_array( subsample: float | int, return_indices: bool = False, random_state: int | np.random.Generator | None = None, -) -> NDArrayNum | tuple[NDArrayNum, ...]: - ... +) -> NDArrayNum | tuple[NDArrayNum, ...]: ... def subsample_array( @@ -63,7 +60,7 @@ def subsample_array( rng = np.random.default_rng(random_state) # Remove invalid values and flatten array - mask = get_mask_from_array(array) # -> need to remove .squeeze in get_mask + mask = _get_mask_from_array(array) # -> need to remove .squeeze in get_mask valids = np.argwhere(~mask.flatten()).squeeze() # Get number of points to extract @@ -184,7 +181,7 @@ def subdivide_array(shape: tuple[int, ...], count: int) -> NDArrayNum: # Generate a small grid of indices, with the same unique count as 'count' rect = _get_closest_rectangle(count) - small_indices = np.pad(np.arange(count), np.prod(rect) - count, mode="edge")[: np.prod(rect)].reshape(rect) + small_indices = np.pad(np.arange(count), np.prod(rect) - count, mode="edge")[: int(np.prod(rect))].reshape(rect) # Upscale the grid to fit the output shape using nearest neighbour scaling. 
indices = skimage.transform.resize(small_indices, shape, order=0, preserve_range=True).astype(int) diff --git a/geoutils/raster/satimg.py b/geoutils/raster/satimg.py index 181ad5a0..79871dbb 100644 --- a/geoutils/raster/satimg.py +++ b/geoutils/raster/satimg.py @@ -1,6 +1,7 @@ """ geoutils.satimg provides a toolset for working with satellite data. """ + from __future__ import annotations import datetime as dt @@ -101,7 +102,7 @@ def parse_metadata_from_fn(fname: str) -> list[Any]: else: attrs = (None,) * 6 - # if the form is only XX.ext (only the first versions of SRTM had a naming that... bad (simplfied?)) + # if the form is only XX.ext (only the first versions of SRTM had a naming that... bad (simplified?)) elif os.path.splitext(os.path.basename(fname))[1] == ".hgt": attrs = ( "SRTM", diff --git a/geoutils/vector/__init__.py b/geoutils/vector/__init__.py new file mode 100644 index 00000000..dd198200 --- /dev/null +++ b/geoutils/vector/__init__.py @@ -0,0 +1,3 @@ +from geoutils.vector.geometric import * # noqa +from geoutils.vector.geotransformations import * # noqa +from geoutils.vector.vector import Vector, VectorType # noqa diff --git a/geoutils/vector/geometric.py b/geoutils/vector/geometric.py new file mode 100644 index 00000000..a30404a6 --- /dev/null +++ b/geoutils/vector/geometric.py @@ -0,0 +1,212 @@ +"""Functionalities to manipulate vector geometries.""" + +from __future__ import annotations + +import warnings + +import geopandas as gpd +import matplotlib.pyplot as plt +import numpy as np +import shapely +from scipy.spatial import Voronoi +from shapely.geometry.polygon import Polygon + +import geoutils as gu +from geoutils.projtools import _get_utm_ups_crs, bounds2poly + + +def _buffer_metric(gdf: gpd.GeoDataFrame, buffer_size: float) -> gu.Vector: + """ + Metric buffering. See Vector.buffer_metric() for details. 
+ """ + + crs_utm_ups = _get_utm_ups_crs(df=gdf) + + # Reproject the shapefile in the local UTM + ds_utm = gdf.to_crs(crs=crs_utm_ups) + + # Buffer the shapefile + ds_buffered = ds_utm.buffer(distance=buffer_size) + del ds_utm + + # Revert-project the shapefile in the original CRS + ds_buffered_origproj = ds_buffered.to_crs(crs=gdf.crs) + del ds_buffered + + # Return a Vector object of the buffered GeoDataFrame + # TODO: Clarify what is conserved in the GeoSeries and what to pass the GeoDataFrame to not lose any attributes + vector_buffered = gu.Vector(gpd.GeoDataFrame(geometry=ds_buffered_origproj.geometry, crs=gdf.crs)) + + return vector_buffered + + +def _buffer_without_overlap( + gdf: gpd.GeoDataFrame, buffer_size: int | float, metric: bool = True, plot: bool = False +) -> gu.Vector: + """See Vector.buffer_without_overlap() for details.""" + + # Project in local UTM if metric is True + if metric: + crs_utm_ups = _get_utm_ups_crs(df=gdf) + gdf = gdf.to_crs(crs=crs_utm_ups) + else: + gdf = gdf + + # Dissolve all geometries into one + merged = gdf.dissolve() + + # Add buffer around geometries + merged_buffer = merged.buffer(buffer_size) + + # Extract only the buffered area + buffer = merged_buffer.difference(merged) + + # Crop Voronoi polygons to bound geometry and add missing polygons + bound_poly = bounds2poly(gdf) + bound_poly = bound_poly.buffer(buffer_size) + voronoi_all = _generate_voronoi_with_bounds(gdf, bound_poly) + if plot: + plt.figure(figsize=(16, 4)) + ax1 = plt.subplot(141) + voronoi_all.plot(ax=ax1) + gdf.plot(fc="none", ec="k", ax=ax1) + ax1.set_title("Voronoi polygons, cropped") + + # Extract Voronoi polygons only within the buffer area + voronoi_diff = voronoi_all.intersection(buffer.geometry[0]) + + # Split all polygons, and join attributes of original geometries into the Voronoi polygons + # Splitting, i.e. explode, is needed when Voronoi generate MultiPolygons that may extend over several features. 
+ voronoi_gdf = gpd.GeoDataFrame(geometry=voronoi_diff.explode(index_parts=True)) # requires geopandas>=0.10 + joined_voronoi = gpd.tools.sjoin(gdf, voronoi_gdf, how="right") + + # Plot results -> some polygons are duplicated + if plot: + ax2 = plt.subplot(142, sharex=ax1, sharey=ax1) + joined_voronoi.plot(ax=ax2, column="index_left", alpha=0.5, ec="k") + gdf.plot(ax=ax2, column=gdf.index.values) + ax2.set_title("Buffer with duplicated polygons") + + # Find non unique Voronoi polygons, and retain only first one + _, indexes = np.unique(joined_voronoi.index, return_index=True) + unique_voronoi = joined_voronoi.iloc[indexes] + + # Plot results -> unique polygons only + if plot: + ax3 = plt.subplot(143, sharex=ax1, sharey=ax1) + unique_voronoi.plot(ax=ax3, column="index_left", alpha=0.5, ec="k") + gdf.plot(ax=ax3, column=gdf.index.values) + ax3.set_title("Buffer with unique polygons") + + # Dissolve all polygons by original index + merged_voronoi = unique_voronoi.dissolve(by="index_left") + + # Plot + if plot: + ax4 = plt.subplot(144, sharex=ax1, sharey=ax1) + gdf.plot(ax=ax4, column=gdf.index.values) + merged_voronoi.plot(column=merged_voronoi.index.values, ax=ax4, alpha=0.5) + ax4.set_title("Final buffer") + plt.show() + + # Reverse-project to the original CRS if metric is True + if metric: + merged_voronoi = merged_voronoi.to_crs(crs=gdf.crs) + + return gu.Vector(merged_voronoi) + + +def _extract_vertices(gdf: gpd.GeoDataFrame) -> list[list[tuple[float, float]]]: + r""" + Function to extract the exterior vertices of all shapes within a gpd.GeoDataFrame. + + :param gdf: The GeoDataFrame from which the vertices need to be extracted. + + :returns: A list containing a list of (x, y) positions of the vertices. The length of the primary list is equal + to the number of geometries inside gdf, and length of each sublist is the number of vertices in the geometry. 
+ """ + vertices = [] + # Loop on all geometries within gdf + for geom in gdf.geometry: + # Extract geometry exterior(s) + if geom.geom_type == "MultiPolygon": + exteriors = [p.exterior for p in geom.geoms] + elif geom.geom_type == "Polygon": + exteriors = [geom.exterior] + elif geom.geom_type == "LineString": + exteriors = [geom] + elif geom.geom_type == "MultiLineString": + exteriors = list(geom.geoms) + else: + raise NotImplementedError(f"Geometry type {geom.geom_type} not implemented.") + + vertices.extend([list(ext.coords) for ext in exteriors]) + + return vertices + + +def _generate_voronoi_polygons(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """ + Generate Voronoi polygons (tessellation) from the vertices of all geometries in a GeoDataFrame. + + Uses scipy.spatial.voronoi. + + :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons. + + :returns: A GeoDataFrame containing the Voronoi polygons. + """ + # Extract the coordinates of the vertices of all geometries in gdf + vertices = _extract_vertices(gdf) + coords = np.concatenate(vertices) + + # Create the Voronoi diagram and extract ridges + vor = Voronoi(coords) + lines = [shapely.geometry.LineString(vor.vertices[line]) for line in vor.ridge_vertices if -1 not in line] + polys = list(shapely.ops.polygonize(lines)) + if len(polys) == 0: + raise ValueError("Invalid geometry, cannot generate finite Voronoi polygons") + + # Convert into GeoDataFrame + voronoi = gpd.GeoDataFrame(geometry=gpd.GeoSeries(polys)) + voronoi.crs = gdf.crs + + return voronoi + + +def _generate_voronoi_with_bounds(gdf: gpd.GeoDataFrame, bound_poly: Polygon) -> gpd.GeoDataFrame: + """ + Generate Voronoi polygons that are bounded by the polygon bound_poly, to avoid Voronoi polygons that extend \ +far beyond the original geometry. + + Voronoi polygons are created using generate_voronoi_polygons, cropped to the extent of bound_poly and gaps \ +are filled with new polygons. 
+ + :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons. + :param: A shapely Polygon to be used for bounding the Voronoi diagrams. + + :returns: A GeoDataFrame containing the Voronoi polygons. + """ + # Create Voronoi polygons + voronoi = _generate_voronoi_polygons(gdf) + + # Crop Voronoi polygons to input bound_poly extent + voronoi_crop = voronoi.intersection(bound_poly) + voronoi_crop = gpd.GeoDataFrame(geometry=voronoi_crop) # convert to DataFrame + + # Dissolve all Voronoi polygons and subtract from bounds to get gaps + voronoi_merged = voronoi_crop.dissolve() + bound_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(bound_poly)) + bound_gdf.crs = gdf.crs + gaps = bound_gdf.difference(voronoi_merged) + + # Merge cropped Voronoi with gaps, if not empty, otherwise return cropped Voronoi + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Geometry is in a geographic CRS. Results from 'area' are likely incorrect.") + tot_area = np.sum(gaps.area.values) + + if not tot_area == 0: + voronoi_all = gpd.GeoDataFrame(geometry=list(voronoi_crop.geometry) + list(gaps.geometry)) + voronoi_all.crs = gdf.crs + return voronoi_all + else: + return voronoi_crop diff --git a/geoutils/vector/geotransformations.py b/geoutils/vector/geotransformations.py new file mode 100644 index 00000000..a6c8a92f --- /dev/null +++ b/geoutils/vector/geotransformations.py @@ -0,0 +1,55 @@ +"""Functionalities for geotransformations of vectors.""" + +from __future__ import annotations + +import os + +import geopandas as gpd +import pyogrio +import rasterio as rio +from rasterio.crs import CRS + +import geoutils as gu + + +def _reproject( + gdf: gpd.GeoDataFrame, + ref: gu.Raster | rio.io.DatasetReader | gu.Vector | gpd.GeoDataFrame | str | None = None, + crs: CRS | str | int | None = None, +) -> gpd.GeoDataFrame: + """Reproject a vector. 
See Vector.reproject() for more details.""" + + # Check that either ref or crs is provided + if (ref is not None and crs is not None) or (ref is None and crs is None): + raise ValueError("Either of `ref` or `crs` must be set. Not both.") + + # Case a raster or vector is provided as reference + if ref is not None: + # Check that ref type is either str, Raster or rasterio data set + # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45 + if isinstance(ref, (gu.Raster, gu.Vector)): + ds_ref = ref + elif isinstance(ref, (rio.io.DatasetReader, gpd.GeoDataFrame)): + ds_ref = ref + elif isinstance(ref, str): + if not os.path.exists(ref): + raise ValueError("Reference raster or vector path does not exist.") + try: + ds_ref = gu.Raster(ref, load_data=False) + except rio.errors.RasterioIOError: + try: + ds_ref = gu.Vector(ref) + except pyogrio.errors.DataSourceError: + raise ValueError("Could not open raster or vector with rasterio or pyogrio.") + else: + raise TypeError("Type of ref must be string path to file, Raster or Vector.") + + # Read reprojecting params from ref raster + crs = ds_ref.crs + else: + # Determine user-input target CRS + crs = CRS.from_user_input(crs) + + new_ds = gdf.to_crs(crs=crs) + + return new_ds diff --git a/geoutils/vector.py b/geoutils/vector/vector.py similarity index 77% rename from geoutils/vector.py rename to geoutils/vector/vector.py index f4c61ab8..7d563b0b 100644 --- a/geoutils/vector.py +++ b/geoutils/vector/vector.py @@ -1,13 +1,12 @@ """ -geoutils.vectortools provides a toolset for working with vector data. +Module for Vector class. 
""" + from __future__ import annotations -import os import pathlib import warnings from collections import abc -from numbers import Number from os import PathLike from typing import ( Any, @@ -25,28 +24,25 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -import pyogrio import rasterio as rio -import rasterio.errors -import shapely from geopandas.testing import assert_geodataframe_equal from mpl_toolkits.axes_grid1 import make_axes_locatable from pandas._typing import WriteBuffer -from rasterio import features, warp from rasterio.crs import CRS -from scipy.spatial import Voronoi from shapely.geometry.base import BaseGeometry -from shapely.geometry.polygon import Polygon import geoutils as gu from geoutils._typing import NDArrayBool, NDArrayNum +from geoutils.interface.distance import _proximity_from_vector_or_raster +from geoutils.interface.raster_vector import _create_mask, _rasterize from geoutils.misc import copy_doc from geoutils.projtools import ( _get_bounds_projected, _get_footprint_projected, _get_utm_ups_crs, - bounds2poly, ) +from geoutils.vector.geometric import _buffer_metric, _buffer_without_overlap +from geoutils.vector.geotransformations import _reproject # This is a generic Vector-type (if subclasses are made, this will change appropriately) VectorType = TypeVar("VectorType", bound="Vector") @@ -148,12 +144,10 @@ def __str__(self) -> str: return str(self.ds.__str__()) @overload - def info(self, verbose: Literal[True] = ...) -> None: - ... + def info(self, verbose: Literal[True] = ...) -> None: ... @overload - def info(self, verbose: Literal[False]) -> str: - ... + def info(self, verbose: Literal[False]) -> str: ... 
def info(self, verbose: bool = True) -> str | None: """ @@ -323,7 +317,7 @@ def _override_gdf_output( """Parse outputs of GeoPandas functions to facilitate object manipulation.""" # Raise error if output is not treated separately, should appear in tests - if not isinstance(other, (gpd.GeoDataFrame, gpd.GeoDataFrame, pd.Series, BaseGeometry)): + if not isinstance(other, (gpd.GeoDataFrame, pd.Series, BaseGeometry)): raise ValueError("Not implemented. This error should only be raised in tests.") # If a GeoDataFrame is the output, return it @@ -652,7 +646,7 @@ def sjoin(self, df: Vector | gpd.GeoDataFrame, *args: Any, **kwargs: Any) -> Vec else: gdf = df - return self._override_gdf_output(self.ds.sjoin(df=gdf, *args, **kwargs)) + return self._override_gdf_output(self.ds.sjoin(gdf, *args, **kwargs)) @copy_doc(gpd.GeoDataFrame, "Vector") def sjoin_nearest( @@ -994,8 +988,7 @@ def crop( clip: bool, *, inplace: Literal[False] = False, - ) -> VectorType: - ... + ) -> VectorType: ... @overload def crop( @@ -1004,8 +997,7 @@ def crop( clip: bool, *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def crop( @@ -1014,8 +1006,7 @@ def crop( clip: bool, *, inplace: bool = False, - ) -> VectorType | None: - ... + ) -> VectorType | None: ... def crop( self: VectorType, @@ -1070,8 +1061,7 @@ def reproject( crs: CRS | str | int | None = None, *, inplace: Literal[False] = False, - ) -> Vector: - ... + ) -> Vector: ... @overload def reproject( @@ -1080,8 +1070,7 @@ def reproject( crs: CRS | str | int | None = None, *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def reproject( @@ -1090,8 +1079,7 @@ def reproject( crs: CRS | str | int | None = None, *, inplace: bool = False, - ) -> Vector | None: - ... + ) -> Vector | None: ... def reproject( self: Vector, @@ -1117,38 +1105,7 @@ def reproject( :returns: Reprojected vector (or None if inplace). 
""" - # Check that either ref or crs is provided - if (ref is not None and crs is not None) or (ref is None and crs is None): - raise ValueError("Either of `ref` or `crs` must be set. Not both.") - - # Case a raster or vector is provided as reference - if ref is not None: - # Check that ref type is either str, Raster or rasterio data set - # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45 - if isinstance(ref, (gu.Raster, gu.Vector)): - ds_ref = ref - elif isinstance(ref, (rio.io.DatasetReader, gpd.GeoDataFrame)): - ds_ref = ref - elif isinstance(ref, str): - if not os.path.exists(ref): - raise ValueError("Reference raster or vector path does not exist.") - try: - ds_ref = gu.Raster(ref, load_data=False) - except rasterio.errors.RasterioIOError: - try: - ds_ref = Vector(ref) - except pyogrio.errors.DataSourceError: - raise ValueError("Could not open raster or vector with rasterio or pyogrio.") - else: - raise TypeError("Type of ref must be string path to file, Raster or Vector.") - - # Read reprojecting params from ref raster - crs = ds_ref.crs - else: - # Determine user-input target CRS - crs = CRS.from_user_input(crs) - - new_ds = self.ds.to_crs(crs=crs) + new_ds = _reproject(gdf=self.ds, ref=ref, crs=crs) if inplace: self.ds = new_ds @@ -1164,8 +1121,7 @@ def translate( zoff: float = 0.0, *, inplace: Literal[False] = False, - ) -> VectorType: - ... + ) -> VectorType: ... @overload def translate( @@ -1175,8 +1131,7 @@ def translate( zoff: float = 0.0, *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... @overload def translate( @@ -1186,8 +1141,7 @@ def translate( zoff: float = 0.0, *, inplace: bool = False, - ) -> VectorType | None: - ... + ) -> VectorType | None: ... def translate( self: VectorType, @@ -1231,8 +1185,7 @@ def create_mask( buffer: int | float | np.integer[Any] | np.floating[Any] = 0, *, as_array: Literal[False] = False, - ) -> gu.Mask: - ... + ) -> gu.Mask: ... 
@overload def create_mask( @@ -1245,8 +1198,7 @@ def create_mask( buffer: int | float | np.integer[Any] | np.floating[Any] = 0, *, as_array: Literal[True], - ) -> NDArrayNum: - ... + ) -> NDArrayNum: ... def create_mask( self, @@ -1280,76 +1232,9 @@ def create_mask( :returns: A Mask object contain a boolean array """ - # If no raster given, use provided dimensions - if raster is None: - # At minimum, xres must be set - if xres is None: - raise ValueError("At least raster or xres must be set.") - if yres is None: - yres = xres - - # By default, use self's CRS and bounds - if crs is None: - crs = self.ds.crs - if bounds is None: - bounds_shp = True - bounds = self.ds.total_bounds - else: - bounds_shp = False - - # Calculate raster shape - left, bottom, right, top = bounds - height = abs((right - left) / xres) - width = abs((top - bottom) / yres) - - if width % 1 != 0 or height % 1 != 0: - # Only warn if the bounds were provided, and not derived from the vector - if not bounds_shp: - warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.") - - width = int(np.round(width)) - height = int(np.round(height)) - out_shape = (height, width) - - # Calculate raster transform - transform = rio.transform.from_bounds(left, bottom, right, top, width, height) - - # otherwise use directly raster's dimensions - elif isinstance(raster, gu.Raster): - out_shape = raster.shape - transform = raster.transform - crs = raster.crs - bounds = raster.bounds - else: - raise TypeError("Raster must be a geoutils.Raster or None.") - - # Copying GeoPandas dataframe before applying changes - gdf = self.ds.copy() - - # Crop vector geometries to avoid issues when reprojecting - left, bottom, right, top = bounds # type: ignore - x1, y1, x2, y2 = warp.transform_bounds(crs, gdf.crs, left, bottom, right, top) - gdf = gdf.cx[x1:x2, y1:y2] - - # Reproject vector into raster CRS - gdf = gdf.to_crs(crs) - - # Create a buffer around the features - if not isinstance(buffer, (int, float, 
np.number)): - raise TypeError(f"Buffer must be a number, currently set to {type(buffer).__name__}.") - if buffer != 0: - gdf.geometry = [geom.buffer(buffer) for geom in gdf.geometry] - elif buffer == 0: - pass - - # Rasterize geometry - mask = features.rasterize( - shapes=gdf.geometry, fill=0, out_shape=out_shape, transform=transform, default_value=1, dtype="uint8" - ).astype("bool") - - # Force output mask to be of same dimension as input raster - if raster is not None: - mask = mask.reshape((raster.count, raster.height, raster.width)) # type: ignore + mask, transform, crs = _create_mask( + gdf=self.ds, raster=raster, crs=crs, xres=xres, yres=yres, bounds=bounds, buffer=buffer, as_array=as_array + ) # Return output as mask or as array if as_array: @@ -1393,84 +1278,16 @@ def rasterize( :returns: Raster or mask containing the burned geometries. """ - if (raster is not None) and (crs is not None): - raise ValueError("Only one of raster or crs can be provided.") - - # Reproject vector into requested CRS or rst CRS first, if needed - # This has to be done first so that width/height calculated below are correct! 
- if crs is None: - crs = self.ds.crs - - if raster is not None: - crs = raster.crs # type: ignore - - vect = self.ds.to_crs(crs) - - # If no raster given, now use provided dimensions - if raster is None: - # At minimum, xres must be set - if xres is None: - raise ValueError("At least raster or xres must be set.") - if yres is None: - yres = xres - - # By default, use self's bounds - if bounds is None: - bounds = vect.total_bounds - - # Calculate raster shape - left, bottom, right, top = bounds - width = abs((right - left) / xres) - height = abs((top - bottom) / yres) - - if width % 1 != 0 or height % 1 != 0: - warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.") - - width = int(np.round(width)) - height = int(np.round(height)) - out_shape = (height, width) - - # Calculate raster transform - transform = rio.transform.from_bounds(left, bottom, right, top, width, height) - - # otherwise use directly raster's dimensions - else: - out_shape = raster.shape # type: ignore - transform = raster.transform # type: ignore - - # Set default burn value, index from 1 to len(self.ds) - if in_value is None: - in_value = self.ds.index + 1 - - # Rasterize geometry - if isinstance(in_value, abc.Iterable): - if len(in_value) != len(vect.geometry): # type: ignore - raise ValueError( - "in_value must have same length as self.ds.geometry, currently {} != {}".format( - len(in_value), len(vect.geometry) # type: ignore - ) - ) - - out_geom = ((geom, value) for geom, value in zip(vect.geometry, in_value)) - - mask = features.rasterize(shapes=out_geom, fill=out_value, out_shape=out_shape, transform=transform) - - elif isinstance(in_value, Number): - mask = features.rasterize( - shapes=vect.geometry, fill=out_value, out_shape=out_shape, transform=transform, default_value=in_value - ) - else: - raise ValueError("in_value must be a single number or an iterable with same length as self.ds.geometry") - - # We return a mask if there is a single value to burn and this value is 1 
- if isinstance(in_value, (int, np.integer, float, np.floating)) and in_value == 1: - output = gu.Mask.from_array(data=mask, transform=transform, crs=crs, nodata=None) - - # Otherwise we return a Raster if there are several values to burn - else: - output = gu.Raster.from_array(data=mask, transform=transform, crs=crs, nodata=None) - - return output + return _rasterize( + gdf=self.ds, + raster=raster, + crs=crs, + xres=xres, + yres=yres, + bounds=bounds, + in_value=in_value, + out_value=out_value, + ) @classmethod def from_bounds_projected( @@ -1557,7 +1374,7 @@ def proximity( raster = gu.Raster.from_array(data=np.zeros((1000, 1000)), transform=transform, crs=self.crs) - proximity = gu.raster.raster.proximity_from_vector_or_raster( + proximity = _proximity_from_vector_or_raster( raster=raster, vector=self, geometry_type=geometry_type, in_or_out=in_or_out, distance_unit=distance_unit ) @@ -1582,24 +1399,7 @@ def buffer_metric(self, buffer_size: float) -> Vector: :return: Buffered shapefile. 
""" - crs_utm_ups = _get_utm_ups_crs(df=self.ds) - - # Reproject the shapefile in the local UTM - ds_utm = self.ds.to_crs(crs=crs_utm_ups) - - # Buffer the shapefile - ds_buffered = ds_utm.buffer(distance=buffer_size) - del ds_utm - - # Revert-project the shapefile in the original CRS - ds_buffered_origproj = ds_buffered.to_crs(crs=self.ds.crs) - del ds_buffered - - # Return a Vector object of the buffered GeoDataFrame - # TODO: Clarify what is conserved in the GeoSeries and what to pass the GeoDataFrame to not lose any attributes - vector_buffered = Vector(gpd.GeoDataFrame(geometry=ds_buffered_origproj.geometry, crs=self.ds.crs)) - - return vector_buffered + return _buffer_metric(gdf=self.ds, buffer_size=buffer_size) def get_bounds_projected(self, out_crs: CRS, densify_points: int = 5000) -> rio.coords.BoundingBox: """ @@ -1684,173 +1484,4 @@ def buffer_without_overlap(self, buffer_size: int | float, metric: bool = True, >>> plt.plot() # doctest: +SKIP """ - # Project in local UTM if metric is True - if metric: - crs_utm_ups = _get_utm_ups_crs(df=self.ds) - gdf = self.ds.to_crs(crs=crs_utm_ups) - else: - gdf = self.ds - - # Dissolve all geometries into one - merged = gdf.dissolve() - - # Add buffer around geometries - merged_buffer = merged.buffer(buffer_size) - - # Extract only the buffered area - buffer = merged_buffer.difference(merged) - - # Crop Voronoi polygons to bound geometry and add missing polygons - bound_poly = bounds2poly(gdf) - bound_poly = bound_poly.buffer(buffer_size) - voronoi_all = generate_voronoi_with_bounds(gdf, bound_poly) - if plot: - plt.figure(figsize=(16, 4)) - ax1 = plt.subplot(141) - voronoi_all.plot(ax=ax1) - gdf.plot(fc="none", ec="k", ax=ax1) - ax1.set_title("Voronoi polygons, cropped") - - # Extract Voronoi polygons only within the buffer area - voronoi_diff = voronoi_all.intersection(buffer.geometry[0]) - - # Split all polygons, and join attributes of original geometries into the Voronoi polygons - # Splitting, i.e. 
explode, is needed when Voronoi generate MultiPolygons that may extend over several features. - voronoi_gdf = gpd.GeoDataFrame(geometry=voronoi_diff.explode(index_parts=True)) # requires geopandas>=0.10 - joined_voronoi = gpd.tools.sjoin(gdf, voronoi_gdf, how="right") - - # Plot results -> some polygons are duplicated - if plot: - ax2 = plt.subplot(142, sharex=ax1, sharey=ax1) - joined_voronoi.plot(ax=ax2, column="index_left", alpha=0.5, ec="k") - gdf.plot(ax=ax2, column=gdf.index.values) - ax2.set_title("Buffer with duplicated polygons") - - # Find non unique Voronoi polygons, and retain only first one - _, indexes = np.unique(joined_voronoi.index, return_index=True) - unique_voronoi = joined_voronoi.iloc[indexes] - - # Plot results -> unique polygons only - if plot: - ax3 = plt.subplot(143, sharex=ax1, sharey=ax1) - unique_voronoi.plot(ax=ax3, column="index_left", alpha=0.5, ec="k") - gdf.plot(ax=ax3, column=gdf.index.values) - ax3.set_title("Buffer with unique polygons") - - # Dissolve all polygons by original index - merged_voronoi = unique_voronoi.dissolve(by="index_left") - - # Plot - if plot: - ax4 = plt.subplot(144, sharex=ax1, sharey=ax1) - gdf.plot(ax=ax4, column=gdf.index.values) - merged_voronoi.plot(column=merged_voronoi.index.values, ax=ax4, alpha=0.5) - ax4.set_title("Final buffer") - plt.show() - - # Reverse-project to the original CRS if metric is True - if metric: - merged_voronoi = merged_voronoi.to_crs(crs=self.crs) - - return Vector(merged_voronoi) - - -# ----------------------------------------- -# Additional stand-alone utility functions -# ----------------------------------------- - - -def extract_vertices(gdf: gpd.GeoDataFrame) -> list[list[tuple[float, float]]]: - r""" - Function to extract the exterior vertices of all shapes within a gpd.GeoDataFrame. - - :param gdf: The GeoDataFrame from which the vertices need to be extracted. - - :returns: A list containing a list of (x, y) positions of the vertices. 
The length of the primary list is equal - to the number of geometries inside gdf, and length of each sublist is the number of vertices in the geometry. - """ - vertices = [] - # Loop on all geometries within gdf - for geom in gdf.geometry: - # Extract geometry exterior(s) - if geom.geom_type == "MultiPolygon": - exteriors = [p.exterior for p in geom.geoms] - elif geom.geom_type == "Polygon": - exteriors = [geom.exterior] - elif geom.geom_type == "LineString": - exteriors = [geom] - elif geom.geom_type == "MultiLineString": - exteriors = list(geom.geoms) - else: - raise NotImplementedError(f"Geometry type {geom.geom_type} not implemented.") - - vertices.extend([list(ext.coords) for ext in exteriors]) - - return vertices - - -def generate_voronoi_polygons(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """ - Generate Voronoi polygons (tessellation) from the vertices of all geometries in a GeoDataFrame. - - Uses scipy.spatial.voronoi. - - :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons. - - :returns: A GeoDataFrame containing the Voronoi polygons. - """ - # Extract the coordinates of the vertices of all geometries in gdf - vertices = extract_vertices(gdf) - coords = np.concatenate(vertices) - - # Create the Voronoi diagram and extract ridges - vor = Voronoi(coords) - lines = [shapely.geometry.LineString(vor.vertices[line]) for line in vor.ridge_vertices if -1 not in line] - polys = list(shapely.ops.polygonize(lines)) - if len(polys) == 0: - raise ValueError("Invalid geometry, cannot generate finite Voronoi polygons") - - # Convert into GeoDataFrame - voronoi = gpd.GeoDataFrame(geometry=gpd.GeoSeries(polys)) - voronoi.crs = gdf.crs - - return voronoi - - -def generate_voronoi_with_bounds(gdf: gpd.GeoDataFrame, bound_poly: Polygon) -> gpd.GeoDataFrame: - """ - Generate Voronoi polygons that are bounded by the polygon bound_poly, to avoid Voronoi polygons that extend \ -far beyond the original geometry. 
- - Voronoi polygons are created using generate_voronoi_polygons, cropped to the extent of bound_poly and gaps \ -are filled with new polygons. - - :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons. - :param: A shapely Polygon to be used for bounding the Voronoi diagrams. - - :returns: A GeoDataFrame containing the Voronoi polygons. - """ - # Create Voronoi polygons - voronoi = generate_voronoi_polygons(gdf) - - # Crop Voronoi polygons to input bound_poly extent - voronoi_crop = voronoi.intersection(bound_poly) - voronoi_crop = gpd.GeoDataFrame(geometry=voronoi_crop) # convert to DataFrame - - # Dissolve all Voronoi polygons and subtract from bounds to get gaps - voronoi_merged = voronoi_crop.dissolve() - bound_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(bound_poly)) - bound_gdf.crs = gdf.crs - gaps = bound_gdf.difference(voronoi_merged) - - # Merge cropped Voronoi with gaps, if not empty, otherwise return cropped Voronoi - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Geometry is in a geographic CRS. 
Results from 'area' are likely incorrect.") - tot_area = np.sum(gaps.area.values) - - if not tot_area == 0: - voronoi_all = gpd.GeoDataFrame(geometry=list(voronoi_crop.geometry) + list(gaps.geometry)) - voronoi_all.crs = gdf.crs - return voronoi_all - else: - return voronoi_crop + return _buffer_without_overlap(self.ds, buffer_size=buffer_size, metric=metric, plot=plot) diff --git a/setup.py b/setup.py index eb7bae29..7f7e78e1 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ """This file now only serves for backward-compatibility for routines explicitly calling python setup.py""" + from setuptools import setup setup() diff --git a/tests/test_config.py b/tests/test_config.py index 1803c1b9..ed1f1f05 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,5 @@ """Test configuration file.""" + import geoutils as gu diff --git a/tests/test_doc.py b/tests/test_doc.py index cbbe3ff1..753880d9 100644 --- a/tests/test_doc.py +++ b/tests/test_doc.py @@ -1,4 +1,5 @@ """Functions to test the documentation.""" + import os import platform import shutil diff --git a/tests/test_examples.py b/tests/test_examples.py index a9e4d54b..fb6cad81 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,6 +1,7 @@ """ Test the example files used for testing and documentation """ + import hashlib import warnings diff --git a/tests/test_interface/test_distance.py b/tests/test_interface/test_distance.py new file mode 100644 index 00000000..c2c1641d --- /dev/null +++ b/tests/test_interface/test_distance.py @@ -0,0 +1,230 @@ +"""Test distance functions at the interface of raster and vectors.""" + +from __future__ import annotations + +import os +import tempfile +import warnings + +import numpy as np +import pytest +import rasterio as rio + +import geoutils as gu +from geoutils._typing import NDArrayNum + + +def run_gdal_proximity( + input_raster: gu.Raster, target_values: list[float] | None, distunits: str = "GEO" +) -> NDArrayNum: + """Run GDAL's 
ComputeProximity and return the read numpy array.""" + # Rasterio strongly recommends against importing gdal along rio, so this is done here instead. + from osgeo import gdal, gdalconst + + gdal.UseExceptions() + + # Initiate empty GDAL raster for proximity output + drv = gdal.GetDriverByName("MEM") + proxy_ds = drv.Create("", input_raster.shape[1], input_raster.shape[0], 1, gdal.GetDataTypeByName("Float32")) + proxy_ds.GetRasterBand(1).SetNoDataValue(-9999) + + # Save input in temporary file to read with GDAL + # (avoids the nightmare of setting nodata, transform, crs in GDAL format...) + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = os.path.join(temp_dir, "input.tif") + input_raster.save(temp_path) + ds_raster_in = gdal.Open(temp_path, gdalconst.GA_ReadOnly) + + # Define GDAL options + proximity_options = ["DISTUNITS=" + distunits] + if target_values is not None: + proximity_options.insert(0, "VALUES=" + ",".join([str(tgt) for tgt in target_values])) + + # Compute proximity + gdal.ComputeProximity(ds_raster_in.GetRasterBand(1), proxy_ds.GetRasterBand(1), proximity_options) + # Save array + proxy_array = proxy_ds.GetRasterBand(1).ReadAsArray().astype("float32") + proxy_array[proxy_array == -9999] = np.nan + + # Close GDAL datasets + proxy_ds = None + ds_raster_in = None + + return proxy_array + + +class TestDistance: + + landsat_b4_path = gu.examples.get_path("everest_landsat_b4") + landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped") + everest_outlines_path = gu.examples.get_path("everest_rgi_outlines") + aster_dem_path = gu.examples.get_path("exploradores_aster_dem") + + def test_proximity_vector(self) -> None: + """ + The core functionality is already tested against GDAL in test_raster: just verify the vector-specific behaviour. 
+ #TODO: add an artificial test as well (mirroring TODO in test_raster) + """ + + vector = gu.Vector(self.everest_outlines_path) + + # -- Test 1: with a Raster provided -- + raster1 = gu.Raster(self.landsat_b4_crop_path) + prox1 = vector.proximity(raster=raster1) + + # The proximity should have the same extent, resolution and CRS + assert raster1.georeferenced_grid_equal(prox1) + + # With the base geometry + vector.proximity(raster=raster1, geometry_type="geometry") + + # With another geometry option + vector.proximity(raster=raster1, geometry_type="centroid") + + # With only inside proximity + vector.proximity(raster=raster1, in_or_out="in") + + # -- Test 2: with no Raster provided, just grid size -- + + # Default grid size + vector.proximity() + + # With specific grid size + vector.proximity(size=(100, 100)) + + # Test all options, with both an artificial Raster (that has all target values) and a real Raster + + @pytest.mark.parametrize("distunits", ["GEO", "PIXEL"]) # type: ignore + # 0 and 1,2,3 are especially useful for the artificial Raster, and 112 for the real Raster + @pytest.mark.parametrize("target_values", [[1, 2, 3], [0], [112], None]) # type: ignore + @pytest.mark.parametrize( + "raster", + [ + gu.Raster(landsat_b4_path), + gu.Raster.from_array( + np.arange(25, dtype="int32").reshape(5, 5), transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326 + ), + ], + ) # type: ignore + def test_proximity_raster_against_gdal( + self, distunits: str, target_values: list[float] | None, raster: gu.Raster + ) -> None: + """Test that proximity matches the results of GDAL for any parameter.""" + + # TODO: When adding new rasters for tests, specify warning only for Landsat + warnings.filterwarnings("ignore", message="Setting default nodata -99999 to mask non-finite values *") + + # We generate proximity with GDAL and GeoUtils + gdal_proximity = run_gdal_proximity(raster, target_values=target_values, distunits=distunits) + # We translate distunits GDAL option into its 
GeoUtils equivalent + if distunits == "GEO": + distance_unit = "georeferenced" + else: + distance_unit = "pixel" + geoutils_proximity = ( + raster.proximity(distance_unit=distance_unit, target_values=target_values) + .data.data.squeeze() + .astype("float32") + ) + + # The results should be the same in all cases + try: + # In some cases, the proximity differs slightly (generally <1%) for complex settings + # (Landsat Raster with target of 112) + # It looks like GDAL might not have the right value, + # so this particular case is treated differently in tests + if target_values is not None and target_values[0] == 112 and raster.filename is not None: + # Get index and number of not almost equal point (tolerance of 10-4) + ind_not_almost_equal = np.abs(gdal_proximity - geoutils_proximity) > 1e-04 + nb_not_almost_equal = np.count_nonzero(ind_not_almost_equal) + # Check that this is a minority of points (less than 0.5%) + assert nb_not_almost_equal < 0.005 * raster.width * raster.height + + # Replace these exceptions by zero in both + gdal_proximity[ind_not_almost_equal] = 0.0 + geoutils_proximity[ind_not_almost_equal] = 0.0 + # Check that all the rest is almost equal + assert np.allclose(gdal_proximity, geoutils_proximity, atol=1e-04, equal_nan=True) + + # Otherwise, results are exactly equal + else: + assert np.array_equal(gdal_proximity, geoutils_proximity, equal_nan=True) + + # For debugging + except Exception as exception: + import matplotlib.pyplot as plt + + # Plotting the xdem and GDAL attributes for comparison (plotting "diff" can also help debug) + plt.subplot(121) + plt.imshow(gdal_proximity) + # plt.imshow(np.abs(gdal_proximity - geoutils_proximity)>0.1) + plt.colorbar() + plt.subplot(122) + plt.imshow(geoutils_proximity) + # plt.imshow(raster.data.data == 112) + plt.colorbar() + plt.show() + + # ind_not_equal = np.abs(gdal_proximity - geoutils_proximity)>0.1 + # print(gdal_proximity[ind_not_equal]) + # print(geoutils_proximity[ind_not_equal]) + + raise 
exception + + def test_proximity_raster_parameters(self) -> None: + """ + Test that new (different to GDAL's) proximity parameters run. + No need to test the results specifically, as those rely entirely on the previous test with GDAL, + and tests in rasterize and shapely. + #TODO: Maybe add one test with an artificial vector to check it works as intended + """ + + # -- Test 1: with self's Raster alone -- + raster1 = gu.Raster(self.landsat_b4_path) + prox1 = raster1.proximity() + + # The raster should have the same extent, resolution and CRS + assert raster1.georeferenced_grid_equal(prox1) + + # It should change with target values specified + prox2 = raster1.proximity(target_values=[255]) + assert not np.array_equal(prox1.data, prox2.data) + + # -- Test 2: with a vector provided -- + vector = gu.Vector(self.everest_outlines_path) + + # With default options (boundary geometry) + raster1.proximity(vector=vector) + + # With the base geometry + raster1.proximity(vector=vector, geometry_type="geometry") + + # With another geometry option + raster1.proximity(vector=vector, geometry_type="centroid") + + # With only inside proximity + raster1.proximity(vector=vector, in_or_out="in") + + # Paths to example data + + # Mask without nodata + mask_landsat_b4 = gu.Raster(landsat_b4_path) > 125 + # Mask with nodata + mask_aster_dem = gu.Raster(aster_dem_path) > 2000 + # Mask from an outline + mask_everest = gu.Vector(everest_outlines_path).create_mask(gu.Raster(landsat_b4_path)) + + @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore + def test_proximity_mask(self, mask: gu.Mask) -> None: + mask_orig = mask.copy() + # Run default + rast = mask.proximity() + # Check the dtype of the original mask was properly reconverted + assert mask.data.dtype == bool + # Check the original mask was not modified during reprojection + assert mask_orig.raster_equal(mask) + + # Check that output is cast back into a raster + assert isinstance(rast, 
gu.Raster) + # A mask is a raster, so also need to check this + assert not isinstance(rast, gu.Mask) diff --git a/tests/test_pointcloud.py b/tests/test_interface/test_gridding.py similarity index 98% rename from tests/test_pointcloud.py rename to tests/test_interface/test_gridding.py index 484f8eee..35371f25 100644 --- a/tests/test_pointcloud.py +++ b/tests/test_interface/test_gridding.py @@ -7,7 +7,7 @@ from shapely import geometry from geoutils import Raster -from geoutils.pointcloud import _grid_pointcloud +from geoutils.interface.gridding import _grid_pointcloud class TestPointCloud: diff --git a/tests/test_raster/test_interpolate.py b/tests/test_interface/test_interpolate.py similarity index 99% rename from tests/test_raster/test_interpolate.py rename to tests/test_interface/test_interpolate.py index d2c743d7..87e5c126 100644 --- a/tests/test_raster/test_interpolate.py +++ b/tests/test_interface/test_interpolate.py @@ -11,13 +11,13 @@ import geoutils as gu from geoutils import examples -from geoutils.projtools import reproject_to_latlon -from geoutils.raster.interpolate import ( +from geoutils.interface.interpolate import ( _get_dist_nodata_spread, _interp_points, _interpn_interpolator, method_to_order, ) +from geoutils.projtools import reproject_to_latlon class TestInterpolate: @@ -81,7 +81,7 @@ def test_interpn_interpolator_accuracy( @pytest.mark.parametrize("tag_aop", [None, "Area", "Point"]) # type: ignore @pytest.mark.parametrize("shift_aop", [True, False]) # type: ignore - def test_interp_points__synthetic(self, tag_aop: str | None, shift_aop: bool) -> None: + def test_interp_points__synthetic(self, tag_aop: Literal["Area", "Point"] | None, shift_aop: bool) -> None: """ Test interp_points function with synthetic data: diff --git a/tests/test_interface/test_raster_point.py b/tests/test_interface/test_raster_point.py new file mode 100644 index 00000000..3b63bb59 --- /dev/null +++ b/tests/test_interface/test_raster_point.py @@ -0,0 +1,238 @@ +"""Tests for 
raster-point interfacing.""" + +from __future__ import annotations + +import re + +import numpy as np +import pytest +import rasterio as rio + +import geoutils as gu +from geoutils import examples + + +class TestRasterPointInterface: + + # Paths to example data + landsat_b4_path = examples.get_path("everest_landsat_b4") + landsat_rgb_path = examples.get_path("everest_landsat_rgb") + aster_dem_path = examples.get_path("exploradores_aster_dem") + + def test_to_pointcloud(self) -> None: + """Test to_pointcloud method.""" + + # 1/ Single band synthetic data + + # Create a small raster to test point sampling on + img_arr = np.arange(25, dtype="int32").reshape(5, 5) + img0 = gu.Raster.from_array(img_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326) + + # Sample the whole raster (fraction==1) + points = img0.to_pointcloud() + points_arr = img0.to_pointcloud(as_array=True) + + # Check output types + assert isinstance(points, gu.Vector) + assert isinstance(points_arr, np.ndarray) + + # Check that both outputs (array or vector) are fully consistent, order matters here + assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0]) + assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1]) + assert np.array_equal(points.ds["b1"].values, points_arr[:, 2]) + + # Validate that 25 points were sampled (equating to img1.height * img1.width) with x, y, and band0 values. 
+ assert points_arr.shape == (25, 3) + assert points.ds.shape == (25, 2) # One less column here due to geometry storing X and Y + # Check that X, Y and Z arrays are equal to raster array input independently of value order + x_coords, y_coords = img0.ij2xy(i=np.arange(0, 5), j=np.arange(0, 5)) + assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5))) + assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5))) + assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_arr.ravel())) + + # Check that subsampling works properly + points_arr = img0.to_pointcloud(subsample=0.2, as_array=True) + assert points_arr.shape == (5, 3) + + # All values should be between 0 and 25 + assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25) + + # 2/ Multi-band synthetic data + img_arr = np.arange(25, dtype="int32").reshape(5, 5) + img_3d_arr = np.stack((img_arr, 25 + img_arr, 50 + img_arr), axis=0) + img3d = gu.Raster.from_array(img_3d_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326) + + # Sample the whole raster (fraction==1) + points = img3d.to_pointcloud(auxiliary_data_bands=[2, 3]) + points_arr = img3d.to_pointcloud(as_array=True, auxiliary_data_bands=[2, 3]) + + # Check equality between both output types + assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0]) + assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1]) + assert np.array_equal(points.ds["b1"].values, points_arr[:, 2]) + assert np.array_equal(points.ds["b2"].values, points_arr[:, 3]) + assert np.array_equal(points.ds["b3"].values, points_arr[:, 4]) + + # Check it is the right data + assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5))) + assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5))) + assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_3d_arr[0, :, :].ravel())) + assert 
np.array_equal(np.sort(np.asarray(points_arr[:, 3])), np.sort(img_3d_arr[1, :, :].ravel())) + assert np.array_equal(np.sort(np.asarray(points_arr[:, 4])), np.sort(img_3d_arr[2, :, :].ravel())) + + # With a subsample + points_arr = img3d.to_pointcloud(as_array=True, subsample=10, auxiliary_data_bands=[2, 3]) + assert points_arr.shape == (10, 5) + + # Check the values are still good + assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25) + assert all(25 <= points_arr[:, 3]) and all(points_arr[:, 3] < 50) + assert all(50 <= points_arr[:, 4]) and all(points_arr[:, 4] < 75) + + # 3/ Single-band real raster with nodata values + img1 = gu.Raster(self.aster_dem_path) + + # Get a large sample to ensure there should be some NaNs normally + points_arr = img1.to_pointcloud(subsample=10000, as_array=True, random_state=42) + points = img1.to_pointcloud(subsample=10000, random_state=42) + + # This should not load the image + assert not img1.is_loaded + + # The subsampled values should be valid and the right shape + assert points_arr.shape == (10000, 3) + assert points.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y + assert all(np.isfinite(points_arr[:, 2])) + + # The output should respect the default band naming and the input CRS + assert np.array_equal(points.ds.columns, ["b1", "geometry"]) + assert points.crs == img1.crs + + # Try setting the band name + points = img1.to_pointcloud(data_column_name="lol", subsample=10) + assert np.array_equal(points.ds.columns, ["lol", "geometry"]) + + # Keeping the nodata values + points_invalid = img1.to_pointcloud(subsample=10000, random_state=42, skip_nodata=False) + + # The subsampled values should not all be valid and the right shape + assert points_invalid.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y + assert any(~np.isfinite(points_invalid["b1"].values)) + + # 4/ Multi-band real raster + img2 = gu.Raster(self.landsat_rgb_path) + + # By default only loads a single 
band without loading + points_arr = img2.to_pointcloud(subsample=10, as_array=True) + points = img2.to_pointcloud(subsample=10) + + assert points_arr.shape == (10, 3) + assert points.ds.shape == (10, 2) # One less column here due to geometry storing X and Y + assert not img2.is_loaded + + # Storing auxiliary bands + points_arr = img2.to_pointcloud(subsample=10, as_array=True, auxiliary_data_bands=[2, 3]) + points = img2.to_pointcloud(subsample=10, auxiliary_data_bands=[2, 3]) + assert points_arr.shape == (10, 5) + assert points.ds.shape == (10, 4) # One less column here due to geometry storing X and Y + assert not img2.is_loaded + assert np.array_equal(points.ds.columns, ["b1", "b2", "b3", "geometry"]) + + # Try setting the column name of a specific band while storing all + points = img2.to_pointcloud(subsample=10, data_column_name="yes", data_band=2, auxiliary_data_bands=[1, 3]) + assert np.array_equal(points.ds.columns, ["yes", "b1", "b3", "geometry"]) + + # 5/ Error raising + with pytest.raises(ValueError, match="Data column name must be a string.*"): + img1.to_pointcloud(data_column_name=1) # type: ignore + with pytest.raises( + ValueError, + match=re.escape("Data band number must be an integer between 1 and the total number of bands (3)."), + ): + img2.to_pointcloud(data_band=4) + with pytest.raises( + ValueError, match="Passing auxiliary column names requires passing auxiliary data band numbers as well." + ): + img2.to_pointcloud(auxiliary_column_names=["a"]) + with pytest.raises( + ValueError, match="Auxiliary data band number must be an iterable containing only integers." 
+ ): + img2.to_pointcloud(auxiliary_data_bands=[1, 2.5]) # type: ignore + img2.to_pointcloud(auxiliary_data_bands="lol") # type: ignore + with pytest.raises( + ValueError, + match=re.escape("Auxiliary data band numbers must be between 1 and the total number of bands (3)."), + ): + img2.to_pointcloud(auxiliary_data_bands=[0]) + img2.to_pointcloud(auxiliary_data_bands=[4]) + with pytest.raises( + ValueError, match=re.escape("Main data band 1 should not be listed in auxiliary data bands [1, 2].") + ): + img2.to_pointcloud(auxiliary_data_bands=[1, 2]) + with pytest.raises(ValueError, match="Auxiliary column names must be an iterable containing only strings."): + img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", 1]) + with pytest.raises( + ValueError, match="Length of auxiliary column name and data band numbers should be the same*" + ): + img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", "lol2", "lol3"]) + + def test_from_pointcloud(self) -> None: + """Test from_pointcloud method.""" + + # 1/ Create a small raster to test point sampling on + shape = (5, 5) + nodata = 100 + img_arr = np.arange(np.prod(shape), dtype="int32").reshape(shape) + transform = rio.transform.from_origin(0, 5, 1, 1) + img1 = gu.Raster.from_array(img_arr, transform=transform, crs=4326, nodata=nodata) + + # Check both inputs work (grid coords or transform+shape) on a subsample + pc1 = img1.to_pointcloud(subsample=10) + img1_sub = gu.Raster.from_pointcloud_regular(pc1, transform=transform, shape=shape) + + grid_coords1 = img1.coords(grid=False) + img1_sub2 = gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) + + assert img1_sub.raster_equal(img1_sub2) + + # Check that number of valid values are equal to point cloud size + assert np.count_nonzero(~img1_sub.data.mask) == 10 + + # With no subsampling, should get the exact same raster back + pc1_full = img1.to_pointcloud() + img1_full = gu.Raster.from_pointcloud_regular(pc1_full, 
transform=transform, shape=shape, nodata=nodata) + assert img1.raster_equal(img1_full, warn_failure_reason=True) + + # 2/ Single-band real raster with nodata values + img2 = gu.Raster(self.aster_dem_path) + nodata = img2.nodata + transform = img2.transform + shape = img2.shape + + # Check both inputs work (grid coords or transform+shape) on a subsample + pc2 = img2.to_pointcloud(subsample=10000, random_state=42) + img2_sub = gu.Raster.from_pointcloud_regular(pc2, transform=transform, shape=shape, nodata=nodata) + + grid_coords2 = img2.coords(grid=False) + img2_sub2 = gu.Raster.from_pointcloud_regular(pc2, grid_coords=grid_coords2, nodata=nodata) + + assert img2_sub.raster_equal(img2_sub2, warn_failure_reason=True) + + # Check that number of valid values are equal to point cloud size + assert np.count_nonzero(~img2_sub.data.mask) == 10000 + + # With no subsampling, should get the exact same raster back + pc2_full = img2.to_pointcloud() + img2_full = gu.Raster.from_pointcloud_regular(pc2_full, transform=transform, shape=shape, nodata=nodata) + assert img2.raster_equal(img2_full, warn_failure_reason=True, strict_masked=False) + + # 3/ Error raising + with pytest.raises(TypeError, match="Input grid coordinates must be 1D arrays.*"): + gu.Raster.from_pointcloud_regular(pc1, grid_coords=(1, "lol")) # type: ignore + with pytest.raises(ValueError, match="Grid coordinates must be regular*"): + grid_coords1[0][0] += 1 + gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) # type: ignore + with pytest.raises( + ValueError, match="Either grid coordinates or both geotransform and shape must be provided." 
+ ): + gu.Raster.from_pointcloud_regular(pc1) diff --git a/tests/test_interface/test_raster_vector.py b/tests/test_interface/test_raster_vector.py new file mode 100644 index 00000000..54005fcd --- /dev/null +++ b/tests/test_interface/test_raster_vector.py @@ -0,0 +1,257 @@ +"""Tests for raster-vector interfacing.""" + +from __future__ import annotations + +import warnings + +import geopandas as gpd +import numpy as np +import pytest +from scipy.ndimage import binary_erosion +from shapely import LineString, MultiLineString, MultiPolygon, Polygon + +import geoutils as gu +from geoutils import examples + +GLACIER_OUTLINES_URL = "http://public.data.npolar.no/cryoclim/CryoClim_GAO_SJ_1990.zip" + + +class TestRasterVectorInterface: + + # Create a synthetic vector file with a square of size 1, started at position (10, 10) + poly1 = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)]) + gdf = gpd.GeoDataFrame({"geometry": [poly1]}, crs="EPSG:4326") + vector = gu.Vector(gdf) + + # Same with a square started at position (5, 5) + poly2 = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]) + gdf = gpd.GeoDataFrame({"geometry": [poly2]}, crs="EPSG:4326") + vector2 = gu.Vector(gdf) + + # Create a multipolygon with both + multipoly = MultiPolygon([poly1, poly2]) + gdf = gpd.GeoDataFrame({"geometry": [multipoly]}, crs="EPSG:4326") + vector_multipoly = gu.Vector(gdf) + + # Create a synthetic vector file with a square of size 5, started at position (8, 8) + poly3 = Polygon([(8, 8), (13, 8), (13, 13), (8, 13)]) + gdf = gpd.GeoDataFrame({"geometry": [poly3]}, crs="EPSG:4326") + vector_5 = gu.Vector(gdf) + + # Create a synthetic LineString geometry + lines = LineString([(10, 10), (11, 10), (11, 11)]) + gdf = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326") + vector_lines = gu.Vector(gdf) + + # Create a synthetic MultiLineString geometry + multilines = MultiLineString([[(10, 10), (11, 10), (11, 11)], [(5, 5), (6, 5), (6, 6)]]) + gdf = gpd.GeoDataFrame({"geometry": [multilines]}, 
crs="EPSG:4326") + vector_multilines = gu.Vector(gdf) + + def test_create_mask(self) -> None: + """ + Test Vector.create_mask. + """ + # First with given res and bounds -> Should be a 21 x 21 array with 0 everywhere except center pixel + vector = self.vector.copy() + out_mask = vector.create_mask(xres=1, bounds=(0, 0, 21, 21), as_array=True) + ref_mask = np.zeros((21, 21), dtype="bool") + ref_mask[10, 10] = True + assert out_mask.shape == (21, 21) + assert np.all(ref_mask == out_mask) + + # Check that vector has not been modified by accident + assert vector.bounds == self.vector.bounds + assert len(vector.ds) == len(self.vector.ds) + assert vector.crs == self.vector.crs + + # Then with a gu.Raster as reference, single band + rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326") + out_mask = vector.create_mask(rst, as_array=True) + assert out_mask.shape == (21, 21) + + # With gu.Raster, 2 bands -> fails... + # rst = gu.Raster.from_array(np.zeros((2, 21, 21)), transform=(1., 0., 0., 0., -1., 21.), crs='EPSG:4326') + # out_mask = vector.create_mask(rst) + + # Test that buffer = 0 works + out_mask_buff = vector.create_mask(rst, buffer=0, as_array=True) + assert np.all(ref_mask == out_mask_buff) + + # Test that buffer > 0 works + rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326") + out_mask = vector.create_mask(rst, as_array=True) + for buffer in np.arange(1, 8): + out_mask_buff = vector.create_mask(rst, buffer=buffer, as_array=True) + diff = out_mask_buff & ~out_mask + assert np.count_nonzero(diff) > 0 + # Difference between masks should always be thinner than buffer + 1 + eroded_diff = binary_erosion(diff.squeeze(), np.ones((buffer + 1, buffer + 1))) + assert np.count_nonzero(eroded_diff) == 0 + + # Test that buffer < 0 works + vector_5 = self.vector_5 + out_mask = vector_5.create_mask(rst, as_array=True) + for buffer in np.arange(-1, -3, -1): + out_mask_buff 
= vector_5.create_mask(rst, buffer=buffer, as_array=True) + diff = ~out_mask_buff & out_mask + assert np.count_nonzero(diff) > 0 + # Difference between masks should always be thinner than buffer + 1 + eroded_diff = binary_erosion(diff.squeeze(), np.ones((abs(buffer) + 1, abs(buffer) + 1))) + assert np.count_nonzero(eroded_diff) == 0 + + # Check that no warning is raised when creating a mask with a xres not multiple of vector bounds + mask = vector.create_mask(xres=1.01) + + # Check that by default, create_mask returns a Mask + assert isinstance(mask, gu.Mask) + + # Check that an error is raised if xres is not passed + with pytest.raises(ValueError, match="At least raster or xres must be set."): + vector.create_mask() + + # Check that an error is raised if buffer is the wrong type + with pytest.raises(TypeError, match="Buffer must be a number, currently set to str."): + vector.create_mask(rst, buffer="lol") # type: ignore + + # If the raster has the wrong type + with pytest.raises(TypeError, match="Raster must be a geoutils.Raster or None."): + vector.create_mask("lol") # type: ignore + + # Check that a warning is raised if the bounds were passed specifically by the user + with pytest.warns(UserWarning): + vector.create_mask(xres=1.01, bounds=(0, 0, 21, 21)) + + landsat_b4_path = examples.get_path("everest_landsat_b4") + landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped") + everest_outlines_path = gu.examples.get_path("everest_rgi_outlines") + aster_dem_path = gu.examples.get_path("exploradores_aster_dem") + aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines") + glacier_outlines = gu.Vector(GLACIER_OUTLINES_URL) + + def test_rasterize_proj(self) -> None: + # Capture the warning on resolution not matching exactly bounds + with pytest.warns(UserWarning): + burned = self.glacier_outlines.rasterize(xres=3000) + + assert burned.shape[0] == 146 + assert burned.shape[1] == 115 + + def test_rasterize_unproj(self) -> None: + """Test 
rasterizing an EPSG:4326 dataset into a projection.""" + + vct = gu.Vector(self.everest_outlines_path) + rst = gu.Raster(self.landsat_b4_crop_path) + + # Use Web Mercator at 30 m. + # Capture the warning on resolution not matching exactly bounds + with pytest.warns(UserWarning): + burned = vct.rasterize(xres=30, crs=3857) + + assert burned.shape[0] == 1251 + assert burned.shape[1] == 1522 + + # Typically, rasterize returns a raster + burned_in2_out1 = vct.rasterize(raster=rst, in_value=2, out_value=1) + assert isinstance(burned_in2_out1, gu.Raster) + + # For an in_value of 1 and out_value of 0 (default), it returns a mask + burned_mask = vct.rasterize(raster=rst, in_value=1) + assert isinstance(burned_mask, gu.Mask) + + # Check that rasterizing with in_value=1 is the same as creating a mask + assert burned_mask.raster_equal(vct.create_mask(raster=rst)) + + # The two rasterizations should match + assert np.all(burned_in2_out1[burned_mask] == 2) + assert np.all(burned_in2_out1[~burned_mask] == 1) + + # Check that errors are raised + with pytest.raises(ValueError, match="Only one of raster or crs can be provided."): + vct.rasterize(raster=rst, crs=3857) + + @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore + def test_polygonize(self, example: str) -> None: + """Test that polygonize doesn't raise errors.""" + + img = gu.Raster(example) + + # -- Test 1: basic functioning of polygonize -- + + # Get unique value for image and the corresponding area + value = np.unique(img)[0] + pixel_area = np.count_nonzero(img.data == value) * img.res[0] * img.res[1] + + # Polygonize the raster for this value, and compute the total area + polygonized = img.polygonize(target_values=value) + polygon_area = polygonized.ds.area.sum() + + # Check that these two areas are approximately equal + assert polygon_area == pytest.approx(pixel_area) + assert isinstance(polygonized, gu.Vector) + assert polygonized.crs == img.crs + + # Check default name of data 
column, and that defining a custom name works the same + assert "id" in polygonized.ds.columns + polygonized2 = img.polygonize(target_values=value, data_column_name="myname") + assert "myname" in polygonized2.ds.columns + assert np.array_equal(polygonized2.ds["myname"].values, polygonized.ds["id"].values) + + # -- Test 2: data types -- + + # Check that polygonize works as expected for any input dtype (e.g. float64 being not supported by GeoPandas) + for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64"]: + img_dtype = img.copy() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=UserWarning, message="dtype conversion will result in a " "loss of information.*" + ) + warnings.filterwarnings( + "ignore", + category=UserWarning, + message="Unmasked values equal to the nodata value found in data array.*", + ) + img_dtype = img_dtype.astype(dtype) + value = np.unique(img_dtype)[0] + img_dtype.polygonize(target_values=value) + + # And for a boolean object, such as a mask + mask = img > value + mask.polygonize(target_values=1) + + +class TestMaskVectorInterface: + + # Paths to example data + landsat_b4_path = examples.get_path("everest_landsat_b4") + landsat_rgb_path = examples.get_path("everest_landsat_rgb") + everest_outlines_path = examples.get_path("everest_rgi_outlines") + aster_dem_path = examples.get_path("exploradores_aster_dem") + + # Mask without nodata + mask_landsat_b4 = gu.Raster(landsat_b4_path) > 125 + # Mask with nodata + mask_aster_dem = gu.Raster(aster_dem_path) > 2000 + # Mask from an outline + mask_everest = gu.Vector(everest_outlines_path).create_mask(gu.Raster(landsat_b4_path)) + + @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore + def test_polygonize(self, mask: gu.Mask) -> None: + mask_orig = mask.copy() + # Run default + vect = mask.polygonize() + # Check the dtype of the original mask was properly reconverted + assert mask.data.dtype 
== bool + # Check the original mask was not modified during polygonizing + assert mask_orig.raster_equal(mask) + + # Check the output is cast into a vector + assert isinstance(vect, gu.Vector) + + # Run with zero as target + vect = mask.polygonize(target_values=0) + assert isinstance(vect, gu.Vector) + + # Check a warning is raised when using a non-boolean value + with pytest.warns(UserWarning, match="In-value converted to 1 for polygonizing boolean mask."): + mask.polygonize(target_values=2) diff --git a/tests/test_pointcloud/test_pointcloud.py b/tests/test_pointcloud/test_pointcloud.py new file mode 100644 index 00000000..ec26e5d3 --- /dev/null +++ b/tests/test_pointcloud/test_pointcloud.py @@ -0,0 +1 @@ +"""Test for future PointCloud class.""" diff --git a/tests/test_projtools.py b/tests/test_projtools.py index a10baa7e..b3157210 100644 --- a/tests/test_projtools.py +++ b/tests/test_projtools.py @@ -1,6 +1,7 @@ """ Test projtools """ + import os.path import geopandas as gpd diff --git a/tests/test_raster/test_array.py b/tests/test_raster/test_array.py index 54a6c876..1e02c612 100644 --- a/tests/test_raster/test_array.py +++ b/tests/test_raster/test_array.py @@ -1,4 +1,5 @@ """Test array tools.""" + from __future__ import annotations import warnings @@ -8,6 +9,11 @@ import rasterio as rio import geoutils as gu +from geoutils.raster.array import ( + _get_array_and_mask, + _get_valid_extent, + _get_xy_rotated, +) class TestArray: @@ -15,7 +21,7 @@ class TestArray: @pytest.mark.parametrize( "mask_and_viewable", [ - (None, True), # An ndarray with no mask should support views + (None, True), # A ndarray with no mask should support views (False, True), # A masked array with an empty mask should support views ([True, False, False, False], False), # A masked array with an occupied mask should not support views. ([False, False, False, False], True), # A masked array with an empty occupied mask should support views. 
@@ -53,13 +59,13 @@ def test_get_array_and_mask( # Validate that incorrect shapes raise the correct error. if not check_should_pass: with pytest.raises(ValueError, match="Invalid array shape given"): - gu.raster.get_array_and_mask(array, check_shape=True) + _get_array_and_mask(array, check_shape=True) # Stop the test here as the failure is now validated. return # Get a copy of the array and check its shape (it should always pass at this point) - arr, _ = gu.raster.get_array_and_mask(array, copy=True, check_shape=True) + arr, _ = _get_array_and_mask(array, copy=True, check_shape=True) # Validate that the array is a copy assert not np.shares_memory(arr, array) @@ -76,7 +82,7 @@ def test_get_array_and_mask( warnings.simplefilter("always") # Try to create a view. - arr_view, mask = gu.raster.get_array_and_mask(array, copy=False) + arr_view, mask = _get_array_and_mask(array, copy=False) # If it should be possible, validate that there were no warnings. if view_should_be_possible: @@ -102,21 +108,21 @@ def test_get_valid_extent(self) -> None: # For no invalid values, the function should return the edges # For the array - assert (0, 4, 0, 4) == gu.raster.get_valid_extent(arr) + assert (0, 4, 0, 4) == _get_valid_extent(arr) # For the masked-array - assert (0, 4, 0, 4) == gu.raster.get_valid_extent(mask_ma) + assert (0, 4, 0, 4) == _get_valid_extent(mask_ma) # 1/ First column: # If we mask it in the masked array mask_ma[0, :] = np.ma.masked - assert (1, 4, 0, 4) == gu.raster.get_valid_extent(mask_ma) + assert (1, 4, 0, 4) == _get_valid_extent(mask_ma) # If we changed the array to NaNs arr[0, :] = np.nan - assert (1, 4, 0, 4) == gu.raster.get_valid_extent(arr) + assert (1, 4, 0, 4) == _get_valid_extent(arr) mask_ma.data[0, :] = np.nan mask_ma.mask = False - assert (1, 4, 0, 4) == gu.raster.get_valid_extent(mask_ma) + assert (1, 4, 0, 4) == _get_valid_extent(mask_ma) # 2/ First row: arr = np.ones(shape=(5, 5)) @@ -124,14 +130,14 @@ def test_get_valid_extent(self) -> None: 
mask_ma = np.ma.masked_array(data=arr, mask=arr_mask) # If we mask it in the masked array mask_ma[:, 0] = np.ma.masked - assert (0, 4, 1, 4) == gu.raster.get_valid_extent(mask_ma) + assert (0, 4, 1, 4) == _get_valid_extent(mask_ma) # If we changed the array to NaNs arr[:, 0] = np.nan - assert (0, 4, 1, 4) == gu.raster.get_valid_extent(arr) + assert (0, 4, 1, 4) == _get_valid_extent(arr) mask_ma.data[:, 0] = np.nan mask_ma.mask = False - assert (0, 4, 1, 4) == gu.raster.get_valid_extent(mask_ma) + assert (0, 4, 1, 4) == _get_valid_extent(mask_ma) # 3/ Last column: arr = np.ones(shape=(5, 5)) @@ -140,14 +146,14 @@ def test_get_valid_extent(self) -> None: # If we mask it in the masked array mask_ma[-1, :] = np.ma.masked - assert (0, 3, 0, 4) == gu.raster.get_valid_extent(mask_ma) + assert (0, 3, 0, 4) == _get_valid_extent(mask_ma) # If we changed the array to NaNs arr[-1, :] = np.nan - assert (0, 3, 0, 4) == gu.raster.get_valid_extent(arr) + assert (0, 3, 0, 4) == _get_valid_extent(arr) mask_ma.data[-1, :] = np.nan mask_ma.mask = False - assert (0, 3, 0, 4) == gu.raster.get_valid_extent(mask_ma) + assert (0, 3, 0, 4) == _get_valid_extent(mask_ma) # 4/ Last row: arr = np.ones(shape=(5, 5)) @@ -156,14 +162,14 @@ def test_get_valid_extent(self) -> None: # If we mask it in the masked array mask_ma[:, -1] = np.ma.masked - assert (0, 4, 0, 3) == gu.raster.get_valid_extent(mask_ma) + assert (0, 4, 0, 3) == _get_valid_extent(mask_ma) # If we changed the array to NaNs arr[:, -1] = np.nan - assert (0, 4, 0, 3) == gu.raster.get_valid_extent(arr) + assert (0, 4, 0, 3) == _get_valid_extent(arr) mask_ma.data[:, -1] = np.nan mask_ma.mask = False - assert (0, 4, 0, 3) == gu.raster.get_valid_extent(mask_ma) + assert (0, 4, 0, 3) == _get_valid_extent(mask_ma) def test_get_xy_rotated(self) -> None: """Check the function to rotate array.""" @@ -178,27 +184,27 @@ def test_get_xy_rotated(self) -> None: xx, yy = r1.coords(grid=True, force_offset="ll") # Rotating the coordinates 90 degrees 
should be the same as rotating the array - xx90, yy90 = gu.raster.get_xy_rotated(r1, along_track_angle=90) + xx90, yy90 = _get_xy_rotated(r1, along_track_angle=90) assert np.allclose(np.rot90(xx90), xx) assert np.allclose(np.rot90(yy90), yy) # Same for 180 degrees - xx180, yy180 = gu.raster.get_xy_rotated(r1, along_track_angle=180) + xx180, yy180 = _get_xy_rotated(r1, along_track_angle=180) assert np.allclose(np.rot90(xx180, k=2), xx) assert np.allclose(np.rot90(yy180, k=2), yy) # Same for 270 degrees - xx270, yy270 = gu.raster.get_xy_rotated(r1, along_track_angle=270) + xx270, yy270 = _get_xy_rotated(r1, along_track_angle=270) assert np.allclose(np.rot90(xx270, k=3), xx) assert np.allclose(np.rot90(yy270, k=3), yy) # 360 degrees should get us back on our feet - xx360, yy360 = gu.raster.get_xy_rotated(r1, along_track_angle=360) + xx360, yy360 = _get_xy_rotated(r1, along_track_angle=360) assert np.allclose(xx360, xx) assert np.allclose(yy360, yy) # Test that the values make sense for 45 degrees - xx45, yy45 = gu.raster.get_xy_rotated(r1, along_track_angle=45) + xx45, yy45 = _get_xy_rotated(r1, along_track_angle=45) # Should have zero on the upper left corner for xx assert xx45[0, 0] == pytest.approx(0) # Then a multiple of sqrt2 along each dimension @@ -209,4 +215,4 @@ def test_get_xy_rotated(self) -> None: # Finally, yy should be rotated by 90 assert np.allclose(np.rot90(xx45), yy45) - xx, yy = gu.raster.get_xy_rotated(r1, along_track_angle=90) + xx, yy = _get_xy_rotated(r1, along_track_angle=90) diff --git a/tests/test_raster/test_geotransformations.py b/tests/test_raster/test_geotransformations.py new file mode 100644 index 00000000..becf2d5e --- /dev/null +++ b/tests/test_raster/test_geotransformations.py @@ -0,0 +1,777 @@ +"""Test for geotransformations of raster objects.""" + +from __future__ import annotations + +import re +import warnings + +import matplotlib.pyplot as plt +import numpy as np +import pytest +import rasterio as rio + +import geoutils as gu 
+from geoutils import examples +from geoutils.raster.geotransformations import _resampling_method_from_str +from geoutils.raster.raster import _default_nodata + +DO_PLOT = False + + +class TestRasterGeotransformations: + + landsat_b4_path = examples.get_path("everest_landsat_b4") + landsat_b4_crop_path = examples.get_path("everest_landsat_b4_cropped") + landsat_rgb_path = examples.get_path("everest_landsat_rgb") + everest_outlines_path = examples.get_path("everest_rgi_outlines") + aster_dem_path = examples.get_path("exploradores_aster_dem") + aster_outlines_path = examples.get_path("exploradores_rgi_outlines") + + def test_resampling_str(self) -> None: + """Test that resampling methods can be given as strings instead of rio enums.""" + warnings.simplefilter("error") + assert _resampling_method_from_str("nearest") == rio.enums.Resampling.nearest # noqa + assert _resampling_method_from_str("cubic_spline") == rio.enums.Resampling.cubic_spline # noqa + + # Check that odd strings return the appropriate error. + try: + _resampling_method_from_str("CUBIC_SPLINE") # noqa + except ValueError as exception: + if "not a valid rasterio.enums.Resampling method" not in str(exception): + raise exception + + img1 = gu.Raster(self.landsat_b4_path) + img2 = gu.Raster(self.landsat_b4_crop_path) + # Set img2 pixel interpretation as "Point" to match "img1" and avoid any warnings + img2.set_area_or_point("Point", shift_area_or_point=False) + img1.set_nodata(0) + img2.set_nodata(0) + + # Resample the rasters using a new resampling method and see that the string and enum gives the same result. 
+ img3a = img1.reproject(img2, resampling="q1") + img3b = img1.reproject(img2, resampling=rio.enums.Resampling.q1) + assert img3a.raster_equal(img3b) + + test_data = [[landsat_b4_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]] + + @pytest.mark.parametrize("data", test_data) # type: ignore + def test_crop(self, data: list[str]) -> None: + """Test for crop method, also called by square brackets through __getitem__""" + + raster_path, outlines_path = data + r = gu.Raster(raster_path) + + # -- Test with crop_geom being a list/tuple -- ## + crop_geom: list[float] = list(r.bounds) + + # Test unloaded inplace cropping conserves the shape + r.crop(crop_geom=[crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]], inplace=True) + assert len(r.data.shape) == 2 + + r = gu.Raster(raster_path) + + # Test with same bounds -> should be the same # + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3]] + r_cropped = r.crop(crop_geom2) + assert r_cropped.raster_equal(r) + + # - Test cropping each side by a random integer number of pixels - # + rng = np.random.default_rng(42) + rand_int = rng.integers(1, min(r.shape) - 1) + + # Left + crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] + r_cropped = r.crop(crop_geom2) + assert list(r_cropped.bounds) == crop_geom2 + assert np.array_equal(r.data[:, rand_int:].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[:, rand_int:].mask, r_cropped.data.mask) + + # Right + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * r.res[0], crop_geom[3]] + r_cropped = r.crop(crop_geom2) + assert list(r_cropped.bounds) == crop_geom2 + assert np.array_equal(r.data[:, :-rand_int].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[:, :-rand_int].mask, r_cropped.data.mask) + + # Bottom + crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(r.res[1]), crop_geom[2], crop_geom[3]] + r_cropped = r.crop(crop_geom2) 
+ assert list(r_cropped.bounds) == crop_geom2 + assert np.array_equal(r.data[:-rand_int, :].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[:-rand_int, :].mask, r_cropped.data.mask) + + # Top + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(r.res[1])] + r_cropped = r.crop(crop_geom2) + assert list(r_cropped.bounds) == crop_geom2 + assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data, equal_nan=True) + assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask) + + # Same but tuple + crop_geom3: tuple[float, float, float, float] = ( + crop_geom[0], + crop_geom[1], + crop_geom[2], + crop_geom[3] - rand_int * r.res[0], + ) + r_cropped = r.crop(crop_geom3) + assert list(r_cropped.bounds) == list(crop_geom3) + assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask) + + # -- Test with crop_geom being a Raster -- # + r_cropped2 = r.crop(r_cropped) + assert r_cropped2.raster_equal(r_cropped) + + # Check that bound reprojection is done automatically if the CRS differ + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") + + r_cropped_reproj = r_cropped.reproject(crs=3857) + r_cropped3 = r.crop(r_cropped_reproj) + + # Original CRS bounds can be deformed during transformation, but result should be equivalent to this + r_cropped4 = r.crop(crop_geom=r_cropped_reproj.get_bounds_projected(out_crs=r.crs)) + assert r_cropped3.raster_equal(r_cropped4) + + # -- Test with inplace=True -- # + r_copy = r.copy() + r_copy.crop(r_cropped, inplace=True) + assert r_copy.raster_equal(r_cropped) + + # - Test cropping each side with a non integer pixel, mode='match_pixel' - # + rand_float = rng.integers(1, min(r.shape) - 1) + 0.25 + + # left + crop_geom2 = [crop_geom[0] + rand_float * r.res[0], crop_geom[1], crop_geom[2], 
crop_geom[3]] + r_cropped = r.crop(crop_geom2) + assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float) + assert np.array_equal(r.data[:, int(rand_float) :].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[:, int(rand_float) :].mask, r_cropped.data.mask) + + # right + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_float * r.res[0], crop_geom[3]] + r_cropped = r.crop(crop_geom2) + assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float) + assert np.array_equal(r.data[:, : -int(rand_float)].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[:, : -int(rand_float)].mask, r_cropped.data.mask) + + # bottom + crop_geom2 = [crop_geom[0], crop_geom[1] + rand_float * abs(r.res[1]), crop_geom[2], crop_geom[3]] + r_cropped = r.crop(crop_geom2) + assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float) + assert np.array_equal(r.data[: -int(rand_float), :].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[: -int(rand_float), :].mask, r_cropped.data.mask) + + # top + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_float * abs(r.res[1])] + r_cropped = r.crop(crop_geom2) + assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float) + assert np.array_equal(r.data[int(rand_float) :, :].data, r_cropped.data.data, equal_nan=True) + assert np.array_equal(r.data[int(rand_float) :, :].mask, r_cropped.data.mask) + + # -- Test with mode='match_extent' -- # + # Test all sides at once, with rand_float less than half the smallest extent + # The cropped extent should exactly match the requested extent, res will be changed accordingly + rand_float = rng.integers(1, min(r.shape) / 2 - 1) + 0.25 + crop_geom2 = [ + crop_geom[0] + rand_float * r.res[0], + crop_geom[1] + rand_float * abs(r.res[1]), + crop_geom[2] - 
rand_float * r.res[0], + crop_geom[3] - rand_float * abs(r.res[1]), + ] + + # Filter warning about nodata not set in reprojection (because match_extent triggers reproject) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") + r_cropped = r.crop(crop_geom2, mode="match_extent") + + assert list(r_cropped.bounds) == crop_geom2 + # The change in resolution should be less than what would occur with +/- 1 pixel + assert np.all( + abs(np.array(r.res) - np.array(r_cropped.res)) < np.array(r.res) / np.array(r_cropped.shape)[::-1] + ) + + # Filter warning about nodata not set in reprojection (because match_extent triggers reproject) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") + r_cropped2 = r.crop(r_cropped, mode="match_extent") + assert r_cropped2.raster_equal(r_cropped) + + # -- Test with crop_geom being a Vector -- # + outlines = gu.Vector(outlines_path) + + # First, we reproject manually the outline + outlines_reproj = gu.Vector(outlines.ds.to_crs(r.crs)) + r_cropped = r.crop(outlines_reproj) + + # Calculate intersection of the two bounding boxes and make sure crop has same bounds + win_outlines = rio.windows.from_bounds(*outlines_reproj.bounds, transform=r.transform) + win_raster = rio.windows.from_bounds(*r.bounds, transform=r.transform) + final_window = win_outlines.intersection(win_raster).round_lengths().round_offsets() + new_bounds = rio.windows.bounds(final_window, transform=r.transform) + assert list(r_cropped.bounds) == list(new_bounds) + + # Second, we check that bound reprojection is done automatically if the CRS differ + r_cropped2 = r.crop(outlines) + assert list(r_cropped2.bounds) == list(new_bounds) + + # -- Test crop works as expected even if transform has been modified, e.g. 
through downsampling -- # + # Test that with downsampling, cropping to same bounds result in same raster + r = gu.Raster(raster_path, downsample=5) + r_test = r.crop(r.bounds) + assert r_test.raster_equal(r) + + # - Test that cropping yields the same results whether data is loaded or not - + # With integer cropping (left) + rand_int = rng.integers(1, min(r.shape) - 1) + crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] + r = gu.Raster(raster_path, downsample=5, load_data=False) + assert not r.is_loaded + r_crop_unloaded = r.crop(crop_geom2) + r.load() + r_crop_loaded = r.crop(crop_geom2) + # TODO: the following condition should be met once issue #447 is solved + # assert r_crop_unloaded.raster_equal(r_crop_loaded) + assert r_crop_unloaded.shape == r_crop_loaded.shape + assert r_crop_unloaded.transform == r_crop_loaded.transform + + # With a float number of pixels added to the right, mode 'match_pixel' + rand_float = rng.integers(1, min(r.shape) - 1) + 0.25 + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] + rand_float * r.res[0], crop_geom[3]] + r = gu.Raster(raster_path, downsample=5, load_data=False) + assert not r.is_loaded + r_crop_unloaded = r.crop(crop_geom2, mode="match_pixel") + r.load() + r_crop_loaded = r.crop(crop_geom2, mode="match_pixel") + # TODO: the following condition should be met once issue #447 is solved + # assert r_crop_unloaded.raster_equal(r_crop_loaded) + assert r_crop_unloaded.shape == r_crop_loaded.shape + assert r_crop_unloaded.transform == r_crop_loaded.transform + + # - Check related to pixel interpretation - + + # Check warning for a different area_or_point for the match-reference geometry works + r.set_area_or_point("Area", shift_area_or_point=False) + r2 = r.copy() + r2.set_area_or_point("Point", shift_area_or_point=False) + + with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'): + r.crop(r2) + + # Check that cropping preserves 
the interpretation + crop_geom = [crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] + r_crop = r.crop(crop_geom) + assert r_crop.area_or_point == "Area" + r2_crop = r2.crop(crop_geom) + assert r2_crop.area_or_point == "Point" + + @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path, landsat_rgb_path]) # type: ignore + def test_translate(self, example: str) -> None: + """Test translation works as intended""" + + r = gu.Raster(example) + + # Get original transform + orig_transform = r.transform + orig_bounds = r.bounds + + # Shift raster by georeferenced units (default) + # Check the default behaviour is not inplace + r_notinplace = r.translate(xoff=1, yoff=1) + assert isinstance(r_notinplace, gu.Raster) + + # Check inplace + r.translate(xoff=1, yoff=1, inplace=True) + # Both shifts should have yielded the same transform + assert r.transform == r_notinplace.transform + + # Only bounds should change + assert orig_transform.c + 1 == r.transform.c + assert orig_transform.f + 1 == r.transform.f + for attr in ["a", "b", "d", "e"]: + assert getattr(orig_transform, attr) == getattr(r.transform, attr) + + assert orig_bounds.left + 1 == r.bounds.left + assert orig_bounds.right + 1 == r.bounds.right + assert orig_bounds.bottom + 1 == r.bounds.bottom + assert orig_bounds.top + 1 == r.bounds.top + + # Shift raster using pixel units + orig_transform = r.transform + orig_bounds = r.bounds + orig_res = r.res + r.translate(xoff=1, yoff=1, distance_unit="pixel", inplace=True) + + # Only bounds should change + assert orig_transform.c + 1 * orig_res[0] == r.transform.c + assert orig_transform.f + 1 * orig_res[1] == r.transform.f + for attr in ["a", "b", "d", "e"]: + assert getattr(orig_transform, attr) == getattr(r.transform, attr) + + assert orig_bounds.left + 1 * orig_res[0] == r.bounds.left + assert orig_bounds.right + 1 * orig_res[0] == r.bounds.right + assert orig_bounds.bottom + 1 * orig_res[1] == r.bounds.bottom + assert orig_bounds.top + 1 * 
orig_res[1] == r.bounds.top + + # Check that an error is raised for a wrong distance_unit + with pytest.raises(ValueError, match="Argument 'distance_unit' should be either 'pixel' or 'georeferenced'."): + r.translate(xoff=1, yoff=1, distance_unit="wrong_value") # type: ignore + + @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore + def test_reproject(self, example: str) -> None: + warnings.simplefilter("error") + + # Reference raster to be used + r = gu.Raster(example) + + # -- Check proper errors are raised if nodata are not set -- # + r_nodata = r.copy() + r_nodata.set_nodata(None) + + # Make sure at least one pixel is masked for test 1 + rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True) + r_nodata.data[rand_indices] = np.ma.masked + assert np.count_nonzero(r_nodata.data.mask) > 0 + + # make sure at least one pixel is set at default nodata for test + default_nodata = _default_nodata(r_nodata.dtype) + rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True) + r_nodata.data[rand_indices] = default_nodata + assert np.count_nonzero(r_nodata.data == default_nodata) > 0 + + # 1 - if no force_source_nodata is set and masked values exist, raises an error + with pytest.raises( + ValueError, + match=re.escape( + "No nodata set, set one for the raster with self.set_nodata() or use a " + "temporary one with `force_source_nodata`." + ), + ): + _ = r_nodata.reproject(res=r_nodata.res[0] / 2, nodata=0) + + # 2 - if no nodata is set and default value conflicts with existing value, a warning is raised + with pytest.warns( + UserWarning, + match=re.escape( + f"For reprojection, nodata must be set. Default chosen value " + f"{_default_nodata(r_nodata.dtype)} exists in self.data. This may have unexpected " + f"consequences. Consider setting a different nodata with self.set_nodata()." 
+ ), + ): + r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata) + assert r_test.nodata == default_nodata + + # 3 - if default nodata does not conflict, should not raise a warning + r_nodata.data[r_nodata.data == default_nodata] = 3 + r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata) + assert r_test.nodata == default_nodata + + # -- Test setting each combination of georeferences bounds, res and size -- # + + # specific for the landsat test case, default nodata 255 cannot be used (see above), so use 0 + if r.nodata is None: + r.set_nodata(0) + + # - Test size - this should modify the shape, and hence resolution, but not the bounds - + out_size = (r.shape[1] // 2, r.shape[0] // 2) # Outsize is (ncol, nrow) + r_test = r.reproject(grid_size=out_size) + assert r_test.shape == (out_size[1], out_size[0]) + assert r_test.res != r.res + assert r_test.bounds == r.bounds + + # - Test bounds - + # if bounds is a multiple of res, output res should be preserved + bounds = np.copy(r.bounds) + dst_bounds = rio.coords.BoundingBox( + left=bounds[0], bottom=bounds[1] + r.res[0], right=bounds[2] - 2 * r.res[1], top=bounds[3] + ) + r_test = r.reproject(bounds=dst_bounds) + assert r_test.bounds == dst_bounds + assert r_test.res == r.res + + # Create bounds with 1/2 and 1/3 pixel extra on the right/bottom.
+ bounds = np.copy(r.bounds) + dst_bounds = rio.coords.BoundingBox( + left=bounds[0], bottom=bounds[1] - r.res[0] / 3.0, right=bounds[2] + r.res[1] / 2.0, top=bounds[3] + ) + + # If bounds are not a multiple of res, the latter will be updated accordingly + r_test = r.reproject(bounds=dst_bounds) + assert r_test.bounds == dst_bounds + assert r_test.res != r.res + + # - Test size and bounds - + r_test = r.reproject(grid_size=out_size, bounds=dst_bounds) + assert r_test.shape == (out_size[1], out_size[0]) + assert r_test.bounds == dst_bounds + + # - Test res - + # Using a single value, output res will be enforced, resolution will be different + res_single = r.res[0] * 2 + r_test = r.reproject(res=res_single) + assert r_test.res == (res_single, res_single) + assert r_test.shape != r.shape + + # Using a tuple + res_tuple = (r.res[0] * 0.5, r.res[1] * 4) + r_test = r.reproject(res=res_tuple) + assert r_test.res == res_tuple + assert r_test.shape != r.shape + + # - Test res and bounds - + # Bounds will be enforced for upper-left pixel, but adjusted by up to one pixel for the lower right bound. 
+ # for single res value + r_test = r.reproject(bounds=dst_bounds, res=res_single) + assert r_test.res == (res_single, res_single) + assert r_test.bounds.left == dst_bounds.left + assert r_test.bounds.top == dst_bounds.top + assert np.abs(r_test.bounds.right - dst_bounds.right) < res_single + assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_single + + # For tuple + r_test = r.reproject(bounds=dst_bounds, res=res_tuple) + assert r_test.res == res_tuple + assert r_test.bounds.left == dst_bounds.left + assert r_test.bounds.top == dst_bounds.top + assert np.abs(r_test.bounds.right - dst_bounds.right) < res_tuple[0] + assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_tuple[1] + + # - Test crs - + out_crs = rio.crs.CRS.from_epsg(4326) + r_test = r.reproject(crs=out_crs) + assert r_test.crs.to_epsg() == 4326 + + # -- Additional tests -- + # First, make sure dst_bounds extend beyond current extent to create nodata + dst_bounds = rio.coords.BoundingBox( + left=bounds[0], bottom=bounds[1] - r.res[0], right=bounds[2] + 2 * r.res[1], top=bounds[3] + ) + r_test = r.reproject(bounds=dst_bounds) + assert np.count_nonzero(r_test.data.mask) > 0 + + # If nodata falls outside the original image range, check range is preserved (with nearest interpolation) + r_float = r.astype("float32") # type: ignore + if (r_float.nodata < np.min(r_float)) or (r_float.nodata > np.max(r_float)): + r_test = r_float.reproject(bounds=dst_bounds, resampling="nearest") + assert r_test.nodata == r_float.nodata + assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0 # Some values should be set to nodata + assert np.min(r_test.data) == np.min(r_float.data) # But min and max should not be affected + assert np.max(r_test.data) == np.max(r_float.data) + + # Check that nodata works as expected + r_test = r_float.reproject(bounds=dst_bounds, nodata=9999) + assert r_test.nodata == 9999 + assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0 + + # Test that reproject works 
the same whether data is already loaded or not + assert r.is_loaded + r_test1 = r.reproject(crs=out_crs, nodata=0) + r_unload = gu.Raster(example, load_data=False) + assert not r_unload.is_loaded + r_test2 = r_unload.reproject(crs=out_crs, nodata=0) + assert r_test1.raster_equal(r_test2) + + # Test that reproject does not fail with resolution as np.integer or np.float types, single value or tuple + astype_funcs = [int, np.int32, float, np.float64] + for astype_func in astype_funcs: + r.reproject(res=astype_func(20.5), nodata=0) + for i in range(len(astype_funcs)): + for j in range(len(astype_funcs)): + r.reproject(res=(astype_funcs[i](20.5), astype_funcs[j](10.5)), nodata=0) + + # Test that reprojection works for several bands + for n in [2, 3, 4]: + img1 = gu.Raster.from_array( + np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(0, 500, 1, 1), crs=4326 + ) + + img2 = gu.Raster.from_array( + np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(50, 500, 1, 1), crs=4326 + ) + + out_img = img2.reproject(img1) + assert np.shape(out_img.data) == (n, 500, 500) + assert (out_img.count, *out_img.shape) == (n, 500, 500) + + # Test that the rounding of resolution is correct for large decimal numbers + # (we take an example that used to fail, see issue #354 and #357) + data = np.ones((4759, 2453)) + transform = rio.transform.Affine( + 24.12423878332849, 0.0, 238286.29553975424, 0.0, -24.12423878332849, 6995453.456051373 + ) + crs = rio.CRS.from_epsg(32633) + nodata = -9999.0 + rst = gu.Raster.from_array(data=data, transform=transform, crs=crs, nodata=nodata) + + rst_reproj = rst.reproject(bounds=rst.bounds, res=(20.0, 20.0)) + # This used to be 19.999999999999999 due to floating point precision + assert rst_reproj.res == (20.0, 20.0) + + # -- Test match reference functionalities -- + + # - Create 2 artificial rasters - + # for r2b, bounds are cropped to the upper left by an integer number of pixels (i.e. 
crop) + # for r2, resolution is also set to 2/3 the input res + min_size = min(r.shape) + rng = np.random.default_rng(42) + rand_int = rng.integers(min_size / 10, min(r.shape) - min_size / 10) + new_transform = rio.transform.from_origin( + r.bounds.left + rand_int * r.res[0], r.bounds.top - rand_int * abs(r.res[1]), r.res[0], r.res[1] + ) + + # data is cropped to the same extent + new_data = r.data[rand_int::, rand_int::] + r2b = gu.Raster.from_array(data=new_data, transform=new_transform, crs=r.crs, nodata=r.nodata) + + # Create a raster with different resolution + dst_res = r.res[0] * 2 / 3 + r2 = r2b.reproject(res=dst_res) + assert r2.res == (dst_res, dst_res) + + # Assert the initial rasters are different + assert r.bounds != r2b.bounds + assert r.shape != r2b.shape + assert r.bounds != r2.bounds + assert r.shape != r2.shape + assert r.res != r2.res + + # Test reprojecting with ref=r2b (i.e. crop) -> output should have same shape, bounds and data, i.e. be the + # same object + r3 = r.reproject(r2b) + assert r3.bounds == r2b.bounds + assert r3.shape == r2b.shape + assert r3.bounds == r2b.bounds + assert r3.transform == r2b.transform + assert np.array_equal(r3.data.data, r2b.data.data, equal_nan=True) + assert np.array_equal(r3.data.mask, r2b.data.mask) + + if DO_PLOT: + fig1, ax1 = plt.subplots() + r.plot(ax=ax1, title="Raster 1") + + fig2, ax2 = plt.subplots() + r2b.plot(ax=ax2, title="Raster 2") + + fig3, ax3 = plt.subplots() + r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2") + + plt.show() + + # Test reprojecting with ref=r2 -> output should have same shape, bounds and transform + # Data should be slightly different due to difference in input resolution + r3 = r.reproject(r2) + assert r3.bounds == r2.bounds + assert r3.shape == r2.shape + assert r3.bounds == r2.bounds + assert r3.transform == r2.transform + assert not np.array_equal(r3.data.data, r2.data.data, equal_nan=True) + + if DO_PLOT: + fig1, ax1 = plt.subplots() + r.plot(ax=ax1, title="Raster 
1") + + fig2, ax2 = plt.subplots() + r2.plot(ax=ax2, title="Raster 2") + + fig3, ax3 = plt.subplots() + r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2") + + plt.show() + + # -- Check that if mask is modified afterwards, it is taken into account during reproject -- # + # Create a raster with (additional) random gaps + r_gaps = r.copy() + nsamples = 200 + rand_indices = gu.raster.subsample_array(r_gaps.data, nsamples, return_indices=True) + r_gaps.data[rand_indices] = np.ma.masked + assert np.sum(r_gaps.data.mask) - np.sum(r.data.mask) == nsamples # sanity check + + # reproject raster, and reproject mask. Check that both have same number of masked pixels + # TODO: should test other resampling algo + r_gaps_reproj = r_gaps.reproject(res=dst_res, resampling="nearest") + mask = gu.Raster.from_array( + r_gaps.data.mask.astype("uint8"), crs=r_gaps.crs, transform=r_gaps.transform, nodata=None + ) + mask_reproj = mask.reproject(res=dst_res, nodata=255, resampling="nearest") + # Final masked pixels are those originally masked (=1) and the values masked during reproject, e.g. 
edges + tot_masked_true = np.count_nonzero(mask_reproj.data.mask) + np.count_nonzero(mask_reproj.data == 1) + assert np.count_nonzero(r_gaps_reproj.data.mask) == tot_masked_true + + # If a nodata is set, make sure it is preserved + r_nodata = r.copy() + + r_nodata.set_nodata(0) + + r3 = r_nodata.reproject(r2) + assert r_nodata.nodata == r3.nodata + + # -- Check inplace behaviour works -- # + + # Check when transform is updated (via res) + r_tmp_res = r.copy() + r_res = r_tmp_res.reproject(res=r.res[0] / 2) + r_tmp_res.reproject(res=r.res[0] / 2, inplace=True) + + assert r_res.raster_equal(r_tmp_res) + + # Check when CRS is updated + r_tmp_crs = r.copy() + r_crs = r_tmp_crs.reproject(crs=out_crs) + r_tmp_crs.reproject(crs=out_crs, inplace=True) + + assert r_crs.raster_equal(r_tmp_crs) + + # -- Test additional errors raised for argument combinations -- # + + # If both ref and crs are set + with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")): + _ = r.reproject(ref=r2, crs=r.crs) + + # Size and res are mutually exclusive + with pytest.raises(ValueError, match=re.escape("size and res both specified. 
Specify only one.")): + _ = r.reproject(grid_size=(10, 10), res=50) + + # If wrong type for `ref` + with pytest.raises( + TypeError, match=re.escape("Type of ref not understood, must be path to file (str), Raster.") + ): + _ = r.reproject(ref=3) + + # If input reference is string and file and does not exist + with pytest.raises(ValueError, match=re.escape("Reference raster does not exist.")): + _ = r.reproject(ref="no_file.tif") + + # -- Check warning for area_or_point works -- # + r.set_area_or_point("Area", shift_area_or_point=False) + r2 = r.copy() + r2.set_area_or_point("Point", shift_area_or_point=False) + + with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'): + r.reproject(r2) + + # Check that reprojecting preserves interpretation + r_reproj = r.reproject(res=r.res[0] * 2) + assert r_reproj.area_or_point == "Area" + r2_reproj = r2.reproject(res=r2.res[0] * 2) + assert r2_reproj.area_or_point == "Point" + + +class TestMaskGeotransformations: + # Paths to example data + landsat_b4_path = examples.get_path("everest_landsat_b4") + landsat_rgb_path = examples.get_path("everest_landsat_rgb") + everest_outlines_path = examples.get_path("everest_rgi_outlines") + aster_dem_path = examples.get_path("exploradores_aster_dem") + + # Mask without nodata + mask_landsat_b4 = gu.Raster(landsat_b4_path) > 125 + # Mask with nodata + mask_aster_dem = gu.Raster(aster_dem_path) > 2000 + # Mask from an outline + mask_everest = gu.Vector(everest_outlines_path).create_mask(gu.Raster(landsat_b4_path)) + + @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore + def test_crop(self, mask: gu.Mask) -> None: + # Test with same bounds -> should be the same # + + mask_orig = mask.copy() + crop_geom = mask.bounds + mask_cropped = mask.crop(crop_geom) + assert mask_cropped.raster_equal(mask) + + # Check if instance is respected + assert isinstance(mask_cropped, gu.Mask) + # Check the dtype of 
the original mask was properly reconverted + assert mask.data.dtype == bool + # Check the original mask was not modified during cropping + assert mask_orig.raster_equal(mask) + + # Check inplace behaviour works + mask_tmp = mask.copy() + mask_tmp.crop(crop_geom, inplace=True) + assert mask_tmp.raster_equal(mask_cropped) + + # - Test cropping each side by a random integer of pixels - # + rng = np.random.default_rng(42) + rand_int = rng.integers(1, min(mask.shape) - 1) + + # Left + crop_geom2 = [crop_geom[0] + rand_int * mask.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] + mask_cropped = mask.crop(crop_geom2) + assert list(mask_cropped.bounds) == crop_geom2 + assert np.array_equal(mask.data[:, rand_int:].data, mask_cropped.data.data, equal_nan=True) + assert np.array_equal(mask.data[:, rand_int:].mask, mask_cropped.data.mask) + + # Right + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * mask.res[0], crop_geom[3]] + mask_cropped = mask.crop(crop_geom2) + assert list(mask_cropped.bounds) == crop_geom2 + assert np.array_equal(mask.data[:, :-rand_int].data, mask_cropped.data.data, equal_nan=True) + assert np.array_equal(mask.data[:, :-rand_int].mask, mask_cropped.data.mask) + + # Bottom + crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(mask.res[1]), crop_geom[2], crop_geom[3]] + mask_cropped = mask.crop(crop_geom2) + assert list(mask_cropped.bounds) == crop_geom2 + assert np.array_equal(mask.data[:-rand_int, :].data, mask_cropped.data.data, equal_nan=True) + assert np.array_equal(mask.data[:-rand_int, :].mask, mask_cropped.data.mask) + + # Top + crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(mask.res[1])] + mask_cropped = mask.crop(crop_geom2) + assert list(mask_cropped.bounds) == crop_geom2 + assert np.array_equal(mask.data[rand_int:, :].data, mask_cropped.data, equal_nan=True) + assert np.array_equal(mask.data[rand_int:, :].mask, mask_cropped.data.mask) + + # Test inplace + mask_orig = mask.copy() 
+ mask_orig.crop(crop_geom2, inplace=True) + assert list(mask_orig.bounds) == crop_geom2 + assert np.array_equal(mask.data[rand_int:, :].data, mask_orig.data, equal_nan=True) + assert np.array_equal(mask.data[rand_int:, :].mask, mask_orig.data.mask) + + # Run with match_extent, check that inplace or not yields the same result + + # TODO: Pretty sketchy with the current functioning of "match_extent", + # should we just remove it from Raster.crop() ? + + # mask_cropped = mask.crop(crop_geom2, inplace=False, mode="match_extent") + # mask_orig.crop(crop_geom2, mode="match_extent") + # assert mask_cropped.raster_equal(mask_orig) + + @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore + def test_reproject(self, mask: gu.Mask) -> None: + # Test 1: with a classic resampling (bilinear) + + # Reproject mask - resample to 100 x 100 grid + mask_orig = mask.copy() + mask_reproj = mask.reproject(grid_size=(100, 100), force_source_nodata=2) + + # Check instance is respected + assert isinstance(mask_reproj, gu.Mask) + # Check the dtype of the original mask was properly reconverted + assert mask.data.dtype == bool + # Check the original mask was not modified during reprojection + assert mask_orig.raster_equal(mask) + + # Check inplace behaviour works + mask_tmp = mask.copy() + mask_tmp.reproject(grid_size=(100, 100), force_source_nodata=2, inplace=True) + assert mask_tmp.raster_equal(mask_reproj) + + # This should be equivalent to converting the array to uint8, reprojecting, converting back + mask_uint8 = mask.astype("uint8") + mask_uint8_reproj = mask_uint8.reproject(grid_size=(100, 100), force_source_nodata=2) + mask_uint8_reproj.data = mask_uint8_reproj.data.astype("bool") + + assert mask_reproj.raster_equal(mask_uint8_reproj) + + # Test 2: should raise a warning when the resampling differs from nearest + + with pytest.warns( + UserWarning, + match="Reprojecting a mask with a resampling method other than 'nearest', " + "the boolean 
array will be converted to float during interpolation.", + ): + mask.reproject(res=50, resampling="bilinear", force_source_nodata=2) diff --git a/tests/test_raster/test_multiraster.py b/tests/test_raster/test_multiraster.py index f93d2741..86cd7803 100644 --- a/tests/test_raster/test_multiraster.py +++ b/tests/test_raster/test_multiraster.py @@ -1,6 +1,7 @@ """ Test tools involving multiple rasters. """ + from __future__ import annotations import warnings @@ -203,7 +204,7 @@ def test_stack_rasters(self, rasters) -> None: # type: ignore assert rasters.img.width == pytest.approx(stacked_img.width, abs=1) else: assert rasters.img.shape == stacked_img.shape - assert type(stacked_img) == gu.Raster # Check output object is always Raster, whatever input was given + assert isinstance(stacked_img, gu.Raster) # Check output object is always Raster, whatever input was given assert np.count_nonzero(np.isnan(stacked_img.data)) == 0 # Check no NaNs introduced merged_bounds = gu.projtools.merge_bounds( diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py index ed16c0ae..404e4745 100644 --- a/tests/test_raster/test_raster.py +++ b/tests/test_raster/test_raster.py @@ -1,6 +1,7 @@ """ Test functions for raster """ + from __future__ import annotations import os @@ -22,51 +23,11 @@ import geoutils as gu from geoutils import examples from geoutils._typing import MArrayNum, NDArrayNum -from geoutils.misc import resampling_method_from_str from geoutils.raster.raster import _default_nodata, _default_rio_attrs DO_PLOT = False -def run_gdal_proximity( - input_raster: gu.Raster, target_values: list[float] | None, distunits: str = "GEO" -) -> NDArrayNum: - """Run GDAL's ComputeProximity and return the read numpy array.""" - # Rasterio strongly recommends against importing gdal along rio, so this is done here instead. 
- from osgeo import gdal, gdalconst - - gdal.UseExceptions() - - # Initiate empty GDAL raster for proximity output - drv = gdal.GetDriverByName("MEM") - proxy_ds = drv.Create("", input_raster.shape[1], input_raster.shape[0], 1, gdal.GetDataTypeByName("Float32")) - proxy_ds.GetRasterBand(1).SetNoDataValue(-9999) - - # Save input in temporary file to read with GDAL - # (avoids the nightmare of setting nodata, transform, crs in GDAL format...) - with tempfile.TemporaryDirectory() as temp_dir: - temp_path = os.path.join(temp_dir, "input.tif") - input_raster.save(temp_path) - ds_raster_in = gdal.Open(temp_path, gdalconst.GA_ReadOnly) - - # Define GDAL options - proximity_options = ["DISTUNITS=" + distunits] - if target_values is not None: - proximity_options.insert(0, "VALUES=" + ",".join([str(tgt) for tgt in target_values])) - - # Compute proximity - gdal.ComputeProximity(ds_raster_in.GetRasterBand(1), proxy_ds.GetRasterBand(1), proximity_options) - # Save array - proxy_array = proxy_ds.GetRasterBand(1).ReadAsArray().astype("float32") - proxy_array[proxy_array == -9999] = np.nan - - # Close GDAL datasets - proxy_ds = None - ds_raster_in = None - - return proxy_array - - class TestRaster: landsat_b4_path = examples.get_path("everest_landsat_b4") landsat_b4_crop_path = examples.get_path("everest_landsat_b4_cropped") @@ -1048,7 +1009,7 @@ def test_copy(self, example: str) -> None: # When passing the new array as a NaN ndarray, only the valid data is equal, because masked data is NaN in one # case, and -9999 in the other - r_arr = gu.raster.get_array_and_mask(r)[0] + r_arr = gu.raster.array._get_array_and_mask(r)[0] r2 = r.copy(new_array=r_arr) assert np.ma.allequal(r.data, r2.data) # If a nodata value exists, and we update the NaN pixels to be that nodata value, then the two Rasters should @@ -1251,610 +1212,6 @@ def test_getitem_setitem(self, example: str) -> None: with pytest.raises(ValueError, match=re.escape(message_raster.format(op_name_assign))): rst[mask] = 1 - 
test_data = [[landsat_b4_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]] - - @pytest.mark.parametrize("data", test_data) # type: ignore - def test_crop(self, data: list[str]) -> None: - """Test for crop method, also called by square brackets through __getitem__""" - - raster_path, outlines_path = data - r = gu.Raster(raster_path) - - # -- Test with crop_geom being a list/tuple -- ## - crop_geom: list[float] = list(r.bounds) - - # Test unloaded inplace cropping conserves the shape - r.crop(crop_geom=[crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]], inplace=True) - assert len(r.data.shape) == 2 - - r = gu.Raster(raster_path) - - # Test with same bounds -> should be the same # - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert r_cropped.raster_equal(r) - - # - Test cropping each side by a random integer of pixels - # - rng = np.random.default_rng(42) - rand_int = rng.integers(1, min(r.shape) - 1) - - # Left - crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert list(r_cropped.bounds) == crop_geom2 - assert np.array_equal(r.data[:, rand_int:].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[:, rand_int:].mask, r_cropped.data.mask) - - # Right - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * r.res[0], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert list(r_cropped.bounds) == crop_geom2 - assert np.array_equal(r.data[:, :-rand_int].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[:, :-rand_int].mask, r_cropped.data.mask) - - # Bottom - crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(r.res[1]), crop_geom[2], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert list(r_cropped.bounds) == crop_geom2 - assert np.array_equal(r.data[:-rand_int, :].data, r_cropped.data.data, equal_nan=True) - assert 
np.array_equal(r.data[:-rand_int, :].mask, r_cropped.data.mask) - - # Top - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(r.res[1])] - r_cropped = r.crop(crop_geom2) - assert list(r_cropped.bounds) == crop_geom2 - assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data, equal_nan=True) - assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask) - - # Same but tuple - crop_geom3: tuple[float, float, float, float] = ( - crop_geom[0], - crop_geom[1], - crop_geom[2], - crop_geom[3] - rand_int * r.res[0], - ) - r_cropped = r.crop(crop_geom3) - assert list(r_cropped.bounds) == list(crop_geom3) - assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask) - - # -- Test with crop_geom being a Raster -- # - r_cropped2 = r.crop(r_cropped) - assert r_cropped2.raster_equal(r_cropped) - - # Check that bound reprojection is done automatically if the CRS differ - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") - - r_cropped_reproj = r_cropped.reproject(crs=3857) - r_cropped3 = r.crop(r_cropped_reproj) - - # Original CRS bounds can be deformed during transformation, but result should be equivalent to this - r_cropped4 = r.crop(crop_geom=r_cropped_reproj.get_bounds_projected(out_crs=r.crs)) - assert r_cropped3.raster_equal(r_cropped4) - - # -- Test with inplace=True -- # - r_copy = r.copy() - r_copy.crop(r_cropped, inplace=True) - assert r_copy.raster_equal(r_cropped) - - # - Test cropping each side with a non integer pixel, mode='match_pixel' - # - rand_float = rng.integers(1, min(r.shape) - 1) + 0.25 - - # left - crop_geom2 = [crop_geom[0] + rand_float * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == 
int(rand_float) - assert np.array_equal(r.data[:, int(rand_float) :].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[:, int(rand_float) :].mask, r_cropped.data.mask) - - # right - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_float * r.res[0], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float) - assert np.array_equal(r.data[:, : -int(rand_float)].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[:, : -int(rand_float)].mask, r_cropped.data.mask) - - # bottom - crop_geom2 = [crop_geom[0], crop_geom[1] + rand_float * abs(r.res[1]), crop_geom[2], crop_geom[3]] - r_cropped = r.crop(crop_geom2) - assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float) - assert np.array_equal(r.data[: -int(rand_float), :].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[: -int(rand_float), :].mask, r_cropped.data.mask) - - # top - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_float * abs(r.res[1])] - r_cropped = r.crop(crop_geom2) - assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float) - assert np.array_equal(r.data[int(rand_float) :, :].data, r_cropped.data.data, equal_nan=True) - assert np.array_equal(r.data[int(rand_float) :, :].mask, r_cropped.data.mask) - - # -- Test with mode='match_extent' -- # - # Test all sides at once, with rand_float less than half the smallest extent - # The cropped extent should exactly match the requested extent, res will be changed accordingly - rand_float = rng.integers(1, min(r.shape) / 2 - 1) + 0.25 - crop_geom2 = [ - crop_geom[0] + rand_float * r.res[0], - crop_geom[1] + rand_float * abs(r.res[1]), - crop_geom[2] - rand_float * r.res[0], - crop_geom[3] - rand_float * abs(r.res[1]), - ] - - # Filter warning about nodata not set in reprojection (because 
match_extent triggers reproject) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") - r_cropped = r.crop(crop_geom2, mode="match_extent") - - assert list(r_cropped.bounds) == crop_geom2 - # The change in resolution should be less than what would occur with +/- 1 pixel - assert np.all( - abs(np.array(r.res) - np.array(r_cropped.res)) < np.array(r.res) / np.array(r_cropped.shape)[::-1] - ) - - # Filter warning about nodata not set in reprojection (because match_extent triggers reproject) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") - r_cropped2 = r.crop(r_cropped, mode="match_extent") - assert r_cropped2.raster_equal(r_cropped) - - # -- Test with crop_geom being a Vector -- # - outlines = gu.Vector(outlines_path) - - # First, we reproject manually the outline - outlines_reproj = gu.Vector(outlines.ds.to_crs(r.crs)) - r_cropped = r.crop(outlines_reproj) - - # Calculate intersection of the two bounding boxes and make sure crop has same bounds - win_outlines = rio.windows.from_bounds(*outlines_reproj.bounds, transform=r.transform) - win_raster = rio.windows.from_bounds(*r.bounds, transform=r.transform) - final_window = win_outlines.intersection(win_raster).round_lengths().round_offsets() - new_bounds = rio.windows.bounds(final_window, transform=r.transform) - assert list(r_cropped.bounds) == list(new_bounds) - - # Second, we check that bound reprojection is done automatically if the CRS differ - r_cropped2 = r.crop(outlines) - assert list(r_cropped2.bounds) == list(new_bounds) - - # -- Test crop works as expected even if transform has been modified, e.g. 
through downsampling -- # - # Test that with downsampling, cropping to same bounds result in same raster - r = gu.Raster(raster_path, downsample=5) - r_test = r.crop(r.bounds) - assert r_test.raster_equal(r) - - # - Test that cropping yields the same results whether data is loaded or not - - # With integer cropping (left) - rand_int = rng.integers(1, min(r.shape) - 1) - crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] - r = gu.Raster(raster_path, downsample=5, load_data=False) - assert not r.is_loaded - r_crop_unloaded = r.crop(crop_geom2) - r.load() - r_crop_loaded = r.crop(crop_geom2) - # TODO: the following condition should be met once issue #447 is solved - # assert r_crop_unloaded.raster_equal(r_crop_loaded) - assert r_crop_unloaded.shape == r_crop_loaded.shape - assert r_crop_unloaded.transform == r_crop_loaded.transform - - # With a float number of pixels added to the right, mode 'match_pixel' - rand_float = rng.integers(1, min(r.shape) - 1) + 0.25 - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] + rand_float * r.res[0], crop_geom[3]] - r = gu.Raster(raster_path, downsample=5, load_data=False) - assert not r.is_loaded - r_crop_unloaded = r.crop(crop_geom2, mode="match_pixel") - r.load() - r_crop_loaded = r.crop(crop_geom2, mode="match_pixel") - # TODO: the following condition should be met once issue #447 is solved - # assert r_crop_unloaded.raster_equal(r_crop_loaded) - assert r_crop_unloaded.shape == r_crop_loaded.shape - assert r_crop_unloaded.transform == r_crop_loaded.transform - - # - Check related to pixel interpretation - - - # Check warning for a different area_or_point for the match-reference geometry works - r.set_area_or_point("Area", shift_area_or_point=False) - r2 = r.copy() - r2.set_area_or_point("Point", shift_area_or_point=False) - - with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'): - r.crop(r2) - - # Check that cropping preserves 
the interpretation - crop_geom = [crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] - r_crop = r.crop(crop_geom) - assert r_crop.area_or_point == "Area" - r2_crop = r2.crop(crop_geom) - assert r2_crop.area_or_point == "Point" - - @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path, landsat_rgb_path]) # type: ignore - def test_translate(self, example: str) -> None: - """Test translation works as intended""" - - r = gu.Raster(example) - - # Get original transform - orig_transform = r.transform - orig_bounds = r.bounds - - # Shift raster by georeferenced units (default) - # Check the default behaviour is not inplace - r_notinplace = r.translate(xoff=1, yoff=1) - assert isinstance(r_notinplace, gu.Raster) - - # Check inplace - r.translate(xoff=1, yoff=1, inplace=True) - # Both shifts should have yielded the same transform - assert r.transform == r_notinplace.transform - - # Only bounds should change - assert orig_transform.c + 1 == r.transform.c - assert orig_transform.f + 1 == r.transform.f - for attr in ["a", "b", "d", "e"]: - assert getattr(orig_transform, attr) == getattr(r.transform, attr) - - assert orig_bounds.left + 1 == r.bounds.left - assert orig_bounds.right + 1 == r.bounds.right - assert orig_bounds.bottom + 1 == r.bounds.bottom - assert orig_bounds.top + 1 == r.bounds.top - - # Shift raster using pixel units - orig_transform = r.transform - orig_bounds = r.bounds - orig_res = r.res - r.translate(xoff=1, yoff=1, distance_unit="pixel", inplace=True) - - # Only bounds should change - assert orig_transform.c + 1 * orig_res[0] == r.transform.c - assert orig_transform.f + 1 * orig_res[1] == r.transform.f - for attr in ["a", "b", "d", "e"]: - assert getattr(orig_transform, attr) == getattr(r.transform, attr) - - assert orig_bounds.left + 1 * orig_res[0] == r.bounds.left - assert orig_bounds.right + 1 * orig_res[0] == r.bounds.right - assert orig_bounds.bottom + 1 * orig_res[1] == r.bounds.bottom - assert orig_bounds.top + 1 * 
orig_res[1] == r.bounds.top - - # Check that an error is raised for a wrong distance_unit - with pytest.raises(ValueError, match="Argument 'distance_unit' should be either 'pixel' or 'georeferenced'."): - r.translate(xoff=1, yoff=1, distance_unit="wrong_value") # type: ignore - - @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore - def test_reproject(self, example: str) -> None: - warnings.simplefilter("error") - - # Reference raster to be used - r = gu.Raster(example) - - # -- Check proper errors are raised if nodata are not set -- # - r_nodata = r.copy() - r_nodata.set_nodata(None) - - # Make sure at least one pixel is masked for test 1 - rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True) - r_nodata.data[rand_indices] = np.ma.masked - assert np.count_nonzero(r_nodata.data.mask) > 0 - - # make sure at least one pixel is set at default nodata for test - default_nodata = _default_nodata(r_nodata.dtype) - rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True) - r_nodata.data[rand_indices] = default_nodata - assert np.count_nonzero(r_nodata.data == default_nodata) > 0 - - # 1 - if no force_source_nodata is set and masked values exist, raises an error - with pytest.raises( - ValueError, - match=re.escape( - "No nodata set, set one for the raster with self.set_nodata() or use a " - "temporary one with `force_source_nodata`." - ), - ): - _ = r_nodata.reproject(res=r_nodata.res[0] / 2, nodata=0) - - # 2 - if no nodata is set and default value conflicts with existing value, a warning is raised - with pytest.warns( - UserWarning, - match=re.escape( - f"For reprojection, nodata must be set. Default chosen value " - f"{_default_nodata(r_nodata.dtype)} exists in self.data. This may have unexpected " - f"consequences. Consider setting a different nodata with self.set_nodata()." 
- ), - ): - r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata) - assert r_test.nodata == default_nodata - - # 3 - if default nodata does not conflict, should not raise a warning - r_nodata.data[r_nodata.data == default_nodata] = 3 - r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata) - assert r_test.nodata == default_nodata - - # -- Test setting each combination of georeferences bounds, res and size -- # - - # specific for the landsat test case, default nodata 255 cannot be used (see above), so use 0 - if r.nodata is None: - r.set_nodata(0) - - # - Test size - this should modify the shape, and hence resolution, but not the bounds - - out_size = (r.shape[1] // 2, r.shape[0] // 2) # Outsize is (ncol, nrow) - r_test = r.reproject(grid_size=out_size) - assert r_test.shape == (out_size[1], out_size[0]) - assert r_test.res != r.res - assert r_test.bounds == r.bounds - - # - Test bounds - - # if bounds is a multiple of res, outptut res should be preserved - bounds = np.copy(r.bounds) - dst_bounds = rio.coords.BoundingBox( - left=bounds[0], bottom=bounds[1] + r.res[0], right=bounds[2] - 2 * r.res[1], top=bounds[3] - ) - r_test = r.reproject(bounds=dst_bounds) - assert r_test.bounds == dst_bounds - assert r_test.res == r.res - - # Create bounds with 1/2 and 1/3 pixel extra on the right/bottom. 
- bounds = np.copy(r.bounds) - dst_bounds = rio.coords.BoundingBox( - left=bounds[0], bottom=bounds[1] - r.res[0] / 3.0, right=bounds[2] + r.res[1] / 2.0, top=bounds[3] - ) - - # If bounds are not a multiple of res, the latter will be updated accordingly - r_test = r.reproject(bounds=dst_bounds) - assert r_test.bounds == dst_bounds - assert r_test.res != r.res - - # - Test size and bounds - - r_test = r.reproject(grid_size=out_size, bounds=dst_bounds) - assert r_test.shape == (out_size[1], out_size[0]) - assert r_test.bounds == dst_bounds - - # - Test res - - # Using a single value, output res will be enforced, resolution will be different - res_single = r.res[0] * 2 - r_test = r.reproject(res=res_single) - assert r_test.res == (res_single, res_single) - assert r_test.shape != r.shape - - # Using a tuple - res_tuple = (r.res[0] * 0.5, r.res[1] * 4) - r_test = r.reproject(res=res_tuple) - assert r_test.res == res_tuple - assert r_test.shape != r.shape - - # - Test res and bounds - - # Bounds will be enforced for upper-left pixel, but adjusted by up to one pixel for the lower right bound. 
- # for single res value - r_test = r.reproject(bounds=dst_bounds, res=res_single) - assert r_test.res == (res_single, res_single) - assert r_test.bounds.left == dst_bounds.left - assert r_test.bounds.top == dst_bounds.top - assert np.abs(r_test.bounds.right - dst_bounds.right) < res_single - assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_single - - # For tuple - r_test = r.reproject(bounds=dst_bounds, res=res_tuple) - assert r_test.res == res_tuple - assert r_test.bounds.left == dst_bounds.left - assert r_test.bounds.top == dst_bounds.top - assert np.abs(r_test.bounds.right - dst_bounds.right) < res_tuple[0] - assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_tuple[1] - - # - Test crs - - out_crs = rio.crs.CRS.from_epsg(4326) - r_test = r.reproject(crs=out_crs) - assert r_test.crs.to_epsg() == 4326 - - # -- Additional tests -- - # First, make sure dst_bounds extend beyond current extent to create nodata - dst_bounds = rio.coords.BoundingBox( - left=bounds[0], bottom=bounds[1] - r.res[0], right=bounds[2] + 2 * r.res[1], top=bounds[3] - ) - r_test = r.reproject(bounds=dst_bounds) - assert np.count_nonzero(r_test.data.mask) > 0 - - # If nodata falls outside the original image range, check range is preserved (with nearest interpolation) - r_float = r.astype("float32") # type: ignore - if (r_float.nodata < np.min(r_float)) or (r_float.nodata > np.max(r_float)): - r_test = r_float.reproject(bounds=dst_bounds, resampling="nearest") - assert r_test.nodata == r_float.nodata - assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0 # Some values should be set to nodata - assert np.min(r_test.data) == np.min(r_float.data) # But min and max should not be affected - assert np.max(r_test.data) == np.max(r_float.data) - - # Check that nodata works as expected - r_test = r_float.reproject(bounds=dst_bounds, nodata=9999) - assert r_test.nodata == 9999 - assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0 - - # Test that reproject works 
the same whether data is already loaded or not - assert r.is_loaded - r_test1 = r.reproject(crs=out_crs, nodata=0) - r_unload = gu.Raster(example, load_data=False) - assert not r_unload.is_loaded - r_test2 = r_unload.reproject(crs=out_crs, nodata=0) - assert r_test1.raster_equal(r_test2) - - # Test that reproject does not fail with resolution as np.integer or np.float types, single value or tuple - astype_funcs = [int, np.int32, float, np.float64] - for astype_func in astype_funcs: - r.reproject(res=astype_func(20.5), nodata=0) - for i in range(len(astype_funcs)): - for j in range(len(astype_funcs)): - r.reproject(res=(astype_funcs[i](20.5), astype_funcs[j](10.5)), nodata=0) - - # Test that reprojection works for several bands - for n in [2, 3, 4]: - img1 = gu.Raster.from_array( - np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(0, 500, 1, 1), crs=4326 - ) - - img2 = gu.Raster.from_array( - np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(50, 500, 1, 1), crs=4326 - ) - - out_img = img2.reproject(img1) - assert np.shape(out_img.data) == (n, 500, 500) - assert (out_img.count, *out_img.shape) == (n, 500, 500) - - # Test that the rounding of resolution is correct for large decimal numbers - # (we take an example that used to fail, see issue #354 and #357) - data = np.ones((4759, 2453)) - transform = rio.transform.Affine( - 24.12423878332849, 0.0, 238286.29553975424, 0.0, -24.12423878332849, 6995453.456051373 - ) - crs = rio.CRS.from_epsg(32633) - nodata = -9999.0 - rst = gu.Raster.from_array(data=data, transform=transform, crs=crs, nodata=nodata) - - rst_reproj = rst.reproject(bounds=rst.bounds, res=(20.0, 20.0)) - # This used to be 19.999999999999999 due to floating point precision - assert rst_reproj.res == (20.0, 20.0) - - # -- Test match reference functionalities -- - - # - Create 2 artificial rasters - - # for r2b, bounds are cropped to the upper left by an integer number of pixels (i.e. 
crop) - # for r2, resolution is also set to 2/3 the input res - min_size = min(r.shape) - rng = np.random.default_rng(42) - rand_int = rng.integers(min_size / 10, min(r.shape) - min_size / 10) - new_transform = rio.transform.from_origin( - r.bounds.left + rand_int * r.res[0], r.bounds.top - rand_int * abs(r.res[1]), r.res[0], r.res[1] - ) - - # data is cropped to the same extent - new_data = r.data[rand_int::, rand_int::] - r2b = gu.Raster.from_array(data=new_data, transform=new_transform, crs=r.crs, nodata=r.nodata) - - # Create a raster with different resolution - dst_res = r.res[0] * 2 / 3 - r2 = r2b.reproject(res=dst_res) - assert r2.res == (dst_res, dst_res) - - # Assert the initial rasters are different - assert r.bounds != r2b.bounds - assert r.shape != r2b.shape - assert r.bounds != r2.bounds - assert r.shape != r2.shape - assert r.res != r2.res - - # Test reprojecting with ref=r2b (i.e. crop) -> output should have same shape, bounds and data, i.e. be the - # same object - r3 = r.reproject(r2b) - assert r3.bounds == r2b.bounds - assert r3.shape == r2b.shape - assert r3.bounds == r2b.bounds - assert r3.transform == r2b.transform - assert np.array_equal(r3.data.data, r2b.data.data, equal_nan=True) - assert np.array_equal(r3.data.mask, r2b.data.mask) - - if DO_PLOT: - fig1, ax1 = plt.subplots() - r.plot(ax=ax1, title="Raster 1") - - fig2, ax2 = plt.subplots() - r2b.plot(ax=ax2, title="Raster 2") - - fig3, ax3 = plt.subplots() - r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2") - - plt.show() - - # Test reprojecting with ref=r2 -> output should have same shape, bounds and transform - # Data should be slightly different due to difference in input resolution - r3 = r.reproject(r2) - assert r3.bounds == r2.bounds - assert r3.shape == r2.shape - assert r3.bounds == r2.bounds - assert r3.transform == r2.transform - assert not np.array_equal(r3.data.data, r2.data.data, equal_nan=True) - - if DO_PLOT: - fig1, ax1 = plt.subplots() - r.plot(ax=ax1, title="Raster 
1") - - fig2, ax2 = plt.subplots() - r2.plot(ax=ax2, title="Raster 2") - - fig3, ax3 = plt.subplots() - r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2") - - plt.show() - - # -- Check that if mask is modified afterwards, it is taken into account during reproject -- # - # Create a raster with (additional) random gaps - r_gaps = r.copy() - nsamples = 200 - rand_indices = gu.raster.subsample_array(r_gaps.data, nsamples, return_indices=True) - r_gaps.data[rand_indices] = np.ma.masked - assert np.sum(r_gaps.data.mask) - np.sum(r.data.mask) == nsamples # sanity check - - # reproject raster, and reproject mask. Check that both have same number of masked pixels - # TODO: should test other resampling algo - r_gaps_reproj = r_gaps.reproject(res=dst_res, resampling="nearest") - mask = gu.Raster.from_array( - r_gaps.data.mask.astype("uint8"), crs=r_gaps.crs, transform=r_gaps.transform, nodata=None - ) - mask_reproj = mask.reproject(res=dst_res, nodata=255, resampling="nearest") - # Final masked pixels are those originally masked (=1) and the values masked during reproject, e.g. 
edges - tot_masked_true = np.count_nonzero(mask_reproj.data.mask) + np.count_nonzero(mask_reproj.data == 1) - assert np.count_nonzero(r_gaps_reproj.data.mask) == tot_masked_true - - # If a nodata is set, make sure it is preserved - r_nodata = r.copy() - - r_nodata.set_nodata(0) - - r3 = r_nodata.reproject(r2) - assert r_nodata.nodata == r3.nodata - - # -- Check inplace behaviour works -- # - - # Check when transform is updated (via res) - r_tmp_res = r.copy() - r_res = r_tmp_res.reproject(res=r.res[0] / 2) - r_tmp_res.reproject(res=r.res[0] / 2, inplace=True) - - assert r_res.raster_equal(r_tmp_res) - - # Check when CRS is updated - r_tmp_crs = r.copy() - r_crs = r_tmp_crs.reproject(crs=out_crs) - r_tmp_crs.reproject(crs=out_crs, inplace=True) - - assert r_crs.raster_equal(r_tmp_crs) - - # -- Test additional errors raised for argument combinations -- # - - # If both ref and crs are set - with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")): - _ = r.reproject(ref=r2, crs=r.crs) - - # Size and res are mutually exclusive - with pytest.raises(ValueError, match=re.escape("size and res both specified. 
Specify only one.")): - _ = r.reproject(grid_size=(10, 10), res=50) - - # If wrong type for `ref` - with pytest.raises( - TypeError, match=re.escape("Type of ref not understood, must be path to file (str), Raster.") - ): - _ = r.reproject(ref=3) - - # If input reference is string and file and does not exist - with pytest.raises(ValueError, match=re.escape("Reference raster does not exist.")): - _ = r.reproject(ref="no_file.tif") - - # -- Check warning for area_or_point works -- # - r.set_area_or_point("Area", shift_area_or_point=False) - r2 = r.copy() - r2.set_area_or_point("Point", shift_area_or_point=False) - - with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'): - r.reproject(r2) - - # Check that reprojecting preserves interpretation - r_reproj = r.reproject(res=r.res[0] * 2) - assert r_reproj.area_or_point == "Area" - r2_reproj = r2.reproject(res=r2.res[0] * 2) - assert r2_reproj.area_or_point == "Point" - @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore def test_intersection(self, example: list[str]) -> None: """Check the behaviour of the intersection function""" @@ -2302,7 +1659,7 @@ def test_astype(self, example: str) -> None: # The multi-band example will not have a colorbar, so not used in tests @pytest.mark.parametrize("example", [landsat_b4_path, landsat_b4_crop_path, aster_dem_path]) # type: ignore @pytest.mark.parametrize("figsize", np.arange(2, 20, 2)) # type: ignore - def test_show_cbar(self, example, figsize) -> None: + def test_plot_cbar(self, example, figsize) -> None: """ Test cbar matches plot height. 
""" @@ -2330,7 +1687,7 @@ def test_show_cbar(self, example, figsize) -> None: # Assert height is the same assert h == pytest.approx(h_cbar) - def test_show(self) -> None: + def test_plot(self) -> None: # Read single band raster and RGB raster img = gu.Raster(self.landsat_b4_path) img_RGB = gu.Raster(self.landsat_rgb_path) @@ -2587,408 +1944,6 @@ def test_split_bands(self) -> None: red_c.data.data.squeeze().astype("float32"), img.data.data[0, :, :].astype("float32"), equal_nan=True ) - def test_resampling_str(self) -> None: - """Test that resampling methods can be given as strings instead of rio enums.""" - warnings.simplefilter("error") - assert resampling_method_from_str("nearest") == rio.enums.Resampling.nearest # noqa - assert resampling_method_from_str("cubic_spline") == rio.enums.Resampling.cubic_spline # noqa - - # Check that odd strings return the appropriate error. - try: - resampling_method_from_str("CUBIC_SPLINE") # noqa - except ValueError as exception: - if "not a valid rasterio.enums.Resampling method" not in str(exception): - raise exception - - img1 = gu.Raster(self.landsat_b4_path) - img2 = gu.Raster(self.landsat_b4_crop_path) - # Set img2 pixel interpretation as "Point" to match "img1" and avoid any warnings - img2.set_area_or_point("Point", shift_area_or_point=False) - img1.set_nodata(0) - img2.set_nodata(0) - - # Resample the rasters using a new resampling method and see that the string and enum gives the same result. 
- img3a = img1.reproject(img2, resampling="q1") - img3b = img1.reproject(img2, resampling=rio.enums.Resampling.q1) - assert img3a.raster_equal(img3b) - - @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore - def test_polygonize(self, example: str) -> None: - """Test that polygonize doesn't raise errors.""" - - img = gu.Raster(example) - - # -- Test 1: basic functioning of polygonize -- - - # Get unique value for image and the corresponding area - value = np.unique(img)[0] - pixel_area = np.count_nonzero(img.data == value) * img.res[0] * img.res[1] - - # Polygonize the raster for this value, and compute the total area - polygonized = img.polygonize(target_values=value) - polygon_area = polygonized.ds.area.sum() - - # Check that these two areas are approximately equal - assert polygon_area == pytest.approx(pixel_area) - assert isinstance(polygonized, gu.Vector) - assert polygonized.crs == img.crs - - # Check default name of data column, and that defining a custom name works the same - assert "id" in polygonized.ds.columns - polygonized2 = img.polygonize(target_values=value, data_column_name="myname") - assert "myname" in polygonized2.ds.columns - assert np.array_equal(polygonized2.ds["myname"].values, polygonized.ds["id"].values) - - # -- Test 2: data types -- - - # Check that polygonize works as expected for any input dtype (e.g. 
float64 being not supported by GeoPandas) - for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64"]: - img_dtype = img.copy() - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", category=UserWarning, message="dtype conversion will result in a " "loss of information.*" - ) - warnings.filterwarnings( - "ignore", - category=UserWarning, - message="Unmasked values equal to the nodata value found in data array.*", - ) - img_dtype = img_dtype.astype(dtype) - value = np.unique(img_dtype)[0] - img_dtype.polygonize(target_values=value) - - # And for a boolean object, such as a mask - mask = img > value - mask.polygonize(target_values=1) - - # Test all options, with both an artificial Raster (that has all target values) and a real Raster - @pytest.mark.parametrize("distunits", ["GEO", "PIXEL"]) # type: ignore - # 0 and 1,2,3 are especially useful for the artificial Raster, and 112 for the real Raster - @pytest.mark.parametrize("target_values", [[1, 2, 3], [0], [112], None]) # type: ignore - @pytest.mark.parametrize( - "raster", - [ - gu.Raster(landsat_b4_path), - gu.Raster.from_array( - np.arange(25, dtype="int32").reshape(5, 5), transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326 - ), - ], - ) # type: ignore - def test_proximity_against_gdal(self, distunits: str, target_values: list[float] | None, raster: gu.Raster) -> None: - """Test that proximity matches the results of GDAL for any parameter.""" - - # TODO: When adding new rasters for tests, specify warning only for Landsat - warnings.filterwarnings("ignore", message="Setting default nodata -99999 to mask non-finite values *") - - # We generate proximity with GDAL and GeoUtils - gdal_proximity = run_gdal_proximity(raster, target_values=target_values, distunits=distunits) - # We translate distunits GDAL option into its GeoUtils equivalent - if distunits == "GEO": - distance_unit = "georeferenced" - else: - distance_unit = "pixel" - geoutils_proximity = ( - 
raster.proximity(distance_unit=distance_unit, target_values=target_values) - .data.data.squeeze() - .astype("float32") - ) - - # The results should be the same in all cases - try: - # In some cases, the proximity differs slightly (generally <1%) for complex settings - # (Landsat Raster with target of 112) - # It looks like GDAL might not have the right value, - # so this particular case is treated differently in tests - if target_values is not None and target_values[0] == 112 and raster.filename is not None: - # Get index and number of not almost equal point (tolerance of 10-4) - ind_not_almost_equal = np.abs(gdal_proximity - geoutils_proximity) > 1e-04 - nb_not_almost_equal = np.count_nonzero(ind_not_almost_equal) - # Check that this is a minority of points (less than 0.5%) - assert nb_not_almost_equal < 0.005 * raster.width * raster.height - - # Replace these exceptions by zero in both - gdal_proximity[ind_not_almost_equal] = 0.0 - geoutils_proximity[ind_not_almost_equal] = 0.0 - # Check that all the rest is almost equal - assert np.allclose(gdal_proximity, geoutils_proximity, atol=1e-04, equal_nan=True) - - # Otherwise, results are exactly equal - else: - assert np.array_equal(gdal_proximity, geoutils_proximity, equal_nan=True) - - # For debugging - except Exception as exception: - import matplotlib.pyplot as plt - - # Plotting the xdem and GDAL attributes for comparison (plotting "diff" can also help debug) - plt.subplot(121) - plt.imshow(gdal_proximity) - # plt.imshow(np.abs(gdal_proximity - geoutils_proximity)>0.1) - plt.colorbar() - plt.subplot(122) - plt.imshow(geoutils_proximity) - # plt.imshow(raster.data.data == 112) - plt.colorbar() - plt.show() - - # ind_not_equal = np.abs(gdal_proximity - geoutils_proximity)>0.1 - # print(gdal_proximity[ind_not_equal]) - # print(geoutils_proximity[ind_not_equal]) - - raise exception - - def test_proximity_parameters(self) -> None: - """ - Test that new (different to GDAL's) proximity parameters run. 
- No need to test the results specifically, as those rely entirely on the previous test with GDAL, - and tests in rasterize and shapely. - #TODO: Maybe add one test with an artificial vector to check it works as intended - """ - - # -- Test 1: with self's Raster alone -- - raster1 = gu.Raster(self.landsat_b4_path) - prox1 = raster1.proximity() - - # The raster should have the same extent, resolution and CRS - assert raster1.georeferenced_grid_equal(prox1) - - # It should change with target values specified - prox2 = raster1.proximity(target_values=[255]) - assert not np.array_equal(prox1.data, prox2.data) - - # -- Test 2: with a vector provided -- - vector = gu.Vector(self.everest_outlines_path) - - # With default options (boundary geometry) - raster1.proximity(vector=vector) - - # With the base geometry - raster1.proximity(vector=vector, geometry_type="geometry") - - # With another geometry option - raster1.proximity(vector=vector, geometry_type="centroid") - - # With only inside proximity - raster1.proximity(vector=vector, in_or_out="in") - - def test_to_pointcloud(self) -> None: - """Test to_pointcloud method.""" - - # 1/ Single band synthetic data - - # Create a small raster to test point sampling on - img_arr = np.arange(25, dtype="int32").reshape(5, 5) - img0 = gu.Raster.from_array(img_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326) - - # Sample the whole raster (fraction==1) - points = img0.to_pointcloud() - points_arr = img0.to_pointcloud(as_array=True) - - # Check output types - assert isinstance(points, gu.Vector) - assert isinstance(points_arr, np.ndarray) - - # Check that both outputs (array or vector) are fully consistent, order matters here - assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0]) - assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1]) - assert np.array_equal(points.ds["b1"].values, points_arr[:, 2]) - - # Validate that 25 points were sampled (equating to img1.height * img1.width) with x, y, 
and band0 values. - assert points_arr.shape == (25, 3) - assert points.ds.shape == (25, 2) # One less column here due to geometry storing X and Y - # Check that X, Y and Z arrays are equal to raster array input independently of value order - x_coords, y_coords = img0.ij2xy(i=np.arange(0, 5), j=np.arange(0, 5)) - assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5))) - assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5))) - assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_arr.ravel())) - - # Check that subsampling works properly - points_arr = img0.to_pointcloud(subsample=0.2, as_array=True) - assert points_arr.shape == (5, 3) - - # All values should be between 0 and 25 - assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25) - - # 2/ Multi-band synthetic data - img_arr = np.arange(25, dtype="int32").reshape(5, 5) - img_3d_arr = np.stack((img_arr, 25 + img_arr, 50 + img_arr), axis=0) - img3d = gu.Raster.from_array(img_3d_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326) - - # Sample the whole raster (fraction==1) - points = img3d.to_pointcloud(auxiliary_data_bands=[2, 3]) - points_arr = img3d.to_pointcloud(as_array=True, auxiliary_data_bands=[2, 3]) - - # Check equality between both output types - assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0]) - assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1]) - assert np.array_equal(points.ds["b1"].values, points_arr[:, 2]) - assert np.array_equal(points.ds["b2"].values, points_arr[:, 3]) - assert np.array_equal(points.ds["b3"].values, points_arr[:, 4]) - - # Check it is the right data - assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5))) - assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5))) - assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_3d_arr[0, :, :].ravel())) 
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 3])), np.sort(img_3d_arr[1, :, :].ravel())) - assert np.array_equal(np.sort(np.asarray(points_arr[:, 4])), np.sort(img_3d_arr[2, :, :].ravel())) - - # With a subsample - points_arr = img3d.to_pointcloud(as_array=True, subsample=10, auxiliary_data_bands=[2, 3]) - assert points_arr.shape == (10, 5) - - # Check the values are still good - assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25) - assert all(25 <= points_arr[:, 3]) and all(points_arr[:, 3] < 50) - assert all(50 <= points_arr[:, 4]) and all(points_arr[:, 4] < 75) - - # 3/ Single-band real raster with nodata values - img1 = gu.Raster(self.aster_dem_path) - - # Get a large sample to ensure they should be some NaNs normally - points_arr = img1.to_pointcloud(subsample=10000, as_array=True, random_state=42) - points = img1.to_pointcloud(subsample=10000, random_state=42) - - # This should not load the image - assert not img1.is_loaded - - # The subsampled values should be valid and the right shape - assert points_arr.shape == (10000, 3) - assert points.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y - assert all(np.isfinite(points_arr[:, 2])) - - # The output should respect the default band naming and the input CRS - assert np.array_equal(points.ds.columns, ["b1", "geometry"]) - assert points.crs == img1.crs - - # Try setting the band name - points = img1.to_pointcloud(data_column_name="lol", subsample=10) - assert np.array_equal(points.ds.columns, ["lol", "geometry"]) - - # Keeping the nodata values - points_invalid = img1.to_pointcloud(subsample=10000, random_state=42, skip_nodata=False) - - # The subsampled values should not all be valid and the right shape - assert points_invalid.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y - assert any(~np.isfinite(points_invalid["b1"].values)) - - # 4/ Multi-band real raster - img2 = gu.Raster(self.landsat_rgb_path) - - # By default only loads 
a single band without loading - points_arr = img2.to_pointcloud(subsample=10, as_array=True) - points = img2.to_pointcloud(subsample=10) - - assert points_arr.shape == (10, 3) - assert points.ds.shape == (10, 2) # One less column here due to geometry storing X and Y - assert not img2.is_loaded - - # Storing auxiliary bands - points_arr = img2.to_pointcloud(subsample=10, as_array=True, auxiliary_data_bands=[2, 3]) - points = img2.to_pointcloud(subsample=10, auxiliary_data_bands=[2, 3]) - assert points_arr.shape == (10, 5) - assert points.ds.shape == (10, 4) # One less column here due to geometry storing X and Y - assert not img2.is_loaded - assert np.array_equal(points.ds.columns, ["b1", "b2", "b3", "geometry"]) - - # Try setting the column name of a specific band while storing all - points = img2.to_pointcloud(subsample=10, data_column_name="yes", data_band=2, auxiliary_data_bands=[1, 3]) - assert np.array_equal(points.ds.columns, ["yes", "b1", "b3", "geometry"]) - - # 5/ Error raising - with pytest.raises(ValueError, match="Data column name must be a string.*"): - img1.to_pointcloud(data_column_name=1) # type: ignore - with pytest.raises( - ValueError, - match=re.escape("Data band number must be an integer between 1 and the total number of bands (3)."), - ): - img2.to_pointcloud(data_band=4) - with pytest.raises( - ValueError, match="Passing auxiliary column names requires passing auxiliary data band numbers as well." - ): - img2.to_pointcloud(auxiliary_column_names=["a"]) - with pytest.raises( - ValueError, match="Auxiliary data band number must be an iterable containing only integers." 
- ): - img2.to_pointcloud(auxiliary_data_bands=[1, 2.5]) # type: ignore - img2.to_pointcloud(auxiliary_data_bands="lol") # type: ignore - with pytest.raises( - ValueError, - match=re.escape("Auxiliary data band numbers must be between 1 and the total number of bands (3)."), - ): - img2.to_pointcloud(auxiliary_data_bands=[0]) - img2.to_pointcloud(auxiliary_data_bands=[4]) - with pytest.raises( - ValueError, match=re.escape("Main data band 1 should not be listed in auxiliary data bands [1, 2].") - ): - img2.to_pointcloud(auxiliary_data_bands=[1, 2]) - with pytest.raises(ValueError, match="Auxiliary column names must be an iterable containing only strings."): - img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", 1]) - with pytest.raises( - ValueError, match="Length of auxiliary column name and data band numbers should be the same*" - ): - img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", "lol2", "lol3"]) - - def test_from_pointcloud(self) -> None: - """Test from_pointcloud method.""" - - # 1/ Create a small raster to test point sampling on - shape = (5, 5) - nodata = 100 - img_arr = np.arange(np.prod(shape), dtype="int32").reshape(shape) - transform = rio.transform.from_origin(0, 5, 1, 1) - img1 = gu.Raster.from_array(img_arr, transform=transform, crs=4326, nodata=nodata) - - # Check both inputs work (grid coords or transform+shape) on a subsample - pc1 = img1.to_pointcloud(subsample=10) - img1_sub = gu.Raster.from_pointcloud_regular(pc1, transform=transform, shape=shape) - - grid_coords1 = img1.coords(grid=False) - img1_sub2 = gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) - - assert img1_sub.raster_equal(img1_sub2) - - # Check that number of valid values are equal to point cloud size - assert np.count_nonzero(~img1_sub.data.mask) == 10 - - # With no subsampling, should get the exact same raster back - pc1_full = img1.to_pointcloud() - img1_full = gu.Raster.from_pointcloud_regular(pc1_full, 
transform=transform, shape=shape, nodata=nodata) - assert img1.raster_equal(img1_full, warn_failure_reason=True) - - # 2/ Single-band real raster with nodata values - img2 = gu.Raster(self.aster_dem_path) - nodata = img2.nodata - transform = img2.transform - shape = img2.shape - - # Check both inputs work (grid coords or transform+shape) on a subsample - pc2 = img2.to_pointcloud(subsample=10000, random_state=42) - img2_sub = gu.Raster.from_pointcloud_regular(pc2, transform=transform, shape=shape, nodata=nodata) - - grid_coords2 = img2.coords(grid=False) - img2_sub2 = gu.Raster.from_pointcloud_regular(pc2, grid_coords=grid_coords2, nodata=nodata) - - assert img2_sub.raster_equal(img2_sub2, warn_failure_reason=True) - - # Check that number of valid values are equal to point cloud size - assert np.count_nonzero(~img2_sub.data.mask) == 10000 - - # With no subsampling, should get the exact same raster back - pc2_full = img2.to_pointcloud() - img2_full = gu.Raster.from_pointcloud_regular(pc2_full, transform=transform, shape=shape, nodata=nodata) - assert img2.raster_equal(img2_full, warn_failure_reason=True, strict_masked=False) - - # 3/ Error raising - with pytest.raises(TypeError, match="Input grid coordinates must be 1D arrays.*"): - gu.Raster.from_pointcloud_regular(pc1, grid_coords=(1, "lol")) # type: ignore - with pytest.raises(ValueError, match="Grid coordinates must be regular*"): - grid_coords1[0][0] += 1 - gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) # type: ignore - with pytest.raises( - ValueError, match="Either grid coordinates or both geotransform and shape must be provided." 
- ): - gu.Raster.from_pointcloud_regular(pc1) - class TestMask: # Paths to example data @@ -3147,149 +2102,6 @@ def test_implicit_logical_casting_real(self, example: str) -> None: assert np.array_equal(mask.data.data, rst.data.data >= 1) assert np.array_equal(mask.data.mask, rst.data.mask) - @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore - def test_reproject(self, mask: gu.Mask) -> None: - # Test 1: with a classic resampling (bilinear) - - # Reproject mask - resample to 100 x 100 grid - mask_orig = mask.copy() - mask_reproj = mask.reproject(grid_size=(100, 100), force_source_nodata=2) - - # Check instance is respected - assert isinstance(mask_reproj, gu.Mask) - # Check the dtype of the original mask was properly reconverted - assert mask.data.dtype == bool - # Check the original mask was not modified during reprojection - assert mask_orig.raster_equal(mask) - - # Check inplace behaviour works - mask_tmp = mask.copy() - mask_tmp.reproject(grid_size=(100, 100), force_source_nodata=2, inplace=True) - assert mask_tmp.raster_equal(mask_reproj) - - # This should be equivalent to converting the array to uint8, reprojecting, converting back - mask_uint8 = mask.astype("uint8") - mask_uint8_reproj = mask_uint8.reproject(grid_size=(100, 100), force_source_nodata=2) - mask_uint8_reproj.data = mask_uint8_reproj.data.astype("bool") - - assert mask_reproj.raster_equal(mask_uint8_reproj) - - # Test 2: should raise a warning when the resampling differs from nearest - - with pytest.warns( - UserWarning, - match="Reprojecting a mask with a resampling method other than 'nearest', " - "the boolean array will be converted to float during interpolation.", - ): - mask.reproject(res=50, resampling="bilinear", force_source_nodata=2) - - @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore - def test_crop(self, mask: gu.Mask) -> None: - # Test with same bounds -> should be the same # - - mask_orig = 
mask.copy() - crop_geom = mask.bounds - mask_cropped = mask.crop(crop_geom) - assert mask_cropped.raster_equal(mask) - - # Check if instance is respected - assert isinstance(mask_cropped, gu.Mask) - # Check the dtype of the original mask was properly reconverted - assert mask.data.dtype == bool - # Check the original mask was not modified during cropping - assert mask_orig.raster_equal(mask) - - # Check inplace behaviour works - mask_tmp = mask.copy() - mask_tmp.crop(crop_geom, inplace=True) - assert mask_tmp.raster_equal(mask_cropped) - - # - Test cropping each side by a random integer of pixels - # - rng = np.random.default_rng(42) - rand_int = rng.integers(1, min(mask.shape) - 1) - - # Left - crop_geom2 = [crop_geom[0] + rand_int * mask.res[0], crop_geom[1], crop_geom[2], crop_geom[3]] - mask_cropped = mask.crop(crop_geom2) - assert list(mask_cropped.bounds) == crop_geom2 - assert np.array_equal(mask.data[:, rand_int:].data, mask_cropped.data.data, equal_nan=True) - assert np.array_equal(mask.data[:, rand_int:].mask, mask_cropped.data.mask) - - # Right - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * mask.res[0], crop_geom[3]] - mask_cropped = mask.crop(crop_geom2) - assert list(mask_cropped.bounds) == crop_geom2 - assert np.array_equal(mask.data[:, :-rand_int].data, mask_cropped.data.data, equal_nan=True) - assert np.array_equal(mask.data[:, :-rand_int].mask, mask_cropped.data.mask) - - # Bottom - crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(mask.res[1]), crop_geom[2], crop_geom[3]] - mask_cropped = mask.crop(crop_geom2) - assert list(mask_cropped.bounds) == crop_geom2 - assert np.array_equal(mask.data[:-rand_int, :].data, mask_cropped.data.data, equal_nan=True) - assert np.array_equal(mask.data[:-rand_int, :].mask, mask_cropped.data.mask) - - # Top - crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(mask.res[1])] - mask_cropped = mask.crop(crop_geom2) - assert list(mask_cropped.bounds) == 
crop_geom2 - assert np.array_equal(mask.data[rand_int:, :].data, mask_cropped.data, equal_nan=True) - assert np.array_equal(mask.data[rand_int:, :].mask, mask_cropped.data.mask) - - # Test inplace - mask_orig = mask.copy() - mask_orig.crop(crop_geom2, inplace=True) - assert list(mask_orig.bounds) == crop_geom2 - assert np.array_equal(mask.data[rand_int:, :].data, mask_orig.data, equal_nan=True) - assert np.array_equal(mask.data[rand_int:, :].mask, mask_orig.data.mask) - - # Run with match_extent, check that inplace or not yields the same result - - # TODO: Pretty sketchy with the current functioning of "match_extent", - # should we just remove it from Raster.crop() ? - - # mask_cropped = mask.crop(crop_geom2, inplace=False, mode="match_extent") - # mask_orig.crop(crop_geom2, mode="match_extent") - # assert mask_cropped.raster_equal(mask_orig) - - @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore - def test_polygonize(self, mask: gu.Mask) -> None: - - mask_orig = mask.copy() - # Run default - vect = mask.polygonize() - # Check the dtype of the original mask was properly reconverted - assert mask.data.dtype == bool - # Check the original mask was not modified during polygonizing - assert mask_orig.raster_equal(mask) - - # Check the output is cast into a vector - assert isinstance(vect, gu.Vector) - - # Run with zero as target - vect = mask.polygonize(target_values=0) - assert isinstance(vect, gu.Vector) - - # Check a warning is raised when using a non-boolean value - with pytest.warns(UserWarning, match="In-value converted to 1 for polygonizing boolean mask."): - mask.polygonize(target_values=2) - - @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore - def test_proximity(self, mask: gu.Mask) -> None: - - mask_orig = mask.copy() - # Run default - rast = mask.proximity() - # Check the dtype of the original mask was properly reconverted - assert mask.data.dtype == bool - # Check 
the original mask was not modified during reprojection - assert mask_orig.raster_equal(mask) - - # Check that output is cast back into a raster - assert isinstance(rast, gu.Raster) - # A mask is a raster, so also need to check this - assert not isinstance(rast, gu.Mask) - @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore def test_save(self, mask: gu.Mask) -> None: """Test saving for masks""" diff --git a/tests/test_raster/test_satimg.py b/tests/test_raster/test_satimg.py index b6aa3ff6..c05a46ee 100644 --- a/tests/test_raster/test_satimg.py +++ b/tests/test_raster/test_satimg.py @@ -1,6 +1,7 @@ """ Test functions for SatelliteImage class """ + import datetime import datetime as dt import sys diff --git a/tests/test_vector.py b/tests/test_vector.py deleted file mode 100644 index 08d848d3..00000000 --- a/tests/test_vector.py +++ /dev/null @@ -1,927 +0,0 @@ -from __future__ import annotations - -import inspect -import os.path -import pathlib -import re -import tempfile -import warnings - -import geopandas as gpd -import geopandas.base -import matplotlib.pyplot as plt -import numpy as np -import pyproj -import pytest -from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal -from pandas.testing import assert_series_equal -from scipy.ndimage import binary_erosion -from shapely.geometry.base import BaseGeometry -from shapely.geometry.linestring import LineString -from shapely.geometry.multilinestring import MultiLineString -from shapely.geometry.multipolygon import MultiPolygon -from shapely.geometry.polygon import Polygon - -import geoutils as gu - -GLACIER_OUTLINES_URL = "http://public.data.npolar.no/cryoclim/CryoClim_GAO_SJ_1990.zip" - - -class TestVector: - landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped") - everest_outlines_path = gu.examples.get_path("everest_rgi_outlines") - aster_dem_path = gu.examples.get_path("exploradores_aster_dem") - aster_outlines_path = 
gu.examples.get_path("exploradores_rgi_outlines") - glacier_outlines = gu.Vector(GLACIER_OUTLINES_URL) - - def test_init(self) -> None: - """Test class initiation works as intended""" - - # First, with a URL filename - v = gu.Vector(GLACIER_OUTLINES_URL) - assert isinstance(v, gu.Vector) - - # Second, with a string filename - v0 = gu.Vector(self.aster_outlines_path) - assert isinstance(v0, gu.Vector) - - # Third, with a pathlib path - path = pathlib.Path(self.aster_outlines_path) - v1 = gu.Vector(path) - assert isinstance(v1, gu.Vector) - - # Fourth, with a geopandas dataframe - v2 = gu.Vector(gpd.read_file(self.aster_outlines_path)) - assert isinstance(v2, gu.Vector) - - # Fifth, passing a Vector itself (points back to Vector passed) - v3 = gu.Vector(v2) - assert isinstance(v3, gu.Vector) - - # Check errors are raised when filename has wrong type - with pytest.raises(TypeError, match="Filename argument should be a string, Path or geopandas.GeoDataFrame."): - gu.Vector(1) # type: ignore - - def test_copy(self) -> None: - vector2 = self.glacier_outlines.copy() - - assert vector2 is not self.glacier_outlines - - vector2.ds = vector2.ds.query("NAME == 'Ayerbreen'") - - assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0] - - def test_info(self) -> None: - - v = gu.Vector(GLACIER_OUTLINES_URL) - - # Check default runs without error (prints to screen) - output = v.info() - assert output is None - - # Otherwise returns info - output2 = v.info(verbose=False) - assert isinstance(output2, str) - list_prints = ["Filename", "Coordinate system", "Extent", "Number of features", "Attributes"] - assert all(p in output2 for p in list_prints) - - def test_query(self) -> None: - vector2 = self.glacier_outlines.query("NAME == 'Ayerbreen'") - - assert vector2 is not self.glacier_outlines - - assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0] - - def test_save(self) -> None: - """Test the save wrapper for GeoDataFrame.to_file().""" - - vector = 
gu.Vector(self.aster_outlines_path) - - # Create a temporary file in a temporary directory - temp_dir = tempfile.TemporaryDirectory() - temp_file = os.path.join(temp_dir.name, "test.gpkg") - - # Save and check the file exists - vector.save(temp_file) - assert os.path.exists(temp_file) - - # Open and check the object is the same - vector_save = gu.Vector(temp_file) - vector_save.vector_equal(vector) - - def test_bounds(self) -> None: - bounds = self.glacier_outlines.bounds - - assert bounds.left < bounds.right - assert bounds.bottom < bounds.top - - assert bounds.left == self.glacier_outlines.ds.total_bounds[0] - assert bounds.bottom == self.glacier_outlines.ds.total_bounds[1] - assert bounds.right == self.glacier_outlines.ds.total_bounds[2] - assert bounds.top == self.glacier_outlines.ds.total_bounds[3] - - def test_footprint(self) -> None: - - footprint = self.glacier_outlines.footprint - - assert isinstance(footprint, gu.Vector) - assert footprint.vector_equal(self.glacier_outlines.get_footprint_projected(self.glacier_outlines.crs)) - - def test_reproject(self) -> None: - """Test that the reproject function works as intended""" - - v0 = gu.Vector(self.aster_outlines_path) - r0 = gu.Raster(self.aster_dem_path) - v1 = gu.Vector(self.everest_outlines_path) - - # First, test with a EPSG integer - v1 = v0.reproject(crs=32617) - assert isinstance(v1, gu.Vector) - assert v1.crs.to_epsg() == 32617 - - # Check the inplace behaviour matches the not-inplace one - v2 = v0.copy() - v2.reproject(crs=32617, inplace=True) - v2.vector_equal(v1) - - # Check that the reprojection is the same as with geopandas - gpd1 = v0.ds.to_crs(epsg=32617) - assert_geodataframe_equal(gpd1, v1.ds) - - # Second, with a Raster object - v2 = v0.reproject(r0) - assert v2.crs == r0.crs - - # Third, with a Vector object that has a different CRS - assert v0.crs != v1.crs - v3 = v0.reproject(v1) - assert v3.crs == v1.crs - - # Fourth, check that errors are raised when appropriate - # When no destination 
CRS is defined, or both dst_crs and dst_ref are passed - with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")): - v0.reproject() - v0.reproject(ref=r0, crs=32617) - # If the path provided does not exist - with pytest.raises(ValueError, match=re.escape("Reference raster or vector path does not exist.")): - v0.reproject(ref="tmp.lol") - # If it exists but cannot be opened by rasterio or fiona - with pytest.raises(ValueError, match=re.escape("Could not open raster or vector with rasterio or pyogrio.")): - v0.reproject(ref="geoutils/examples.py") - # If input of wrong type - with pytest.raises(TypeError, match=re.escape("Type of ref must be string path to file, Raster or Vector.")): - v0.reproject(ref=10) # type: ignore - - def test_rasterize_proj(self) -> None: - # Capture the warning on resolution not matching exactly bounds - with pytest.warns(UserWarning): - burned = self.glacier_outlines.rasterize(xres=3000) - - assert burned.shape[0] == 146 - assert burned.shape[1] == 115 - - def test_rasterize_unproj(self) -> None: - """Test rasterizing an EPSG:3426 dataset into a projection.""" - - vct = gu.Vector(self.everest_outlines_path) - rst = gu.Raster(self.landsat_b4_crop_path) - - # Use Web Mercator at 30 m. 
- # Capture the warning on resolution not matching exactly bounds - with pytest.warns(UserWarning): - burned = vct.rasterize(xres=30, crs=3857) - - assert burned.shape[0] == 1251 - assert burned.shape[1] == 1522 - - # Typically, rasterize returns a raster - burned_in2_out1 = vct.rasterize(raster=rst, in_value=2, out_value=1) - assert isinstance(burned_in2_out1, gu.Raster) - - # For an in_value of 1 and out_value of 0 (default), it returns a mask - burned_mask = vct.rasterize(raster=rst, in_value=1) - assert isinstance(burned_mask, gu.Mask) - - # Check that rasterizing with in_value=1 is the same as creating a mask - assert burned_mask.raster_equal(vct.create_mask(raster=rst)) - - # The two rasterization should match - assert np.all(burned_in2_out1[burned_mask] == 2) - assert np.all(burned_in2_out1[~burned_mask] == 1) - - # Check that errors are raised - with pytest.raises(ValueError, match="Only one of raster or crs can be provided."): - vct.rasterize(raster=rst, crs=3857) - - test_data = [[landsat_b4_crop_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]] - - @pytest.mark.parametrize("data", test_data) # type: ignore - def test_crop(self, data: list[str]) -> None: - # Load data - raster_path, outlines_path = data - rst = gu.Raster(raster_path) - outlines = gu.Vector(outlines_path) - - # Need to reproject to r.crs. 
Otherwise, crop will work but will be approximate - # Because outlines might be warped in a different crs - outlines.ds = outlines.ds.to_crs(rst.crs) - - # Crop - outlines_new = outlines.copy() - outlines_new.crop(crop_geom=rst, inplace=True) - - # Check default behaviour - crop and return copy - outlines_copy = outlines.crop(crop_geom=rst) - - # Crop by passing bounds - outlines_new_bounds = outlines.copy() - outlines_new_bounds.crop(crop_geom=list(rst.bounds), inplace=True) - assert_geodataframe_equal(outlines_new.ds, outlines_new_bounds.ds) - # Check the return-by-copy as well - assert_geodataframe_equal(outlines_copy.ds, outlines_new_bounds.ds) - - # Verify that geometries intersect with raster bound - rst_poly = gu.projtools.bounds2poly(rst.bounds) - intersects_new = [] - for poly in outlines_new.ds.geometry: - intersects_new.append(poly.intersects(rst_poly)) - - assert np.all(intersects_new) - - # Check that some of the original outlines did not intersect and were removed - intersects_old = [] - for poly in outlines.ds.geometry: - intersects_old.append(poly.intersects(rst_poly)) - - assert np.sum(intersects_old) == np.sum(intersects_new) - - # Check that some features were indeed removed - assert np.sum(~np.array(intersects_old)) > 0 - - # Check that error is raised when cropGeom argument is invalid - with pytest.raises(TypeError, match="Crop geometry must be a Raster, Vector, or list of coordinates."): - outlines.crop(1, inplace=True) # type: ignore - - def test_translate(self) -> None: - - vector = gu.Vector(self.everest_outlines_path) - - # Check default behaviour is not inplace - vector_shifted = vector.translate(xoff=2.5, yoff=5.7) - assert isinstance(vector_shifted, gu.Vector) - assert_geoseries_equal(vector_shifted.geometry, vector.geometry.translate(xoff=2.5, yoff=5.7)) - - # Check inplace behaviour works correctly - vector2 = vector.copy() - output = vector2.translate(xoff=2.5, yoff=5.7, inplace=True) - assert output is None - 
assert_geoseries_equal(vector2.geometry, vector_shifted.geometry) - - def test_proximity(self) -> None: - """ - The core functionality is already tested against GDAL in test_raster: just verify the vector-specific behaviour. - #TODO: add an artificial test as well (mirroring TODO in test_raster) - """ - - vector = gu.Vector(self.everest_outlines_path) - - # -- Test 1: with a Raster provided -- - raster1 = gu.Raster(self.landsat_b4_crop_path) - prox1 = vector.proximity(raster=raster1) - - # The proximity should have the same extent, resolution and CRS - assert raster1.georeferenced_grid_equal(prox1) - - # With the base geometry - vector.proximity(raster=raster1, geometry_type="geometry") - - # With another geometry option - vector.proximity(raster=raster1, geometry_type="centroid") - - # With only inside proximity - vector.proximity(raster=raster1, in_or_out="in") - - # -- Test 2: with no Raster provided, just grid size -- - - # Default grid size - vector.proximity() - - # With specific grid size - vector.proximity(size=(100, 100)) - - -class TestSynthetic: - # Create a synthetic vector file with a square of size 1, started at position (10, 10) - poly1 = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)]) - gdf = gpd.GeoDataFrame({"geometry": [poly1]}, crs="EPSG:4326") - vector = gu.Vector(gdf) - - # Same with a square started at position (5, 5) - poly2 = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]) - gdf = gpd.GeoDataFrame({"geometry": [poly2]}, crs="EPSG:4326") - vector2 = gu.Vector(gdf) - - # Create a multipolygon with both - multipoly = MultiPolygon([poly1, poly2]) - gdf = gpd.GeoDataFrame({"geometry": [multipoly]}, crs="EPSG:4326") - vector_multipoly = gu.Vector(gdf) - - # Create a synthetic vector file with a square of size 5, started at position (8, 8) - poly3 = Polygon([(8, 8), (13, 8), (13, 13), (8, 13)]) - gdf = gpd.GeoDataFrame({"geometry": [poly3]}, crs="EPSG:4326") - vector_5 = gu.Vector(gdf) - - # Create a synthetic LineString geometry - lines = 
LineString([(10, 10), (11, 10), (11, 11)]) - gdf = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326") - vector_lines = gu.Vector(gdf) - - # Create a synthetic MultiLineString geometry - multilines = MultiLineString([[(10, 10), (11, 10), (11, 11)], [(5, 5), (6, 5), (6, 6)]]) - gdf = gpd.GeoDataFrame({"geometry": [multilines]}, crs="EPSG:4326") - vector_multilines = gu.Vector(gdf) - - def test_create_mask(self) -> None: - """ - Test Vector.create_mask. - """ - # First with given res and bounds -> Should be a 21 x 21 array with 0 everywhere except center pixel - vector = self.vector.copy() - out_mask = vector.create_mask(xres=1, bounds=(0, 0, 21, 21), as_array=True) - ref_mask = np.zeros((21, 21), dtype="bool") - ref_mask[10, 10] = True - assert out_mask.shape == (21, 21) - assert np.all(ref_mask == out_mask) - - # Check that vector has not been modified by accident - assert vector.bounds == self.vector.bounds - assert len(vector.ds) == len(self.vector.ds) - assert vector.crs == self.vector.crs - - # Then with a gu.Raster as reference, single band - rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326") - out_mask = vector.create_mask(rst, as_array=True) - assert out_mask.shape == (21, 21) - - # With gu.Raster, 2 bands -> fails... 
- # rst = gu.Raster.from_array(np.zeros((2, 21, 21)), transform=(1., 0., 0., 0., -1., 21.), crs='EPSG:4326') - # out_mask = vector.create_mask(rst) - - # Test that buffer = 0 works - out_mask_buff = vector.create_mask(rst, buffer=0, as_array=True) - assert np.all(ref_mask == out_mask_buff) - - # Test that buffer > 0 works - rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326") - out_mask = vector.create_mask(rst, as_array=True) - for buffer in np.arange(1, 8): - out_mask_buff = vector.create_mask(rst, buffer=buffer, as_array=True) - diff = out_mask_buff & ~out_mask - assert np.count_nonzero(diff) > 0 - # Difference between masks should always be thinner than buffer + 1 - eroded_diff = binary_erosion(diff.squeeze(), np.ones((buffer + 1, buffer + 1))) - assert np.count_nonzero(eroded_diff) == 0 - - # Test that buffer < 0 works - vector_5 = self.vector_5 - out_mask = vector_5.create_mask(rst, as_array=True) - for buffer in np.arange(-1, -3, -1): - out_mask_buff = vector_5.create_mask(rst, buffer=buffer, as_array=True) - diff = ~out_mask_buff & out_mask - assert np.count_nonzero(diff) > 0 - # Difference between masks should always be thinner than buffer + 1 - eroded_diff = binary_erosion(diff.squeeze(), np.ones((abs(buffer) + 1, abs(buffer) + 1))) - assert np.count_nonzero(eroded_diff) == 0 - - # Check that no warning is raised when creating a mask with a xres not multiple of vector bounds - mask = vector.create_mask(xres=1.01) - - # Check that by default, create_mask returns a Mask - assert isinstance(mask, gu.Mask) - - # Check that an error is raised if xres is not passed - with pytest.raises(ValueError, match="At least raster or xres must be set."): - vector.create_mask() - - # Check that an error is raised if buffer is the wrong type - with pytest.raises(TypeError, match="Buffer must be a number, currently set to str."): - vector.create_mask(rst, buffer="lol") # type: ignore - - # If the raster has the wrong 
type - with pytest.raises(TypeError, match="Raster must be a geoutils.Raster or None."): - vector.create_mask("lol") # type: ignore - - # Check that a warning is raised if the bounds were passed specifically by the user - with pytest.warns(UserWarning): - vector.create_mask(xres=1.01, bounds=(0, 0, 21, 21)) - - def test_extract_vertices(self) -> None: - """ - Test that extract_vertices works with simple geometries. - """ - # Polygons - vertices = gu.vector.extract_vertices(self.vector.ds) - assert len(vertices) == 1 - assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)]] - - # MultiPolygons - vertices = gu.vector.extract_vertices(self.vector_multipoly.ds) - assert len(vertices) == 2 - assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)] - assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0), (5.0, 6.0), (5.0, 5.0)] - - # LineString - vertices = gu.vector.extract_vertices(self.vector_lines.ds) - assert len(vertices) == 1 - assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)]] - - # MultiLineString - vertices = gu.vector.extract_vertices(self.vector_multilines.ds) - assert len(vertices) == 2 - assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)] - assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0)] - - def test_generate_voronoi(self) -> None: - """ - Check that vector.generate_voronoi_polygons works on a simple Polygon. - Does not work with simple shapes as squares or triangles as the diagram is infinite. - For now, test on a set of two squares. 
- """ - # Check with a multipolygon - voronoi = gu.vector.generate_voronoi_polygons(self.vector_multipoly.ds) - assert len(voronoi) == 2 - vertices = gu.vector.extract_vertices(voronoi) - assert vertices == [ - [(5.5, 10.5), (10.5, 10.5), (10.5, 5.5), (5.5, 10.5)], - [(5.5, 10.5), (10.5, 5.5), (5.5, 5.5), (5.5, 10.5)], - ] - - # Check that it fails with proper error for too simple geometries - expected_message = "Invalid geometry, cannot generate finite Voronoi polygons" - with pytest.raises(ValueError, match=expected_message): - voronoi = gu.vector.generate_voronoi_polygons(self.vector.ds) - - def test_buffer_metric(self) -> None: - """Check that metric buffering works""" - - # Case with two squares: test that the buffered area is without deformations - # https://epsg.io/32631 - utm31_x_center = 500000 - utm31_y_center = 4649776 - poly1_utm31 = Polygon( - [ - (utm31_x_center, utm31_y_center), - (utm31_x_center + 1, utm31_y_center), - (utm31_x_center + 1, utm31_y_center + 1), - (utm31_x_center, utm31_y_center + 1), - ] - ) - - poly2_utm31 = Polygon( - [ - (utm31_x_center + 10, utm31_y_center + 10), - (utm31_x_center + 11, utm31_y_center + 10), - (utm31_x_center + 11, utm31_y_center + 11), - (utm31_x_center + 10, utm31_y_center + 11), - ] - ) - - # We initiate the squares of size 1x1 in a UTM projection - two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[poly1_utm31, poly2_utm31], crs="EPSG:32631")) - - # Their area should now be 1 for each polygon - assert two_squares.ds.area.values[0] == 1 - assert two_squares.ds.area.values[1] == 1 - - # We buffer them - two_squares_utm_buffered = two_squares.buffer_metric(buffer_size=1.0) - - # Their area should now be 1 (square) + 4 (buffer along the sides) + 4*(pi*1**2 /4) - # (buffer of corners = quarter-disks) - expected_area = 1 + 4 + np.pi - assert two_squares_utm_buffered.ds.area.values[0] == pytest.approx(expected_area, abs=0.01) - assert two_squares_utm_buffered.ds.area.values[1] == pytest.approx(expected_area, 
abs=0.01) - - # And the new GeoDataFrame should exactly match that of one buffer from the original one - direct_gpd_buffer = gu.Vector( - gpd.GeoDataFrame(geometry=two_squares.ds.buffer(distance=1.0).geometry, crs=two_squares.crs) - ) - assert_geodataframe_equal(direct_gpd_buffer.ds, two_squares_utm_buffered.ds) - - # Now, if we reproject the original vector in a non-metric system - two_squares_geographic = gu.Vector(two_squares.ds.to_crs(epsg=4326)) - # We buffer directly the Vector object in the non-metric system - two_squares_geographic_buffered = two_squares_geographic.buffer_metric(buffer_size=1.0) - # Then, we reproject that vector in the UTM zone - two_squares_geographic_buffered_reproj = gu.Vector( - two_squares_geographic_buffered.ds.to_crs(crs=two_squares.crs) - ) - - # Their area should now be the same as before for each polygon - assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01) - assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01) - - # And this time, it is the reprojected GeoDataFrame that should almost match (within a tolerance of 10e-06) - assert all(direct_gpd_buffer.ds.geom_equals_exact(two_squares_geographic_buffered_reproj.ds, tolerance=10e-6)) - - def test_buffer_without_overlap(self, monkeypatch) -> None: # type: ignore - """ - Check that non-overlapping buffer feature works. Does not work on simple geometries, so test on MultiPolygon. - Yet, very simple geometries yield unexpected results, as is the case for the second test case here. 
- """ - # Case 1, test with two squares, in separate Polygons - two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[self.poly1, self.poly2], crs="EPSG:4326")) - - # Check with buffers that should not overlap - # ------------------------------------------ - buffer_size = 2 - # We force metric = False, so buffer should raise a GeoPandas warning - with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"): - buffer = two_squares.buffer_without_overlap(buffer_size, metric=False) - - # Output should be of same size as input and same geometry type - assert len(buffer.ds) == len(two_squares.ds) - assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type) - - # Extract individual geometries - polys = [] - for geom in buffer.ds.geometry: - if geom.geom_type in ["MultiPolygon"]: - polys.extend(list(geom)) - else: - polys.append(geom) - - # Check they do not overlap - for i in range(len(polys)): - for j in range(i + 1, len(polys)): - assert not polys[i].intersects(polys[j]) - - # buffer should yield the same result as create_mask with buffer, minus the original mask - mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) - mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size) - mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) - assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer) - - # Case 2 - Check with buffers that overlap -> this case is actually not the expected result ! 
- # ------------------------------- - buffer_size = 5 - # We force metric = False, so buffer should raise a GeoPandas warning - with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"): - buffer = two_squares.buffer_without_overlap(buffer_size, metric=False) - - # Output should be of same size as input and same geometry type - assert len(buffer.ds) == len(two_squares.ds) - assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type) - - # Extract individual geometries - polys = [] - for geom in buffer.ds.geometry: - if geom.geom_type in ["MultiPolygon"]: - polys.extend(list(geom)) - else: - polys.append(geom) - - # Check they do not overlap - for i in range(len(polys)): - for j in range(i + 1, len(polys)): - assert polys[i].intersection(polys[j]).area == 0 - - # buffer should yield the same result as create_mask with buffer, minus the original mask - mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) - mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size) - mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) - assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer) - - # Check that plotting runs without errors and close it - monkeypatch.setattr(plt, "show", lambda: None) - two_squares.buffer_without_overlap(buffer_size, plot=True) - - -class NeedToImplementWarning(FutureWarning): - """Warning to remember to implement new GeoPandas methods""" - - -class TestGeoPandasMethods: - # Use two synthetic vectors - poly = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)]) - gdf1 = gpd.GeoDataFrame({"geometry": [poly]}, crs="EPSG:4326") - synthvec1 = gu.Vector(gdf1) - - # Create a synthetic LineString geometry - lines = LineString([(10, 10), (10.5, 10.5), (11, 11)]) - gdf2 = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326") - synthvec2 = gu.Vector(gdf2) - - # Use two real-life vectors - realvec1 = 
gu.Vector(gu.examples.get_path("exploradores_rgi_outlines")) - realvec2 = gu.Vector(gu.examples.get_path("everest_rgi_outlines")) - - # Properties and methods derived from Shapely or GeoPandas - # List of properties and methods with non-geometric output that are implemented in GeoUtils - main_properties = ["crs", "geometry", "total_bounds"] - nongeo_properties = [ - "area", - "length", - "interiors", - "geom_type", - "is_empty", - "is_ring", - "is_simple", - "is_valid", - "has_z", - ] - nongeo_methods = [ - "contains", - "geom_equals", - "geom_almost_equals", - "geom_equals_exact", - "crosses", - "disjoint", - "intersects", - "overlaps", - "touches", - "within", - "covers", - "covered_by", - "distance", - ] - - # List of properties and methods with geometric output that are implemented in GeoUtils - geo_properties = ["boundary", "unary_union", "centroid", "convex_hull", "envelope", "exterior"] - geo_methods = [ - "representative_point", - "normalize", - "make_valid", - "difference", - "symmetric_difference", - "union", - "intersection", - "clip_by_rect", - "buffer", - "simplify", - "affine_transform", - "translate", - "rotate", - "scale", - "skew", - "dissolve", - "explode", - "sjoin", - "sjoin_nearest", - "overlay", - "to_crs", - "set_crs", - "rename_geometry", - "set_geometry", - "clip", - ] - # List of class methods - io_methods = [ - "from_file", - "from_postgis", - "from_dict", - "from_features", - "to_feather", - "to_parquet", - "to_file", - "to_postgis", - "to_json", - "to_wkb", - "to_wkt", - "to_csv", - ] - - # List of other properties and methods - other = ["has_sindex", "sindex", "estimate_utm_crs", "cx", "iterfeatures"] - all_declared = ( - main_properties + nongeo_methods + nongeo_properties + geo_methods + geo_properties + other + io_methods - ) - - # Exceptions for GeoPandasBase functions not implemented (or deprecrated) in GeoSeries/GeoDataFrame - exceptions_unimplemented = [ - "plot", - "explore", - "cascaded_union", - "bounds", - "relate", - 
"project", - "interpolate", - "equals", - "type", - "convert_dtypes", - "merge", - "apply", - "astype", - "minimum_bounding_circle", - "minimum_bounding_radius", - "get_coordinates", - "hilbert_distance", - "sample_points", - "copy", - ] - # Exceptions for IO/conversion that can be done directly from .ds - all_exceptions = exceptions_unimplemented - - # Get all GeoPandasBase public methods with some exceptions - geobase_methods = gpd.base.GeoPandasBase.__dict__.copy() - - # Get all GeoDataFrame public methods with some exceptions - gdf_methods = gpd.GeoDataFrame.__dict__.copy() - - def test_overridden_funcs_exist(self) -> None: - """Check that all methods listed above exist in Vector.""" - - # Check that all methods declared in the class above exist in Vector - vector_methods = gu.Vector.__dict__ - - list_missing = [method for method in self.all_declared if method not in vector_methods.keys()] - - assert len(list_missing) == 0, print(f"Test method listed that is not in GeoUtils: {list_missing}") - - def test_geopandas_coverage(self) -> None: - """Check that all existing methods of GeoPandas are overridden, with a couple exceptions.""" - - # Merge the two - all_methods = self.geobase_methods.copy() - all_methods.update(self.gdf_methods) - - # Remove exceptions we don't want to reuse from GeoPandas (mirrored in Vector) - name_all_methods = list(all_methods.keys()) - public_methods = [method for method in name_all_methods if method[0] != "_"] - - covered_methods = [method for method in public_methods if method not in self.all_exceptions] - - # Check that all methods declared in the class above are covered in Vector - list_missing = [method for method in covered_methods if method not in self.all_declared] - - if len(list_missing) != 0: - warnings.warn( - f"New GeoPandas methods are not implemented in GeoUtils: {list_missing}", NeedToImplementWarning - ) - - @pytest.mark.parametrize("method", nongeo_methods + geo_methods) # type: ignore - def 
test_overridden_funcs_args(self, method: str) -> None: - """Check that all methods overridden have the same arguments as in GeoPandas.""" - - # Get GeoPandas class where the methods live - if method in self.geobase_methods.keys(): - upstream_class = gpd.base.GeoPandasBase - elif method in self.gdf_methods.keys(): - upstream_class = gpd.GeoDataFrame - else: - raise ValueError("Method did not belong to GeoDataFrame or GeoPandasBase class.") - - # Get a full argument inspection object for each class - argspec_upstream = inspect.getfullargspec(getattr(upstream_class, method)) - argspec_geoutils = inspect.getfullargspec(getattr(gu.Vector, method)) - - # Check that all positional arguments are the same - if argspec_upstream.args != argspec_geoutils.args: - warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) - - # Check that the *args and **kwargs argument are declared consistently - if argspec_upstream.varargs != argspec_geoutils.varargs: - warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) - - if argspec_upstream.varkw != argspec_geoutils.varkw: - warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) - - # Check that default argument values are the same - if argspec_upstream.defaults != argspec_geoutils.defaults: - warnings.warn("Default argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) - - @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore - @pytest.mark.parametrize("method", nongeo_properties) # type: ignore - def test_nongeo_properties(self, vector: gu.Vector, method: str) -> None: - """Check non-geometric properties are consistent with GeoPandas.""" - - # Remove warnings about operations in a non-projected system, and future changes - warnings.simplefilter("ignore", category=UserWarning) - warnings.simplefilter("ignore", category=FutureWarning) - - # Get 
method for each class - output_geoutils = getattr(vector, method) - output_geopandas = getattr(vector.ds, method) - - # Assert equality - assert_series_equal(output_geoutils, output_geopandas) - - @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore - @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore - @pytest.mark.parametrize("method", nongeo_methods) # type: ignore - def test_nongeo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None: - """ - Check non-geometric methods are consistent with GeoPandas. - All these methods require two inputs ("other", "df", or "right" argument), except one. - """ - - # Remove warnings about operations in a non-projected system, and future changes - warnings.simplefilter("ignore", category=UserWarning) - warnings.simplefilter("ignore", category=FutureWarning) - - # Get method for each class - if method != "geom_equals_exact": - output_geoutils = getattr(vector1, method)(vector2) - output_geopandas = getattr(vector1.ds, method)(vector2.ds) - else: - output_geoutils = getattr(vector1, method)(vector2, tolerance=0.1) - output_geopandas = getattr(vector1.ds, method)(vector2.ds, tolerance=0.1) - - # Assert equality - assert_series_equal(output_geoutils, output_geopandas) - - @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore - @pytest.mark.parametrize("method", geo_properties) # type: ignore - def test_geo_properties(self, vector: gu.Vector, method: str) -> None: - """Check geometric properties are consistent with GeoPandas.""" - - # Remove warnings about operations in a non-projected system, and future changes - warnings.simplefilter("ignore", category=UserWarning) - warnings.simplefilter("ignore", category=FutureWarning) - - # Get method for each class - output_geoutils = getattr(vector, method) - output_geopandas = getattr(vector.ds, method) - - # Assert output types - assert isinstance(output_geoutils, gu.Vector) - assert 
isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame, BaseGeometry)) - - # Separate cases depending on GeoPandas' output - if isinstance(output_geopandas, gpd.GeoSeries): - # Assert geoseries equality - assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas) - elif isinstance(output_geopandas, BaseGeometry): - assert_geodataframe_equal( - output_geoutils.ds, gpd.GeoDataFrame({"geometry": [output_geopandas]}, crs=vector.crs) - ) - else: - assert_geodataframe_equal(output_geoutils.ds, output_geopandas) - - specific_method_args = { - "buffer": {"distance": 1}, - "clip_by_rect": {"xmin": 10.5, "ymin": 10.5, "xmax": 11, "ymax": 11}, - "affine_transform": {"matrix": [1, 1, 1, 1, 1, 1]}, - "translate": {"xoff": 1, "yoff": 1, "zoff": 0}, - "rotate": {"angle": 90}, - "scale": {"xfact": 1.1, "yfact": 1.1, "zfact": 1.1, "origin": "center"}, - "skew": {"xs": 1.1, "ys": 1.1}, - "interpolate": {"distance": 1}, - "simplify": {"tolerance": 0.1}, - "to_crs": {"crs": pyproj.CRS.from_epsg(32610)}, - "set_crs": {"crs": pyproj.CRS.from_epsg(32610), "allow_override": True}, - "rename_geometry": {"col": "lol"}, - "set_geometry": {"col": synthvec1.geometry}, - "clip": {"mask": poly}, - } - - @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore - @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore - @pytest.mark.parametrize("method", geo_methods) # type: ignore - def test_geo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None: - """Check geometric methods are consistent with GeoPandas.""" - - # Remove warnings about operations in a non-projected system, and future changes - warnings.simplefilter("ignore", category=UserWarning) - warnings.simplefilter("ignore", category=FutureWarning) - - # Methods that require two inputs - if method in [ - "difference", - "symmetric_difference", - "union", - "intersection", - "sjoin", - "sjoin_nearest", - "overlay", - ]: - output_geoutils = getattr(vector1, 
method)(vector2) - output_geopandas = getattr(vector1.ds, method)(vector2.ds) - # Methods that require zero input - elif method in ["representative_point", "normalize", "make_valid", "dissolve", "explode"]: - output_geoutils = getattr(vector1, method)() - output_geopandas = getattr(vector1.ds, method)() - elif method in self.specific_method_args.keys(): - output_geoutils = getattr(vector1, method)(**self.specific_method_args[method]) - output_geopandas = getattr(vector1.ds, method)(**self.specific_method_args[method]) - else: - raise ValueError(f"The method '{method}' is not covered by this test.") - - # Assert output types - assert isinstance(output_geoutils, gu.Vector) - assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame)) - - # Separate cases depending on GeoPandas' output, and nature of the function - # Simplify is a special case that can make geometries invalid, so adjust test - if method == "simplify": - # TODO: Unskip this random test failure (one index not matching) when this is fixed in GeoPandas/Shapely - pass - # assert_geoseries_equal( - # output_geopandas.make_valid(), output_geoutils.ds.geometry.make_valid(), check_less_precise=True - # ) - # For geoseries output, check equality of it - elif isinstance(output_geopandas, gpd.GeoSeries): - assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas) - # For geodataframe output, check equality - else: - assert_geodataframe_equal(output_geoutils.ds, output_geopandas) diff --git a/tests/test_vector/test_geometric.py b/tests/test_vector/test_geometric.py new file mode 100644 index 00000000..20d38463 --- /dev/null +++ b/tests/test_vector/test_geometric.py @@ -0,0 +1,227 @@ +"""Tests for geometry operations on vectors.""" + +from __future__ import annotations + +import geopandas as gpd +import matplotlib.pyplot as plt +import numpy as np +import pytest +from geopandas.testing import assert_geodataframe_equal +from shapely import LineString, MultiLineString, MultiPolygon, Polygon + 
+import geoutils as gu +from geoutils.vector.geometric import _extract_vertices, _generate_voronoi_polygons + + +class TestGeometric: + + # Create a synthetic vector file with a square of size 1, started at position (10, 10) + poly1 = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)]) + gdf = gpd.GeoDataFrame({"geometry": [poly1]}, crs="EPSG:4326") + vector = gu.Vector(gdf) + + # Same with a square started at position (5, 5) + poly2 = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]) + gdf = gpd.GeoDataFrame({"geometry": [poly2]}, crs="EPSG:4326") + vector2 = gu.Vector(gdf) + + # Create a multipolygon with both + multipoly = MultiPolygon([poly1, poly2]) + gdf = gpd.GeoDataFrame({"geometry": [multipoly]}, crs="EPSG:4326") + vector_multipoly = gu.Vector(gdf) + + # Create a synthetic vector file with a square of size 5, started at position (8, 8) + poly3 = Polygon([(8, 8), (13, 8), (13, 13), (8, 13)]) + gdf = gpd.GeoDataFrame({"geometry": [poly3]}, crs="EPSG:4326") + vector_5 = gu.Vector(gdf) + + # Create a synthetic LineString geometry + lines = LineString([(10, 10), (11, 10), (11, 11)]) + gdf = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326") + vector_lines = gu.Vector(gdf) + + # Create a synthetic MultiLineString geometry + multilines = MultiLineString([[(10, 10), (11, 10), (11, 11)], [(5, 5), (6, 5), (6, 6)]]) + gdf = gpd.GeoDataFrame({"geometry": [multilines]}, crs="EPSG:4326") + vector_multilines = gu.Vector(gdf) + + def test_extract_vertices(self) -> None: + """ + Test that extract_vertices works with simple geometries. 
+ """ + # Polygons + vertices = _extract_vertices(self.vector.ds) + assert len(vertices) == 1 + assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)]] + + # MultiPolygons + vertices = _extract_vertices(self.vector_multipoly.ds) + assert len(vertices) == 2 + assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)] + assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0), (5.0, 6.0), (5.0, 5.0)] + + # LineString + vertices = _extract_vertices(self.vector_lines.ds) + assert len(vertices) == 1 + assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)]] + + # MultiLineString + vertices = _extract_vertices(self.vector_multilines.ds) + assert len(vertices) == 2 + assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)] + assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0)] + + def test_generate_voronoi(self) -> None: + """ + Check that vector.generate_voronoi_polygons works on a simple Polygon. + Does not work with simple shapes as squares or triangles as the diagram is infinite. + For now, test on a set of two squares. 
+ """ + # Check with a multipolygon + voronoi = _generate_voronoi_polygons(self.vector_multipoly.ds) + assert len(voronoi) == 2 + vertices = _extract_vertices(voronoi) + assert vertices == [ + [(5.5, 10.5), (10.5, 10.5), (10.5, 5.5), (5.5, 10.5)], + [(5.5, 10.5), (10.5, 5.5), (5.5, 5.5), (5.5, 10.5)], + ] + + # Check that it fails with proper error for too simple geometries + expected_message = "Invalid geometry, cannot generate finite Voronoi polygons" + with pytest.raises(ValueError, match=expected_message): + voronoi = _generate_voronoi_polygons(self.vector.ds) + + def test_buffer_metric(self) -> None: + """Check that metric buffering works""" + + # Case with two squares: test that the buffered area is without deformations + # https://epsg.io/32631 + utm31_x_center = 500000 + utm31_y_center = 4649776 + poly1_utm31 = Polygon( + [ + (utm31_x_center, utm31_y_center), + (utm31_x_center + 1, utm31_y_center), + (utm31_x_center + 1, utm31_y_center + 1), + (utm31_x_center, utm31_y_center + 1), + ] + ) + + poly2_utm31 = Polygon( + [ + (utm31_x_center + 10, utm31_y_center + 10), + (utm31_x_center + 11, utm31_y_center + 10), + (utm31_x_center + 11, utm31_y_center + 11), + (utm31_x_center + 10, utm31_y_center + 11), + ] + ) + + # We initiate the squares of size 1x1 in a UTM projection + two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[poly1_utm31, poly2_utm31], crs="EPSG:32631")) + + # Their area should now be 1 for each polygon + assert two_squares.ds.area.values[0] == 1 + assert two_squares.ds.area.values[1] == 1 + + # We buffer them + two_squares_utm_buffered = two_squares.buffer_metric(buffer_size=1.0) + + # Their area should now be 1 (square) + 4 (buffer along the sides) + 4*(pi*1**2 /4) + # (buffer of corners = quarter-disks) + expected_area = 1 + 4 + np.pi + assert two_squares_utm_buffered.ds.area.values[0] == pytest.approx(expected_area, abs=0.01) + assert two_squares_utm_buffered.ds.area.values[1] == pytest.approx(expected_area, abs=0.01) + + # And the new 
GeoDataFrame should exactly match that of one buffer from the original one + direct_gpd_buffer = gu.Vector( + gpd.GeoDataFrame(geometry=two_squares.ds.buffer(distance=1.0).geometry, crs=two_squares.crs) + ) + assert_geodataframe_equal(direct_gpd_buffer.ds, two_squares_utm_buffered.ds) + + # Now, if we reproject the original vector in a non-metric system + two_squares_geographic = gu.Vector(two_squares.ds.to_crs(epsg=4326)) + # We buffer directly the Vector object in the non-metric system + two_squares_geographic_buffered = two_squares_geographic.buffer_metric(buffer_size=1.0) + # Then, we reproject that vector in the UTM zone + two_squares_geographic_buffered_reproj = gu.Vector( + two_squares_geographic_buffered.ds.to_crs(crs=two_squares.crs) + ) + + # Their area should now be the same as before for each polygon + assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01) + assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01) + + # And this time, it is the reprojected GeoDataFrame that should almost match (within a tolerance of 10e-06) + assert all(direct_gpd_buffer.ds.geom_equals_exact(two_squares_geographic_buffered_reproj.ds, tolerance=10e-6)) + + def test_buffer_without_overlap(self, monkeypatch) -> None: # type: ignore + """ + Check that non-overlapping buffer feature works. Does not work on simple geometries, so test on MultiPolygon. + Yet, very simple geometries yield unexpected results, as is the case for the second test case here. 
+ """ + # Case 1, test with two squares, in separate Polygons + two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[self.poly1, self.poly2], crs="EPSG:4326")) + + # Check with buffers that should not overlap + # ------------------------------------------ + buffer_size = 2 + # We force metric = False, so buffer should raise a GeoPandas warning + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"): + buffer = two_squares.buffer_without_overlap(buffer_size, metric=False) + + # Output should be of same size as input and same geometry type + assert len(buffer.ds) == len(two_squares.ds) + assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type) + + # Extract individual geometries + polys = [] + for geom in buffer.ds.geometry: + if geom.geom_type in ["MultiPolygon"]: + polys.extend(list(geom)) + else: + polys.append(geom) + + # Check they do not overlap + for i in range(len(polys)): + for j in range(i + 1, len(polys)): + assert not polys[i].intersects(polys[j]) + + # buffer should yield the same result as create_mask with buffer, minus the original mask + mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) + mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size) + mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) + assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer) + + # Case 2 - Check with buffers that overlap -> this case is actually not the expected result ! 
+ # ------------------------------- + buffer_size = 5 + # We force metric = False, so buffer should raise a GeoPandas warning + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"): + buffer = two_squares.buffer_without_overlap(buffer_size, metric=False) + + # Output should be of same size as input and same geometry type + assert len(buffer.ds) == len(two_squares.ds) + assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type) + + # Extract individual geometries + polys = [] + for geom in buffer.ds.geometry: + if geom.geom_type in ["MultiPolygon"]: + polys.extend(list(geom)) + else: + polys.append(geom) + + # Check they do not overlap + for i in range(len(polys)): + for j in range(i + 1, len(polys)): + assert polys[i].intersection(polys[j]).area == 0 + + # buffer should yield the same result as create_mask with buffer, minus the original mask + mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) + mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size) + mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21)) + assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer) + + # Check that plotting runs without errors and close it + monkeypatch.setattr(plt, "show", lambda: None) + two_squares.buffer_without_overlap(buffer_size, plot=True) diff --git a/tests/test_vector/test_geotransformations_vector.py b/tests/test_vector/test_geotransformations_vector.py new file mode 100644 index 00000000..e1db15df --- /dev/null +++ b/tests/test_vector/test_geotransformations_vector.py @@ -0,0 +1,128 @@ +"""Tests for geotransformations of vectors.""" + +from __future__ import annotations + +import re + +import numpy as np +import pytest +from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal + +import geoutils as gu + + +class TestGeotransformations: + + landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped") + 
everest_outlines_path = gu.examples.get_path("everest_rgi_outlines") + aster_dem_path = gu.examples.get_path("exploradores_aster_dem") + aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines") + + def test_reproject(self) -> None: + """Test that the reproject function works as intended""" + + v0 = gu.Vector(self.aster_outlines_path) + r0 = gu.Raster(self.aster_dem_path) + v1 = gu.Vector(self.everest_outlines_path) + + # First, test with a EPSG integer + v1 = v0.reproject(crs=32617) + assert isinstance(v1, gu.Vector) + assert v1.crs.to_epsg() == 32617 + + # Check the inplace behaviour matches the not-inplace one + v2 = v0.copy() + v2.reproject(crs=32617, inplace=True) + v2.vector_equal(v1) + + # Check that the reprojection is the same as with geopandas + gpd1 = v0.ds.to_crs(epsg=32617) + assert_geodataframe_equal(gpd1, v1.ds) + + # Second, with a Raster object + v2 = v0.reproject(r0) + assert v2.crs == r0.crs + + # Third, with a Vector object that has a different CRS + assert v0.crs != v1.crs + v3 = v0.reproject(v1) + assert v3.crs == v1.crs + + # Fourth, check that errors are raised when appropriate + # When no destination CRS is defined, or both dst_crs and dst_ref are passed + with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. 
Not both.")): + v0.reproject() + v0.reproject(ref=r0, crs=32617) + # If the path provided does not exist + with pytest.raises(ValueError, match=re.escape("Reference raster or vector path does not exist.")): + v0.reproject(ref="tmp.lol") + # If it exists but cannot be opened by rasterio or fiona + with pytest.raises(ValueError, match=re.escape("Could not open raster or vector with rasterio or pyogrio.")): + v0.reproject(ref="geoutils/examples.py") + # If input of wrong type + with pytest.raises(TypeError, match=re.escape("Type of ref must be string path to file, Raster or Vector.")): + v0.reproject(ref=10) # type: ignore + + test_data = [[landsat_b4_crop_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]] + + @pytest.mark.parametrize("data", test_data) # type: ignore + def test_crop(self, data: list[str]) -> None: + # Load data + raster_path, outlines_path = data + rst = gu.Raster(raster_path) + outlines = gu.Vector(outlines_path) + + # Need to reproject to r.crs. Otherwise, crop will work but will be approximate + # Because outlines might be warped in a different crs + outlines.ds = outlines.ds.to_crs(rst.crs) + + # Crop + outlines_new = outlines.copy() + outlines_new.crop(crop_geom=rst, inplace=True) + + # Check default behaviour - crop and return copy + outlines_copy = outlines.crop(crop_geom=rst) + + # Crop by passing bounds + outlines_new_bounds = outlines.copy() + outlines_new_bounds.crop(crop_geom=list(rst.bounds), inplace=True) + assert_geodataframe_equal(outlines_new.ds, outlines_new_bounds.ds) + # Check the return-by-copy as well + assert_geodataframe_equal(outlines_copy.ds, outlines_new_bounds.ds) + + # Verify that geometries intersect with raster bound + rst_poly = gu.projtools.bounds2poly(rst.bounds) + intersects_new = [] + for poly in outlines_new.ds.geometry: + intersects_new.append(poly.intersects(rst_poly)) + + assert np.all(intersects_new) + + # Check that some of the original outlines did not intersect and were removed + 
intersects_old = [] + for poly in outlines.ds.geometry: + intersects_old.append(poly.intersects(rst_poly)) + + assert np.sum(intersects_old) == np.sum(intersects_new) + + # Check that some features were indeed removed + assert np.sum(~np.array(intersects_old)) > 0 + + # Check that error is raised when cropGeom argument is invalid + with pytest.raises(TypeError, match="Crop geometry must be a Raster, Vector, or list of coordinates."): + outlines.crop(1, inplace=True) # type: ignore + + def test_translate(self) -> None: + + vector = gu.Vector(self.everest_outlines_path) + + # Check default behaviour is not inplace + vector_shifted = vector.translate(xoff=2.5, yoff=5.7) + assert isinstance(vector_shifted, gu.Vector) + assert_geoseries_equal(vector_shifted.geometry, vector.geometry.translate(xoff=2.5, yoff=5.7)) + + # Check inplace behaviour works correctly + vector2 = vector.copy() + output = vector2.translate(xoff=2.5, yoff=5.7, inplace=True) + assert output is None + assert_geoseries_equal(vector2.geometry, vector_shifted.geometry) diff --git a/tests/test_vector/test_vector.py b/tests/test_vector/test_vector.py new file mode 100644 index 00000000..bc524325 --- /dev/null +++ b/tests/test_vector/test_vector.py @@ -0,0 +1,454 @@ +"""Test functions specific to the Vector class.""" + +from __future__ import annotations + +import inspect +import os.path +import pathlib +import tempfile +import warnings + +import geopandas as gpd +import geopandas.base +import pyproj +import pytest +from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal +from pandas.testing import assert_series_equal +from shapely.geometry.base import BaseGeometry +from shapely.geometry.linestring import LineString +from shapely.geometry.polygon import Polygon + +import geoutils as gu + +GLACIER_OUTLINES_URL = "http://public.data.npolar.no/cryoclim/CryoClim_GAO_SJ_1990.zip" + + +class TestVector: + landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped") + 
everest_outlines_path = gu.examples.get_path("everest_rgi_outlines") + aster_dem_path = gu.examples.get_path("exploradores_aster_dem") + aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines") + glacier_outlines = gu.Vector(GLACIER_OUTLINES_URL) + + def test_init(self) -> None: + """Test class initiation works as intended""" + + # First, with a URL filename + v = gu.Vector(GLACIER_OUTLINES_URL) + assert isinstance(v, gu.Vector) + + # Second, with a string filename + v0 = gu.Vector(self.aster_outlines_path) + assert isinstance(v0, gu.Vector) + + # Third, with a pathlib path + path = pathlib.Path(self.aster_outlines_path) + v1 = gu.Vector(path) + assert isinstance(v1, gu.Vector) + + # Fourth, with a geopandas dataframe + v2 = gu.Vector(gpd.read_file(self.aster_outlines_path)) + assert isinstance(v2, gu.Vector) + + # Fifth, passing a Vector itself (points back to Vector passed) + v3 = gu.Vector(v2) + assert isinstance(v3, gu.Vector) + + # Check errors are raised when filename has wrong type + with pytest.raises(TypeError, match="Filename argument should be a string, Path or geopandas.GeoDataFrame."): + gu.Vector(1) # type: ignore + + def test_copy(self) -> None: + vector2 = self.glacier_outlines.copy() + + assert vector2 is not self.glacier_outlines + + vector2.ds = vector2.ds.query("NAME == 'Ayerbreen'") + + assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0] + + def test_info(self) -> None: + + v = gu.Vector(GLACIER_OUTLINES_URL) + + # Check default runs without error (prints to screen) + output = v.info() + assert output is None + + # Otherwise returns info + output2 = v.info(verbose=False) + assert isinstance(output2, str) + list_prints = ["Filename", "Coordinate system", "Extent", "Number of features", "Attributes"] + assert all(p in output2 for p in list_prints) + + def test_query(self) -> None: + vector2 = self.glacier_outlines.query("NAME == 'Ayerbreen'") + + assert vector2 is not self.glacier_outlines + + assert vector2.ds.shape[0] 
< self.glacier_outlines.ds.shape[0] + + def test_save(self) -> None: + """Test the save wrapper for GeoDataFrame.to_file().""" + + vector = gu.Vector(self.aster_outlines_path) + + # Create a temporary file in a temporary directory + temp_dir = tempfile.TemporaryDirectory() + temp_file = os.path.join(temp_dir.name, "test.gpkg") + + # Save and check the file exists + vector.save(temp_file) + assert os.path.exists(temp_file) + + # Open and check the object is the same + vector_save = gu.Vector(temp_file) + vector_save.vector_equal(vector) + + def test_bounds(self) -> None: + bounds = self.glacier_outlines.bounds + + assert bounds.left < bounds.right + assert bounds.bottom < bounds.top + + assert bounds.left == self.glacier_outlines.ds.total_bounds[0] + assert bounds.bottom == self.glacier_outlines.ds.total_bounds[1] + assert bounds.right == self.glacier_outlines.ds.total_bounds[2] + assert bounds.top == self.glacier_outlines.ds.total_bounds[3] + + def test_footprint(self) -> None: + + footprint = self.glacier_outlines.footprint + + assert isinstance(footprint, gu.Vector) + assert footprint.vector_equal(self.glacier_outlines.get_footprint_projected(self.glacier_outlines.crs)) + + +class NeedToImplementWarning(FutureWarning): + """Warning to remember to implement new GeoPandas methods""" + + +class TestGeoPandasMethods: + # Use two synthetic vectors + poly = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)]) + gdf1 = gpd.GeoDataFrame({"geometry": [poly]}, crs="EPSG:4326") + synthvec1 = gu.Vector(gdf1) + + # Create a synthetic LineString geometry + lines = LineString([(10, 10), (10.5, 10.5), (11, 11)]) + gdf2 = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326") + synthvec2 = gu.Vector(gdf2) + + # Use two real-life vectors + realvec1 = gu.Vector(gu.examples.get_path("exploradores_rgi_outlines")) + realvec2 = gu.Vector(gu.examples.get_path("everest_rgi_outlines")) + + # Properties and methods derived from Shapely or GeoPandas + # List of properties and methods with 
non-geometric output that are implemented in GeoUtils + main_properties = ["crs", "geometry", "total_bounds"] + nongeo_properties = [ + "area", + "length", + "interiors", + "geom_type", + "is_empty", + "is_ring", + "is_simple", + "is_valid", + "has_z", + ] + nongeo_methods = [ + "contains", + "geom_equals", + "geom_almost_equals", + "geom_equals_exact", + "crosses", + "disjoint", + "intersects", + "overlaps", + "touches", + "within", + "covers", + "covered_by", + "distance", + ] + + # List of properties and methods with geometric output that are implemented in GeoUtils + geo_properties = ["boundary", "unary_union", "centroid", "convex_hull", "envelope", "exterior"] + geo_methods = [ + "representative_point", + "normalize", + "make_valid", + "difference", + "symmetric_difference", + "union", + "intersection", + "clip_by_rect", + "buffer", + "simplify", + "affine_transform", + "translate", + "rotate", + "scale", + "skew", + "dissolve", + "explode", + "sjoin", + "sjoin_nearest", + "overlay", + "to_crs", + "set_crs", + "rename_geometry", + "set_geometry", + "clip", + ] + # List of class methods + io_methods = [ + "from_file", + "from_postgis", + "from_dict", + "from_features", + "to_feather", + "to_parquet", + "to_file", + "to_postgis", + "to_json", + "to_wkb", + "to_wkt", + "to_csv", + ] + + # List of other properties and methods + other = ["has_sindex", "sindex", "estimate_utm_crs", "cx", "iterfeatures"] + all_declared = ( + main_properties + nongeo_methods + nongeo_properties + geo_methods + geo_properties + other + io_methods + ) + + # Exceptions for GeoPandasBase functions not implemented (or deprecrated) in GeoSeries/GeoDataFrame + exceptions_unimplemented = [ + "plot", + "explore", + "cascaded_union", + "bounds", + "relate", + "project", + "interpolate", + "equals", + "type", + "convert_dtypes", + "merge", + "apply", + "astype", + "minimum_bounding_circle", + "minimum_bounding_radius", + "get_coordinates", + "hilbert_distance", + "sample_points", + "copy", + ] + 
# Exceptions for IO/conversion that can be done directly from .ds + all_exceptions = exceptions_unimplemented + + # Get all GeoPandasBase public methods with some exceptions + geobase_methods = gpd.base.GeoPandasBase.__dict__.copy() + + # Get all GeoDataFrame public methods with some exceptions + gdf_methods = gpd.GeoDataFrame.__dict__.copy() + + def test_overridden_funcs_exist(self) -> None: + """Check that all methods listed above exist in Vector.""" + + # Check that all methods declared in the class above exist in Vector + vector_methods = gu.Vector.__dict__ + + list_missing = [method for method in self.all_declared if method not in vector_methods.keys()] + + assert len(list_missing) == 0, print(f"Test method listed that is not in GeoUtils: {list_missing}") + + def test_geopandas_coverage(self) -> None: + """Check that all existing methods of GeoPandas are overridden, with a couple exceptions.""" + + # Merge the two + all_methods = self.geobase_methods.copy() + all_methods.update(self.gdf_methods) + + # Remove exceptions we don't want to reuse from GeoPandas (mirrored in Vector) + name_all_methods = list(all_methods.keys()) + public_methods = [method for method in name_all_methods if method[0] != "_"] + + covered_methods = [method for method in public_methods if method not in self.all_exceptions] + + # Check that all methods declared in the class above are covered in Vector + list_missing = [method for method in covered_methods if method not in self.all_declared] + + if len(list_missing) != 0: + warnings.warn( + f"New GeoPandas methods are not implemented in GeoUtils: {list_missing}", NeedToImplementWarning + ) + + @pytest.mark.parametrize("method", nongeo_methods + geo_methods) # type: ignore + def test_overridden_funcs_args(self, method: str) -> None: + """Check that all methods overridden have the same arguments as in GeoPandas.""" + + # Get GeoPandas class where the methods live + if method in self.geobase_methods.keys(): + upstream_class = 
gpd.base.GeoPandasBase + elif method in self.gdf_methods.keys(): + upstream_class = gpd.GeoDataFrame + else: + raise ValueError("Method did not belong to GeoDataFrame or GeoPandasBase class.") + + # Get a full argument inspection object for each class + argspec_upstream = inspect.getfullargspec(getattr(upstream_class, method)) + argspec_geoutils = inspect.getfullargspec(getattr(gu.Vector, method)) + + # Check that all positional arguments are the same + if argspec_upstream.args != argspec_geoutils.args: + warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) + + # Check that the *args and **kwargs argument are declared consistently + if argspec_upstream.varargs != argspec_geoutils.varargs: + warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) + + if argspec_upstream.varkw != argspec_geoutils.varkw: + warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) + + # Check that default argument values are the same + if argspec_upstream.defaults != argspec_geoutils.defaults: + warnings.warn("Default argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning) + + @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore + @pytest.mark.parametrize("method", nongeo_properties) # type: ignore + def test_nongeo_properties(self, vector: gu.Vector, method: str) -> None: + """Check non-geometric properties are consistent with GeoPandas.""" + + # Remove warnings about operations in a non-projected system, and future changes + warnings.simplefilter("ignore", category=UserWarning) + warnings.simplefilter("ignore", category=FutureWarning) + + # Get method for each class + output_geoutils = getattr(vector, method) + output_geopandas = getattr(vector.ds, method) + + # Assert equality + assert_series_equal(output_geoutils, output_geopandas) + + @pytest.mark.parametrize("vector1", [synthvec1, 
realvec1]) # type: ignore + @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore + @pytest.mark.parametrize("method", nongeo_methods) # type: ignore + def test_nongeo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None: + """ + Check non-geometric methods are consistent with GeoPandas. + All these methods require two inputs ("other", "df", or "right" argument), except one. + """ + + # Remove warnings about operations in a non-projected system, and future changes + warnings.simplefilter("ignore", category=UserWarning) + warnings.simplefilter("ignore", category=FutureWarning) + + # Get method for each class + if method != "geom_equals_exact": + output_geoutils = getattr(vector1, method)(vector2) + output_geopandas = getattr(vector1.ds, method)(vector2.ds) + else: + output_geoutils = getattr(vector1, method)(vector2, tolerance=0.1) + output_geopandas = getattr(vector1.ds, method)(vector2.ds, tolerance=0.1) + + # Assert equality + assert_series_equal(output_geoutils, output_geopandas) + + @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore + @pytest.mark.parametrize("method", geo_properties) # type: ignore + def test_geo_properties(self, vector: gu.Vector, method: str) -> None: + """Check geometric properties are consistent with GeoPandas.""" + + # Remove warnings about operations in a non-projected system, and future changes + warnings.simplefilter("ignore", category=UserWarning) + warnings.simplefilter("ignore", category=FutureWarning) + + # Get method for each class + output_geoutils = getattr(vector, method) + output_geopandas = getattr(vector.ds, method) + + # Assert output types + assert isinstance(output_geoutils, gu.Vector) + assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame, BaseGeometry)) + + # Separate cases depending on GeoPandas' output + if isinstance(output_geopandas, gpd.GeoSeries): + # Assert geoseries equality + 
assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas) + elif isinstance(output_geopandas, BaseGeometry): + assert_geodataframe_equal( + output_geoutils.ds, gpd.GeoDataFrame({"geometry": [output_geopandas]}, crs=vector.crs) + ) + else: + assert_geodataframe_equal(output_geoutils.ds, output_geopandas) + + specific_method_args = { + "buffer": {"distance": 1}, + "clip_by_rect": {"xmin": 10.5, "ymin": 10.5, "xmax": 11, "ymax": 11}, + "affine_transform": {"matrix": [1, 1, 1, 1, 1, 1]}, + "translate": {"xoff": 1, "yoff": 1, "zoff": 0}, + "rotate": {"angle": 90}, + "scale": {"xfact": 1.1, "yfact": 1.1, "zfact": 1.1, "origin": "center"}, + "skew": {"xs": 1.1, "ys": 1.1}, + "interpolate": {"distance": 1}, + "simplify": {"tolerance": 0.1}, + "to_crs": {"crs": pyproj.CRS.from_epsg(32610)}, + "set_crs": {"crs": pyproj.CRS.from_epsg(32610), "allow_override": True}, + "rename_geometry": {"col": "lol"}, + "set_geometry": {"col": synthvec1.geometry}, + "clip": {"mask": poly}, + } + + @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore + @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore + @pytest.mark.parametrize("method", geo_methods) # type: ignore + def test_geo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None: + """Check geometric methods are consistent with GeoPandas.""" + + # Remove warnings about operations in a non-projected system, and future changes + warnings.simplefilter("ignore", category=UserWarning) + warnings.simplefilter("ignore", category=FutureWarning) + + # Methods that require two inputs + if method in [ + "difference", + "symmetric_difference", + "union", + "intersection", + "sjoin", + "sjoin_nearest", + "overlay", + ]: + output_geoutils = getattr(vector1, method)(vector2) + output_geopandas = getattr(vector1.ds, method)(vector2.ds) + # Methods that require zero input + elif method in ["representative_point", "normalize", "make_valid", "dissolve", "explode"]: + 
output_geoutils = getattr(vector1, method)() + output_geopandas = getattr(vector1.ds, method)() + elif method in self.specific_method_args.keys(): + output_geoutils = getattr(vector1, method)(**self.specific_method_args[method]) + output_geopandas = getattr(vector1.ds, method)(**self.specific_method_args[method]) + else: + raise ValueError(f"The method '{method}' is not covered by this test.") + + # Assert output types + assert isinstance(output_geoutils, gu.Vector) + assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame)) + + # Separate cases depending on GeoPandas' output, and nature of the function + # Simplify is a special case that can make geometries invalid, so adjust test + if method == "simplify": + # TODO: Unskip this random test failure (one index not matching) when this is fixed in GeoPandas/Shapely + pass + # assert_geoseries_equal( + # output_geopandas.make_valid(), output_geoutils.ds.geometry.make_valid(), check_less_precise=True + # ) + # For geoseries output, check equality of it + elif isinstance(output_geopandas, gpd.GeoSeries): + assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas) + # For geodataframe output, check equality + else: + assert_geodataframe_equal(output_geoutils.ds, output_geopandas)