diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f7bd04bc..af06fcb3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@ ci:
autoupdate_schedule: quarterly
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.3.0
+ rev: v5.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
@@ -12,10 +12,14 @@ repos:
- id: check-merge-conflict
# Fix common spelling mistakes
- repo: https://github.com/codespell-project/codespell
- rev: v2.2.1
+ rev: v2.3.0
hooks:
- id: codespell
- args: [--ignore-words-list=alos, --ignore-regex=\bnin\b]
+ args: [
+ '--ignore-words-list', 'alos,inout,vor',
+ '--ignore-regex', '\bnin\b',
+ '--'
+ ]
types_or: [python, rst, markdown]
files: ^(geoutils|doc|tests)/
@@ -27,23 +31,24 @@ repos:
# Format the code aggressively using black
- repo: https://github.com/psf/black
- rev: 22.10.0
+ rev: 24.10.0
hooks:
- id: black
args: [--line-length=120]
# Lint the code using flake8
- repo: https://github.com/pycqa/flake8
- rev: 3.9.2
+ rev: 7.1.1
hooks:
- id: flake8
args: [
- --max-line-length=120,
- --extend-ignore=E203, # flake8 disagrees with black, so this should be ignored.
+ '--max-line-length', '120', # we can write dicts however we want
+ '--extend-ignore', 'E203,B028', # flake8 disagrees with black, so this should be ignored.
+ '--'
]
additional_dependencies:
- - flake8-comprehensions==3.1.0
- - flake8-bugbear==21.3.2
+ - flake8-comprehensions
+ - flake8-bugbear
files: ^(geoutils|tests)
# Lint the code using mypy
- repo: https://github.com/pre-commit/mirrors-mypy
@@ -63,26 +68,26 @@ repos:
--disable-error-code=var-annotated,
--disable-error-code=no-any-return
]
- additional_dependencies: [tokenize-rt==3.2.0, numpy==1.22]
+ additional_dependencies: [tokenize-rt==3.2.0, numpy==1.26]
files: ^(geoutils|tests)
# Sort imports using isort
- repo: https://github.com/PyCQA/isort
- rev: 5.12.0
+ rev: 5.13.2
hooks:
- id: isort
args: [ "--profile", "black" ]
# Automatically upgrade syntax to a minimum version
- repo: https://github.com/asottile/pyupgrade
- rev: v3.1.0
+ rev: v3.19.0
hooks:
- id: pyupgrade
args: [--py37-plus]
# Various formattings
- repo: https://github.com/pre-commit/pygrep-hooks
- rev: v1.9.0
+ rev: v1.10.0
hooks:
# Single backticks should apparently not be used
- id: rst-backticks
@@ -101,7 +106,7 @@ repos:
# Add custom regex lints (see .relint.yml)
- repo: https://github.com/codingjoe/relint
- rev: 2.0.0
+ rev: 3.3.1
hooks:
- id: relint
- repo: local
diff --git a/doc/source/background.md b/doc/source/background.md
index f357fc2c..1855ea0b 100644
--- a/doc/source/background.md
+++ b/doc/source/background.md
@@ -51,7 +51,7 @@ In details, those mean:
- **Reproducibility:** all code is version-controlled and release-based, to ensure consistency of dependent packages and works;
-- **Open-source:** all code is accessible and re-usable to anyone in the community, for transparency and open governance.
+- **Open-source:** all code is accessible and reusable to anyone in the community, for transparency and open governance.
```{note}
:class: margin
diff --git a/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py b/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py
index 46de30d4..4a424662 100644
--- a/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py
+++ b/doc/source/code/about_geoutils_sidebyside_raster_rasterio.py
@@ -35,6 +35,7 @@
ma2 = rast2.read(masked=True)
ma_result = (1 + ma2) / (ma1_reproj)
+
# Equivalent of saving
# (requires to define a logical
# nodata for the data type)
diff --git a/doc/source/conf.py b/doc/source/conf.py
index cf8def54..c18f6b0b 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -202,7 +202,7 @@ def setup(app):
'to update your code see here. ⚠️'
"
Future changes will come with deprecation warnings! 🙂"
),
- "show_toc_level": 3
+ "show_toc_level": 3,
# "logo_only": True,
# "icon_links": [
# {
diff --git a/doc/source/core_array_funcs.md b/doc/source/core_array_funcs.md
index fba675ef..f982b3b9 100644
--- a/doc/source/core_array_funcs.md
+++ b/doc/source/core_array_funcs.md
@@ -33,7 +33,7 @@ matching georeferencing or shape, respectively.
These functions inherently support the casting of different {attr}`~geoutils.Raster.dtype` and values masked by {attr}`~geoutils.Raster.nodata` in the
{class}`~numpy.ma.MaskedArray`.
-Below, we re-use the same example created in {ref}`core-py-ops`.
+Below, we reuse the same example created in {ref}`core-py-ops`.
```{code-cell} ipython3
:tags: [hide-input, hide-output]
diff --git a/doc/source/georeferencing.md b/doc/source/georeferencing.md
index 08fd84e4..88841b86 100644
--- a/doc/source/georeferencing.md
+++ b/doc/source/georeferencing.md
@@ -124,7 +124,7 @@ vect.get_footprint_projected(vect.crs).plot()
### Grid (only for rasters)
A raster's grid origin and resolution are defined by its geotransform attribute, {attr}`~geoutils.Raster.transform`.
-Comined with the 2D shape of the data array {attr}`~geoutils.Raster.shape` (and independently of the number of
+Combined with the 2D shape of the data array {attr}`~geoutils.Raster.shape` (and independently of the number of
bands {attr}`~geoutils.Raster.bands`), these two attributes define the georeferenced grid of a raster.
From it are derived the resolution {attr}`~geoutils.Raster.res`, and {attr}`~geoutils.Raster.height` and
diff --git a/doc/source/sphinxext.py b/doc/source/sphinxext.py
index 05cec930..53ff95a8 100644
--- a/doc/source/sphinxext.py
+++ b/doc/source/sphinxext.py
@@ -1,4 +1,6 @@
"""Functions for documentation configuration only, importable by sphinx"""
+
+
# To reset resolution setting for each sphinx-gallery example
def reset_mpl(gallery_conf, fname):
# To get a good resolution for displayed figures
diff --git a/examples/analysis/array_numerics/numpy_interfacing.py b/examples/analysis/array_numerics/numpy_interfacing.py
index 98adf3d2..fde9ecb8 100644
--- a/examples/analysis/array_numerics/numpy_interfacing.py
+++ b/examples/analysis/array_numerics/numpy_interfacing.py
@@ -4,6 +4,7 @@
This example demonstrates NumPy interfacing with rasters on :class:`Rasters`. See :ref:`core-array-funcs` for more details.
"""
+
# %%
# We open a raster.
diff --git a/examples/analysis/array_numerics/python_arithmetic.py b/examples/analysis/array_numerics/python_arithmetic.py
index 1ae11bcb..207460f8 100644
--- a/examples/analysis/array_numerics/python_arithmetic.py
+++ b/examples/analysis/array_numerics/python_arithmetic.py
@@ -4,6 +4,7 @@
This example demonstrates arithmetic operations using raster arithmetic on :class:`Rasters`. See :ref:`core-py-ops` for more details.
"""
+
# %%
# We open a raster
diff --git a/examples/analysis/geospatial/buffer_voronoi.py b/examples/analysis/geospatial/buffer_voronoi.py
index 554f43bf..343b1e13 100644
--- a/examples/analysis/geospatial/buffer_voronoi.py
+++ b/examples/analysis/geospatial/buffer_voronoi.py
@@ -4,6 +4,7 @@
This example demonstrates the metric buffering of a vector using :func:`~geoutils.Vector.buffer_metric` and :func:`~geoutils.Vector.buffer_without_overlap`.
"""
+
# %%
# We open an example vector
diff --git a/examples/analysis/geospatial/proximity_metric.py b/examples/analysis/geospatial/proximity_metric.py
index c73964be..98afbf8b 100644
--- a/examples/analysis/geospatial/proximity_metric.py
+++ b/examples/analysis/geospatial/proximity_metric.py
@@ -4,6 +4,7 @@
This example demonstrates the calculation of proximity distances to a raster or vector using :func:`~geoutils.Raster.proximity`.
"""
+
# %%
# We open an example raster, and a vector for which we select a single feature
diff --git a/examples/analysis/point_extraction/interpolation.py b/examples/analysis/point_extraction/interpolation.py
index 5682b717..05450692 100644
--- a/examples/analysis/point_extraction/interpolation.py
+++ b/examples/analysis/point_extraction/interpolation.py
@@ -4,6 +4,7 @@
This example demonstrates the 2D interpolation of raster values to points using :func:`~geoutils.Raster.interp_points`.
"""
+
# %%
# We open an example raster, a digital elevation model in South America.
diff --git a/examples/analysis/point_extraction/reduction.py b/examples/analysis/point_extraction/reduction.py
index 533c2d5d..c2ea4028 100644
--- a/examples/analysis/point_extraction/reduction.py
+++ b/examples/analysis/point_extraction/reduction.py
@@ -4,6 +4,7 @@
This example demonstrates the reduction of windowed raster values around a point using :func:`~geoutils.Raster.value_at_coords`.
"""
+
# %%
# We open an example raster, a digital elevation model in South America.
diff --git a/examples/handling/georeferencing/crop_raster.py b/examples/handling/georeferencing/crop_raster.py
index 369b8380..3c0efeb5 100644
--- a/examples/handling/georeferencing/crop_raster.py
+++ b/examples/handling/georeferencing/crop_raster.py
@@ -4,6 +4,7 @@
This example demonstrates the cropping of a raster using :func:`geoutils.Raster.crop`.
"""
+
# %%
# We open a raster and vector, and subset the latter.
diff --git a/examples/handling/georeferencing/crop_vector.py b/examples/handling/georeferencing/crop_vector.py
index 65587bb7..604c3074 100644
--- a/examples/handling/georeferencing/crop_vector.py
+++ b/examples/handling/georeferencing/crop_vector.py
@@ -4,6 +4,7 @@
This example demonstrates the cropping of a vector using :func:`geoutils.Vector.crop`.
"""
+
# %%
# We open a raster and vector.
diff --git a/examples/handling/georeferencing/reproj_raster.py b/examples/handling/georeferencing/reproj_raster.py
index 924ddb91..8cf4db25 100644
--- a/examples/handling/georeferencing/reproj_raster.py
+++ b/examples/handling/georeferencing/reproj_raster.py
@@ -4,6 +4,7 @@
This example demonstrates the reprojection of a raster using :func:`geoutils.Raster.reproject`.
"""
+
# %%
# We open two example rasters.
diff --git a/examples/handling/georeferencing/reproj_vector.py b/examples/handling/georeferencing/reproj_vector.py
index aca9782a..e529ab31 100644
--- a/examples/handling/georeferencing/reproj_vector.py
+++ b/examples/handling/georeferencing/reproj_vector.py
@@ -4,6 +4,7 @@
This example demonstrates the reprojection of a vector using :func:`geoutils.Vector.reproject`.
"""
+
# %%
# We open a raster and vector.
diff --git a/examples/handling/interface/create_mask.py b/examples/handling/interface/create_mask.py
index 8657009a..35495608 100644
--- a/examples/handling/interface/create_mask.py
+++ b/examples/handling/interface/create_mask.py
@@ -4,6 +4,7 @@
This example demonstrates the creation of a mask from a vector using :func:`geoutils.Vector.create_mask`.
"""
+
# %%
# We open a raster and vector.
diff --git a/examples/handling/interface/polygonize.py b/examples/handling/interface/polygonize.py
index ea8d1280..8f3b8f1b 100644
--- a/examples/handling/interface/polygonize.py
+++ b/examples/handling/interface/polygonize.py
@@ -4,6 +4,7 @@
This example demonstrates the polygonizing of a raster using :func:`geoutils.Raster.polygonize` and :func:`geoutils.Mask.polygonize`.
"""
+
# %%
# We open a raster.
diff --git a/examples/handling/interface/rasterize.py b/examples/handling/interface/rasterize.py
index a8bb1a91..b4517d96 100644
--- a/examples/handling/interface/rasterize.py
+++ b/examples/handling/interface/rasterize.py
@@ -4,6 +4,7 @@
This example demonstrates the rasterizing of a vector using :func:`geoutils.Vector.rasterize`.
"""
+
# %%
# We open a raster and vector.
diff --git a/examples/handling/interface/topoints.py b/examples/handling/interface/topoints.py
index fcf3df54..1b758584 100644
--- a/examples/handling/interface/topoints.py
+++ b/examples/handling/interface/topoints.py
@@ -4,6 +4,7 @@
This example demonstrates the conversion of a raster to point vector using :func:`geoutils.Raster.to_points`.
"""
+
# %%
# We open a raster.
diff --git a/geoutils/_config.py b/geoutils/_config.py
index 7ecb9830..8fe35ebc 100644
--- a/geoutils/_config.py
+++ b/geoutils/_config.py
@@ -1,4 +1,5 @@
"""Setup of runtime-compile configuration of GeoUtils."""
+
from __future__ import annotations
import configparser
diff --git a/geoutils/_typing.py b/geoutils/_typing.py
index d8aacec9..ecf21256 100644
--- a/geoutils/_typing.py
+++ b/geoutils/_typing.py
@@ -1,4 +1,5 @@
"""Typing aliases for internal use."""
+
from __future__ import annotations
import sys
diff --git a/geoutils/examples.py b/geoutils/examples.py
index 1662e53a..ed315608 100644
--- a/geoutils/examples.py
+++ b/geoutils/examples.py
@@ -1,4 +1,5 @@
"""Utility functions to download and find example data."""
+
import os
import tarfile
import tempfile
diff --git a/geoutils/interface/__init__.py b/geoutils/interface/__init__.py
new file mode 100644
index 00000000..78379c95
--- /dev/null
+++ b/geoutils/interface/__init__.py
@@ -0,0 +1,5 @@
+from geoutils.interface.distance import * # noqa
+from geoutils.interface.gridding import * # noqa
+from geoutils.interface.interpolate import * # noqa
+from geoutils.interface.raster_point import * # noqa
+from geoutils.interface.raster_vector import * # noqa
diff --git a/geoutils/interface/distance.py b/geoutils/interface/distance.py
new file mode 100644
index 00000000..c07f8fd1
--- /dev/null
+++ b/geoutils/interface/distance.py
@@ -0,0 +1,88 @@
+"""Functionalities related to distance operations."""
+
+from __future__ import annotations
+
+import warnings
+from typing import Literal
+
+import geopandas as gpd
+import numpy as np
+from scipy.ndimage import distance_transform_edt
+
+import geoutils as gu
+from geoutils._typing import NDArrayNum
+
+
+def _proximity_from_vector_or_raster(
+ raster: gu.Raster,
+ vector: gu.Vector | None = None,
+ target_values: list[float] | None = None,
+ geometry_type: str = "boundary",
+ in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both",
+ distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced",
+) -> NDArrayNum:
+ """
+ (This function is defined here as mostly raster-based, but used in a class method for both Raster and Vector)
+ Proximity to a Raster's target values if no Vector is provided, otherwise to a Vector's geometry type
+ rasterized on the Raster.
+
+ :param raster: Raster to burn the proximity grid on.
+ :param vector: Vector for which to compute the proximity to geometry,
+ if not provided computed on the Raster target pixels.
+ :param target_values: (Only with a Raster) List of target values to use for the proximity,
+ defaults to all non-zero values.
+ :param geometry_type: (Only with a Vector) Type of geometry to use for the proximity, defaults to 'boundary'.
+ :param in_or_out: (Only with a Vector) Compute proximity only 'in' or 'out'-side the geometry, or 'both'.
+ :param distance_unit: Distance unit, either 'georeferenced' or 'pixel'.
+ """
+
+ # 1/ First, if there is a vector input, we rasterize the geometry type
+ # (works with .boundary that is a LineString (.exterior exists, but is a LinearRing)
+ if vector is not None:
+
+ # TODO: Only when using centroid... Maybe we should leave this operation to the user anyway?
+ warnings.filterwarnings("ignore", message="Geometry is in a geographic CRS.*")
+
+ # We create a geodataframe with the geometry type
+ boundary_shp = gpd.GeoDataFrame(geometry=vector.ds.__getattr__(geometry_type), crs=vector.crs)
+ # We mask the pixels that make up the geometry type
+ mask_boundary = gu.Vector(boundary_shp).create_mask(raster, as_array=True)
+
+ else:
+ # We mask target pixels
+ if target_values is not None:
+ mask_boundary = np.logical_or.reduce([raster.get_nanarray() == target_val for target_val in target_values])
+ # Otherwise, all non-zero values are considered targets
+ else:
+ mask_boundary = raster.get_nanarray().astype(bool)
+
+ # 2/ Now, we compute the distance matrix relative to the masked geometry type
+ if distance_unit.lower() == "georeferenced":
+ sampling: int | tuple[float | int, float | int] = raster.res
+ elif distance_unit.lower() == "pixel":
+ sampling = 1
+ else:
+ raise ValueError('Distance unit must be either "georeferenced" or "pixel".')
+
+ # If not all pixels are targets, then we compute the distance
+ non_targets = np.count_nonzero(mask_boundary)
+ if non_targets > 0:
+ proximity = distance_transform_edt(~mask_boundary, sampling=sampling)
+ # Otherwise, pass an array full of nodata
+ else:
+ proximity = np.ones(np.shape(mask_boundary)) * np.nan
+
+ # 3/ If there was a vector input, apply the in_and_out argument to optionally mask inside/outside
+ if vector is not None:
+ if in_or_out == "both":
+ pass
+ elif in_or_out in ["in", "out"]:
+ mask_polygon = gu.Vector(vector.ds).create_mask(raster, as_array=True)
+ if in_or_out == "in":
+ proximity[~mask_polygon] = 0
+ else:
+ proximity[mask_polygon] = 0
+ else:
+ raise ValueError('The type of proximity must be one of "in", "out" or "both".')
+
+ return proximity
diff --git a/geoutils/pointcloud.py b/geoutils/interface/gridding.py
similarity index 98%
rename from geoutils/pointcloud.py
rename to geoutils/interface/gridding.py
index c7e521fd..20e71821 100644
--- a/geoutils/pointcloud.py
+++ b/geoutils/interface/gridding.py
@@ -1,4 +1,4 @@
-"""Module for point cloud manipulation."""
+"""Functionalities for gridding points (point cloud to raster)."""
import warnings
from typing import Literal
diff --git a/geoutils/raster/interpolate.py b/geoutils/interface/interpolate.py
similarity index 98%
rename from geoutils/raster/interpolate.py
rename to geoutils/interface/interpolate.py
index 153d9955..2824bd39 100644
--- a/geoutils/raster/interpolate.py
+++ b/geoutils/interface/interpolate.py
@@ -1,3 +1,5 @@
+"""Functionalities for interpolating a regular grid at points (raster to point cloud)."""
+
from __future__ import annotations
from typing import Any, Callable, Literal, overload
@@ -195,8 +197,7 @@ def _interp_points(
*,
return_interpolator: Literal[False] = False,
**kwargs: Any,
-) -> NDArrayNum:
- ...
+) -> NDArrayNum: ...
@overload
@@ -212,8 +213,7 @@ def _interp_points(
*,
return_interpolator: Literal[True],
**kwargs: Any,
-) -> Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]:
- ...
+) -> Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]: ...
@overload
@@ -229,8 +229,7 @@ def _interp_points(
*,
return_interpolator: bool = False,
**kwargs: Any,
-) -> NDArrayNum | Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]:
- ...
+) -> NDArrayNum | Callable[[tuple[NDArrayNum, NDArrayNum]], NDArrayNum]: ...
def _interp_points(
diff --git a/geoutils/interface/raster_point.py b/geoutils/interface/raster_point.py
new file mode 100644
index 00000000..594e81a8
--- /dev/null
+++ b/geoutils/interface/raster_point.py
@@ -0,0 +1,243 @@
+"""Functionalities at the interface of rasters and point clouds."""
+
+from __future__ import annotations
+
+from typing import Iterable, Literal
+
+import affine
+import geopandas as gpd
+import numpy as np
+import rasterio as rio
+from rasterio.crs import CRS
+
+import geoutils as gu
+from geoutils._typing import NDArrayNum
+from geoutils.raster.array import _get_mask_from_array
+from geoutils.raster.georeferencing import _default_nodata, _xy2ij
+from geoutils.raster.sampling import subsample_array
+
+
+def _regular_pointcloud_to_raster(
+ pointcloud: gpd.GeoDataFrame,
+ grid_coords: tuple[NDArrayNum, NDArrayNum] = None,
+ transform: rio.transform.Affine = None,
+ shape: tuple[int, int] = None,
+ nodata: int | float | None = None,
+ data_column_name: str = "b1",
+ area_or_point: Literal["Area", "Point"] = "Point",
+) -> tuple[NDArrayNum, affine.Affine, CRS, int | float | None, Literal["Area", "Point"]]:
+ """
+ Convert a regular point cloud to a raster. See Raster.from_pointcloud_regular() for details.
+ """
+
+ # Get transform and shape from input
+ if grid_coords is not None:
+
+ # Input checks
+ if (
+ not isinstance(grid_coords, tuple)
+ or not (isinstance(grid_coords[0], np.ndarray) and grid_coords[0].ndim == 1)
+ or not (isinstance(grid_coords[1], np.ndarray) and grid_coords[1].ndim == 1)
+ ):
+ raise TypeError("Input grid coordinates must be 1D arrays.")
+
+ diff_x = np.diff(grid_coords[0])
+ diff_y = np.diff(grid_coords[1])
+
+ if not all(diff_x == diff_x[0]) and all(diff_y == diff_y[0]):
+ raise ValueError("Grid coordinates must be regular (equally spaced, independently along X and Y).")
+
+ # Build transform from min X, max Y and step in both
+ out_transform = rio.transform.from_origin(np.min(grid_coords[0]), np.max(grid_coords[1]), diff_x[0], diff_y[0])
+ # Y is first axis, X is second axis
+ out_shape = (len(grid_coords[1]), len(grid_coords[0]))
+
+ elif transform is not None and shape is not None:
+
+ out_transform = transform
+ out_shape = shape
+
+ else:
+ raise ValueError("Either grid coordinates or both geotransform and shape must be provided.")
+
+ # Create raster from inputs, with placeholder data for now
+ dtype = pointcloud[data_column_name].dtype
+ out_nodata = nodata if not None else _default_nodata(dtype)
+ arr = np.ones(out_shape, dtype=dtype)
+
+ # Get indexes of point cloud coordinates in the raster, forcing no shift
+ i, j = _xy2ij(
+ x=pointcloud.geometry.x.values,
+ y=pointcloud.geometry.y.values,
+ shift_area_or_point=False,
+ transform=out_transform,
+ area_or_point=area_or_point,
+ )
+
+ # If coordinates are not integer type (forced in xy2ij), then some points are not falling on exact coordinates
+ if not np.issubdtype(i.dtype, np.integer) or not np.issubdtype(i.dtype, np.integer):
+ raise ValueError("Some point cloud coordinates differ from the grid coordinates.")
+
+ # Set values
+ mask = np.ones(np.shape(arr), dtype=bool)
+ mask[i, j] = False
+ arr[i, j] = pointcloud[data_column_name].values
+
+ # Set output values
+ raster_arr = np.ma.masked_array(data=arr, mask=mask)
+
+ return raster_arr, out_transform, pointcloud.crs, out_nodata, area_or_point
+
+
+def _raster_to_pointcloud(
+ source_raster: gu.Raster,
+ data_column_name: str,
+ data_band: int,
+ auxiliary_data_bands: list[int] | None,
+ auxiliary_column_names: list[str] | None,
+ subsample: float | int,
+ skip_nodata: bool,
+ as_array: bool,
+ random_state: int | np.random.Generator | None,
+ force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"],
+) -> NDArrayNum | gu.Vector:
+ """
+ Convert a raster to a point cloud. See Raster.to_pointcloud() for details.
+ """
+
+ # Input checks
+
+ # Main data column checks
+ if not isinstance(data_column_name, str):
+ raise ValueError("Data column name must be a string.")
+ if not (isinstance(data_band, int) and data_band >= 1 and data_band <= source_raster.count):
+ raise ValueError(
+ f"Data band number must be an integer between 1 and the total number of bands ({source_raster.count})."
+ )
+
+ # Rename data column if a different band is selected but the name is still default
+ if data_band != 1 and data_column_name == "b1":
+ data_column_name = "b" + str(data_band)
+
+ # Auxiliary data columns checks
+ if auxiliary_column_names is not None and auxiliary_data_bands is None:
+ raise ValueError("Passing auxiliary column names requires passing auxiliary data band numbers as well.")
+ if auxiliary_data_bands is not None:
+ if not (isinstance(auxiliary_data_bands, Iterable) and all(isinstance(b, int) for b in auxiliary_data_bands)):
+ raise ValueError("Auxiliary data band number must be an iterable containing only integers.")
+ if any((1 > b or source_raster.count < b) for b in auxiliary_data_bands):
+ raise ValueError(
+ f"Auxiliary data band numbers must be between 1 and the total number of bands ({source_raster.count})."
+ )
+ if data_band in auxiliary_data_bands:
+ raise ValueError(
+ f"Main data band {data_band} should not be listed in auxiliary data bands {auxiliary_data_bands}."
+ )
+
+ # Ensure auxiliary column name is defined if auxiliary data bands is not None
+ if auxiliary_column_names is not None:
+ if not (
+ isinstance(auxiliary_column_names, Iterable) and all(isinstance(b, str) for b in auxiliary_column_names)
+ ):
+ raise ValueError("Auxiliary column names must be an iterable containing only strings.")
+ if not len(auxiliary_column_names) == len(auxiliary_data_bands):
+ raise ValueError(
+ f"Length of auxiliary column name and data band numbers should be the same, "
+ f"found {len(auxiliary_column_names)} and {len(auxiliary_data_bands)} respectively."
+ )
+
+ else:
+ auxiliary_column_names = [f"b{i}" for i in auxiliary_data_bands]
+
+ # Define bigger list with all bands and names
+ all_bands = [data_band] + auxiliary_data_bands
+ all_column_names = [data_column_name] + auxiliary_column_names
+
+ else:
+ all_bands = [data_band]
+ all_column_names = [data_column_name]
+
+ # If subsample is the entire array, load it to optimize speed
+ if subsample == 1 and not source_raster.is_loaded:
+ source_raster.load(bands=all_bands)
+
+ # Band indexes in the array are band number minus one
+ all_indexes = [b - 1 for b in all_bands]
+
+ # We do 2D subsampling on the data band only, regardless of valid masks on other bands
+ if skip_nodata:
+ if source_raster.is_loaded:
+ if source_raster.count == 1:
+ self_mask = _get_mask_from_array(
+ source_raster.data
+ ) # This is to avoid the case where the mask is just "False"
+ else:
+ self_mask = _get_mask_from_array(
+ source_raster.data[data_band - 1, :, :]
+ ) # This is to avoid the case where the mask is just "False"
+ valid_mask = ~self_mask
+
+ # Load only mask of valid data from disk if array not loaded
+ else:
+ valid_mask = ~source_raster._load_only_mask(bands=data_band)
+ # If we are not skipping nodata values, valid mask is everywhere
+ else:
+ if source_raster.count == 1:
+ valid_mask = np.ones(source_raster.data.shape, dtype=bool)
+ else:
+ valid_mask = np.ones(source_raster.data[0, :].shape, dtype=bool)
+
+ # Get subsample on valid mask
+ # Build a low memory boolean masked array with invalid values masked to pass to subsampling
+ ma_valid = np.ma.masked_array(data=np.ones(np.shape(valid_mask), dtype=bool), mask=~valid_mask)
+ # Take a subsample within the valid values
+ indices = subsample_array(array=ma_valid, subsample=subsample, random_state=random_state, return_indices=True)
+
+ # If the Raster is loaded, pick from the data while ignoring the mask
+ if source_raster.is_loaded:
+ if source_raster.count == 1:
+ pixel_data = source_raster.data[indices[0], indices[1]]
+ else:
+ # TODO: Combining both indexes at once could reduce memory usage?
+ pixel_data = source_raster.data[all_indexes, :][:, indices[0], indices[1]]
+
+ # Otherwise use rasterio.sample to load only requested pixels
+ else:
+ # Extract the coordinates at subsampled pixels with valid data
+ # To extract data, we always use "upper left" which rasterio interprets as the exact raster coordinates
+ # Further below we redefine output coordinates based on point interpretation
+ x_coords, y_coords = (np.array(a) for a in source_raster.ij2xy(indices[0], indices[1], force_offset="ul"))
+
+ with rio.open(source_raster.filename) as raster:
+ # Rasterio uses indexes (starts at 1)
+ pixel_data = np.array(list(raster.sample(zip(x_coords, y_coords), indexes=all_bands))).T
+
+ # At this point there should not be any nodata anymore, so we can transform everything to normal array
+ if np.ma.isMaskedArray(pixel_data):
+ pixel_data = pixel_data.data
+
+ # If nodata values were not skipped, convert them to NaNs and change data type
+ if skip_nodata is False:
+ pixel_data = pixel_data.astype("float32")
+ pixel_data[pixel_data == source_raster.nodata] = np.nan
+
+ # Now we force the coordinates we define for the point cloud, according to pixel interpretation
+ x_coords_2, y_coords_2 = (
+ np.array(a) for a in source_raster.ij2xy(indices[0], indices[1], force_offset=force_pixel_offset)
+ )
+
+ if not as_array:
+ points = gu.Vector(
+ gpd.GeoDataFrame(
+ pixel_data.T,
+ columns=all_column_names,
+ geometry=gpd.points_from_xy(x_coords_2, y_coords_2),
+ crs=source_raster.crs,
+ )
+ )
+ return points
+ else:
+ # Merge the coordinates and pixel data an array of N x K
+ # This has the downside of converting all the data to the same data type
+ points_arr = np.vstack((x_coords_2.reshape(1, -1), y_coords_2.reshape(1, -1), pixel_data)).T
+ return points_arr
diff --git a/geoutils/interface/raster_vector.py b/geoutils/interface/raster_vector.py
new file mode 100644
index 00000000..8d90f3fc
--- /dev/null
+++ b/geoutils/interface/raster_vector.py
@@ -0,0 +1,257 @@
+"""Functionalities at the interface of rasters and vectors."""
+
+from __future__ import annotations
+
+import warnings
+from typing import Any, Iterable, Literal
+
+import affine
+import geopandas as gpd
+import numpy as np
+import rasterio as rio
+from rasterio import features, warp
+from rasterio.crs import CRS
+from rasterio.features import shapes
+
+import geoutils as gu
+from geoutils._typing import NDArrayBool, NDArrayNum, Number
+
+
+def _polygonize(
+ source_raster: gu.Raster,
+ target_values: Number | tuple[Number, Number] | list[Number] | NDArrayNum | Literal["all"],
+ data_column_name: str,
+) -> gu.Vector:
+ """Polygonize a raster. See Raster.polygonize() for details."""
+
+ # Mask a unique value set by a number
+ if isinstance(target_values, (int, float, np.integer, np.floating)):
+ if np.sum(source_raster.data == target_values) == 0:
+ raise ValueError(f"no pixel with in_value {target_values}")
+
+ bool_msk = np.array(source_raster.data == target_values).astype(np.uint8)
+
+ # Mask values within boundaries set by a tuple
+ elif isinstance(target_values, tuple):
+ if np.sum((source_raster.data > target_values[0]) & (source_raster.data < target_values[1])) == 0:
+ raise ValueError(f"no pixel with in_value between {target_values[0]} and {target_values[1]}")
+
+ bool_msk = ((source_raster.data > target_values[0]) & (source_raster.data < target_values[1])).astype(np.uint8)
+
+ # Mask specific values set by a sequence
+ elif isinstance(target_values, list) or isinstance(target_values, np.ndarray):
+ if np.sum(np.isin(source_raster.data, np.array(target_values))) == 0:
+ raise ValueError("no pixel with in_value " + ", ".join(map("{}".format, target_values)))
+
+ bool_msk = np.isin(source_raster.data, np.array(target_values)).astype("uint8")
+
+ # Mask all valid values
+ elif target_values == "all":
+ # Using getmaskarray is necessary in case .data.mask is nomask (False)
+ bool_msk = (~np.ma.getmaskarray(source_raster.data)).astype("uint8")
+
+ else:
+ raise ValueError("in_value must be a number, a tuple or a sequence")
+
+ # GeoPandas.from_features() only supports certain dtypes, we find the best common dtype to optimize memory usage
+ # TODO: this should be a function independent of polygonize, reused in several places
+ gpd_dtypes = ["uint8", "uint16", "int16", "int32", "float32"]
+ list_common_dtype_index = []
+ for gpd_type in gpd_dtypes:
+ polygonize_dtype = np.promote_types(gpd_type, source_raster.dtype)
+ if str(polygonize_dtype) in gpd_dtypes:
+ list_common_dtype_index.append(gpd_dtypes.index(gpd_type))
+ if len(list_common_dtype_index) == 0:
+ final_dtype = "float32"
+ else:
+ final_dtype_index = min(list_common_dtype_index)
+ final_dtype = gpd_dtypes[final_dtype_index]
+
+ results = (
+ {"properties": {"raster_value": v}, "geometry": s}
+ for i, (s, v) in enumerate(
+ shapes(source_raster.data.astype(final_dtype), mask=bool_msk, transform=source_raster.transform)
+ )
+ )
+
+ gdf = gpd.GeoDataFrame.from_features(list(results))
+ gdf.insert(0, data_column_name, range(0, 0 + len(gdf)))
+ gdf = gdf.set_geometry(col="geometry")
+ gdf = gdf.set_crs(source_raster.crs)
+
+ return gu.Vector(gdf)
+
+
+def _rasterize(
+ gdf: gpd.GeoDataFrame,
+ raster: gu.Raster | None = None,
+ crs: CRS | int | None = None,
+ xres: float | None = None,
+ yres: float | None = None,
+ bounds: tuple[float, float, float, float] | None = None,
+ in_value: int | float | Iterable[int | float] | None = None,
+ out_value: int | float = 0,
+) -> gu.Raster:
+ if (raster is not None) and (crs is not None):
+ raise ValueError("Only one of raster or crs can be provided.")
+
+ # Reproject vector into requested CRS or rst CRS first, if needed
+ # This has to be done first so that width/height calculated below are correct!
+ if crs is None:
+ crs = gdf.crs
+
+ if raster is not None:
+ crs = raster.crs # type: ignore
+
+ vect = gdf.to_crs(crs)
+
+ # If no raster given, now use provided dimensions
+ if raster is None:
+ # At minimum, xres must be set
+ if xres is None:
+ raise ValueError("At least raster or xres must be set.")
+ if yres is None:
+ yres = xres
+
+ # By default, use self's bounds
+ if bounds is None:
+ bounds = vect.total_bounds
+
+ # Calculate raster shape
+ left, bottom, right, top = bounds
+ width = abs((right - left) / xres)
+ height = abs((top - bottom) / yres)
+
+ if width % 1 != 0 or height % 1 != 0:
+ warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.")
+
+ width = int(np.round(width))
+ height = int(np.round(height))
+ out_shape = (height, width)
+
+ # Calculate raster transform
+ transform = rio.transform.from_bounds(left, bottom, right, top, width, height)
+
+ # otherwise use directly raster's dimensions
+ else:
+ out_shape = raster.shape # type: ignore
+ transform = raster.transform # type: ignore
+
+ # Set default burn value, index from 1 to len(self.ds)
+ if in_value is None:
+ in_value = gdf.index + 1
+
+ # Rasterize geometry
+ if isinstance(in_value, Iterable):
+ if len(in_value) != len(vect.geometry): # type: ignore
+ raise ValueError(
+ "in_value must have same length as self.ds.geometry, currently {} != {}".format(
+ len(in_value), len(vect.geometry) # type: ignore
+ )
+ )
+
+ out_geom = ((geom, value) for geom, value in zip(vect.geometry, in_value))
+
+ mask = features.rasterize(shapes=out_geom, fill=out_value, out_shape=out_shape, transform=transform)
+
+ elif isinstance(in_value, int | float | np.floating | np.integer):
+ mask = features.rasterize(
+ shapes=vect.geometry, fill=out_value, out_shape=out_shape, transform=transform, default_value=in_value
+ )
+ else:
+ raise ValueError("in_value must be a single number or an iterable with same length as self.ds.geometry")
+
+ # We return a mask if there is a single value to burn and this value is 1
+ if isinstance(in_value, (int, np.integer, float, np.floating)) and in_value == 1:
+ output = gu.Mask.from_array(data=mask, transform=transform, crs=crs, nodata=None)
+
+ # Otherwise we return a Raster if there are several values to burn
+ else:
+ output = gu.Raster.from_array(data=mask, transform=transform, crs=crs, nodata=None)
+
+ return output
+
+
+def _create_mask(
+ gdf: gpd.GeoDataFrame,
+ raster: gu.Raster | None = None,
+ crs: CRS | None = None,
+ xres: float | None = None,
+ yres: float | None = None,
+ bounds: tuple[float, float, float, float] | None = None,
+ buffer: int | float | np.integer[Any] | np.floating[Any] = 0,
+ as_array: bool = False,
+) -> tuple[NDArrayBool, affine.Affine, CRS]:
+
+ # If no raster given, use provided dimensions
+ if raster is None:
+ # At minimum, xres must be set
+ if xres is None:
+ raise ValueError("At least raster or xres must be set.")
+ if yres is None:
+ yres = xres
+
+ # By default, use self's CRS and bounds
+ if crs is None:
+ crs = gdf.crs
+ if bounds is None:
+ bounds_shp = True
+ bounds = gdf.total_bounds
+ else:
+ bounds_shp = False
+
+ # Calculate raster shape
+ left, bottom, right, top = bounds
+ height = abs((right - left) / xres)
+ width = abs((top - bottom) / yres)
+
+ if width % 1 != 0 or height % 1 != 0:
+ # Only warn if the bounds were provided, and not derived from the vector
+ if not bounds_shp:
+ warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.")
+
+ width = int(np.round(width))
+ height = int(np.round(height))
+ out_shape = (height, width)
+
+ # Calculate raster transform
+ transform = rio.transform.from_bounds(left, bottom, right, top, width, height)
+
+ # otherwise use directly raster's dimensions
+ elif isinstance(raster, gu.Raster):
+ out_shape = raster.shape
+ transform = raster.transform
+ crs = raster.crs
+ bounds = raster.bounds
+ else:
+ raise TypeError("Raster must be a geoutils.Raster or None.")
+
+ # Copying GeoPandas dataframe before applying changes
+ gdf = gdf.copy()
+
+ # Crop vector geometries to avoid issues when reprojecting
+ left, bottom, right, top = bounds # type: ignore
+ x1, y1, x2, y2 = warp.transform_bounds(crs, gdf.crs, left, bottom, right, top)
+ gdf = gdf.cx[x1:x2, y1:y2]
+
+ # Reproject vector into raster CRS
+ gdf = gdf.to_crs(crs)
+
+ # Create a buffer around the features
+ if not isinstance(buffer, (int, float, np.number)):
+ raise TypeError(f"Buffer must be a number, currently set to {type(buffer).__name__}.")
+ if buffer != 0:
+ gdf.geometry = [geom.buffer(buffer) for geom in gdf.geometry]
+ elif buffer == 0:
+ pass
+
+ # Rasterize geometry
+ mask = features.rasterize(
+ shapes=gdf.geometry, fill=0, out_shape=out_shape, transform=transform, default_value=1, dtype="uint8"
+ ).astype("bool")
+
+ # Force output mask to be of same dimension as input raster
+ if raster is not None:
+ mask = mask.reshape((raster.count, raster.height, raster.width)) # type: ignore
+
+ return mask, transform, crs
diff --git a/geoutils/misc.py b/geoutils/misc.py
index ed2b981f..51d0ceae 100644
--- a/geoutils/misc.py
+++ b/geoutils/misc.py
@@ -1,4 +1,5 @@
-"""Miscellaneous functions, mainly for testing."""
+"""Miscellaneous functions for maintenance, documentation and testing."""
+
from __future__ import annotations
import copy
@@ -13,7 +14,6 @@
except ImportError:
_has_yaml = False
-import rasterio as rio
from packaging.version import Version
import geoutils
@@ -138,22 +138,6 @@ def decorator(decorated: Callable) -> Callable: # type: ignore
return decorator
-def resampling_method_from_str(method_str: str) -> rio.enums.Resampling:
- """Get a rasterio resampling method from a string representation, e.g. "cubic_spline"."""
- # Try to match the string version of the resampling method with a rio Resampling enum name
- for method in rio.enums.Resampling:
- if method.name == method_str:
- resampling_method = method
- break
- # If no match was found, raise an error.
- else:
- raise ValueError(
- f"'{method_str}' is not a valid rasterio.enums.Resampling method. "
- f"Valid methods: {[method.name for method in rio.enums.Resampling]}"
- )
- return resampling_method
-
-
def diff_environment_yml(
fn_env: str | dict[str, Any], fn_devenv: str | dict[str, Any], print_dep: str = "both", input_dict: bool = False
) -> None:
diff --git a/geoutils/pointcloud/__init__.py b/geoutils/pointcloud/__init__.py
new file mode 100644
index 00000000..16393346
--- /dev/null
+++ b/geoutils/pointcloud/__init__.py
@@ -0,0 +1 @@
+from geoutils.pointcloud.pointcloud import * # noqa
diff --git a/geoutils/pointcloud/pointcloud.py b/geoutils/pointcloud/pointcloud.py
new file mode 100644
index 00000000..cf2f6f3c
--- /dev/null
+++ b/geoutils/pointcloud/pointcloud.py
@@ -0,0 +1 @@
+"""Module for future PointCloud class."""
diff --git a/geoutils/projtools.py b/geoutils/projtools.py
index 1f234a5d..c9cf73c8 100644
--- a/geoutils/projtools.py
+++ b/geoutils/projtools.py
@@ -1,12 +1,12 @@
"""
-projtools provides a set of tools for dealing with different coordinate reference systems (CRS) and bounds.
+Functionalities to manipulate metadata in different coordinate reference systems (CRS).
"""
+
from __future__ import annotations
import warnings
-from collections import abc
from math import ceil, floor
-from typing import Literal
+from typing import Iterable, Literal
import geopandas as gpd
import numpy as np
@@ -154,7 +154,7 @@ def bounds2poly(
def merge_bounds(
- bounds_list: abc.Iterable[
+ bounds_list: Iterable[
list[float] | tuple[float] | rio.coords.BoundingBox | rio.io.DatasetReader | gpd.GeoDataFrame
],
resolution: float | None = None,
@@ -270,7 +270,7 @@ def reproject_points(
def reproject_to_latlon(
points: list[list[float]] | list[float] | NDArrayNum, in_crs: CRS, round_: int = 8
-) -> tuple[list[float], list[float]]:
+) -> NDArrayNum:
"""
Reproject a set of point from in_crs to lat/lon.
@@ -281,13 +281,12 @@ def reproject_to_latlon(
:returns: Reprojected points, of same shape as points.
"""
proj_points = reproject_points(points, in_crs, crs_4326)
- proj_points = np.round(proj_points, round_)
- return proj_points
+ return np.round(proj_points, round_)
def reproject_from_latlon(
points: list[list[float]] | tuple[list[float], list[float]] | NDArrayNum, out_crs: CRS, round_: int = 2
-) -> tuple[list[float], list[float]]:
+) -> NDArrayNum:
"""
Reproject a set of point from lat/lon to out_crs.
@@ -298,8 +297,7 @@ def reproject_from_latlon(
:returns: Reprojected points, of same shape as points.
"""
proj_points = reproject_points(points, crs_4326, out_crs)
- proj_points = np.round(proj_points, round_)
- return proj_points
+ return np.round(proj_points, round_)
def reproject_shape(inshape: BaseGeometry, in_crs: CRS, out_crs: CRS) -> BaseGeometry:
diff --git a/geoutils/raster/__init__.py b/geoutils/raster/__init__.py
index 5402aa46..35f981b7 100644
--- a/geoutils/raster/__init__.py
+++ b/geoutils/raster/__init__.py
@@ -1,5 +1,7 @@
from geoutils.raster.raster import Raster, RasterType, Mask, handled_array_funcs # noqa isort:skip
from geoutils.raster.array import * # noqa
+from geoutils.raster.georeferencing import * # noqa
+from geoutils.raster.geotransformations import * # noqa
from geoutils.raster.multiraster import * # noqa
from geoutils.raster.sampling import * # noqa
from geoutils.raster.satimg import SatelliteImage # noqa
diff --git a/geoutils/raster/array.py b/geoutils/raster/array.py
index e1df06c5..ed32b909 100644
--- a/geoutils/raster/array.py
+++ b/geoutils/raster/array.py
@@ -10,7 +10,7 @@
from geoutils._typing import MArrayNum, NDArrayBool, NDArrayNum
-def get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayBool:
+def _get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayBool:
"""
Return the mask of invalid values, whether array is a ndarray with NaNs or a np.ma.masked_array.
@@ -22,7 +22,7 @@ def get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayB
return mask.squeeze()
-def get_array_and_mask(
+def _get_array_and_mask(
array: NDArrayNum | MArrayNum, check_shape: bool = True, copy: bool = True
) -> tuple[NDArrayNum, NDArrayBool]:
"""
@@ -59,19 +59,19 @@ def get_array_and_mask(
array_data = np.array(array).squeeze() if copy else np.asarray(array).squeeze()
# Get the mask of invalid pixels and set nans if it is occupied.
- invalid_mask = get_mask_from_array(array)
+ invalid_mask = _get_mask_from_array(array)
if np.any(invalid_mask):
array_data[invalid_mask] = np.nan
return array_data, invalid_mask
-def get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, ...]:
+def _get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, ...]:
"""
Return (rowmin, rowmax, colmin, colmax), the first/last row/column of array with valid pixels
"""
if not array.dtype == "bool":
- valid_mask = ~get_mask_from_array(array)
+ valid_mask = ~_get_mask_from_array(array)
else:
# Not sure why Mypy is not recognizing that the type of the array can only be bool here
valid_mask = array # type: ignore
@@ -80,7 +80,7 @@ def get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int,
return rows_nonzero[0], rows_nonzero[-1], cols_nonzero[0], cols_nonzero[-1]
-def get_xy_rotated(raster: gu.Raster, along_track_angle: float) -> tuple[NDArrayNum, NDArrayNum]:
+def _get_xy_rotated(raster: gu.Raster, along_track_angle: float) -> tuple[NDArrayNum, NDArrayNum]:
"""
Rotate x, y axes of image to get along- and cross-track distances.
:param raster: Raster to get x,y positions from.
diff --git a/geoutils/raster/delayed.py b/geoutils/raster/delayed.py
index af668bd1..5f08ddc4 100644
--- a/geoutils/raster/delayed.py
+++ b/geoutils/raster/delayed.py
@@ -157,7 +157,7 @@ def delayed_subsample(
flattened chunk). For this reason, a loaded array will also have a different subsample due to its direct 1D
indexing (per valid value for the entire flattened array).
- To ensure you re-use a similar subsample of valid values for several arrays, call this function with
+ To ensure you reuse a similar subsample of valid values for several arrays, call this function with
return_indices=True, then sample your arrays out-of-memory with .vindex[indices[0], indices[1]]
(this assumes that these arrays have valid values at the same locations).
@@ -736,7 +736,7 @@ def delayed_reproject(
src_block_ids = np.array(src_geotiling.get_block_locations())
meta_params = [
(
- _combined_blocks_shape_transform(sub_block_ids=src_block_ids[sbid], src_geogrid=src_geogrid)
+ _combined_blocks_shape_transform(sub_block_ids=src_block_ids[sbid], src_geogrid=src_geogrid) # type: ignore
if len(sbid) > 0
else ({}, [])
)
diff --git a/geoutils/raster/georeferencing.py b/geoutils/raster/georeferencing.py
index da02d927..c37d30b2 100644
--- a/geoutils/raster/georeferencing.py
+++ b/geoutils/raster/georeferencing.py
@@ -1,12 +1,17 @@
+"""
+Functions for manipulating georeferencing of the raster objects.
+"""
+
from __future__ import annotations
+import warnings
from typing import Iterable, Literal
import numpy as np
import rasterio as rio
from geoutils._config import config
-from geoutils._typing import ArrayLike, NDArrayNum
+from geoutils._typing import ArrayLike, DTypeLike, NDArrayNum
def _ij2xy(
@@ -170,3 +175,94 @@ def _bounds(transform: rio.transform.Affine, shape: tuple[int, int]) -> rio.coor
"""See description of Raster.bounds."""
return rio.coords.BoundingBox(*rio.transform.array_bounds(height=shape[0], width=shape[1], transform=transform))
+
+
+def _cast_pixel_interpretation(
+ area_or_point1: Literal["Area", "Point"] | None, area_or_point2: Literal["Area", "Point"] | None
+) -> Literal["Area", "Point"] | None:
+ """
+ Cast two pixel interpretations and warn if not castable.
+
+ Casts to:
+ - "Area" if both are "Area",
+ - "Point" if both are "Point",
+ - None if any of the interpretation is None, or
+ - None if one is "Area" and the other "Point" (and raises a warning).
+ """
+
+ # If one is None, cast to None
+ if area_or_point1 is None or area_or_point2 is None:
+ area_or_point_out = None
+ # If both are equal and not None
+ elif area_or_point1 == area_or_point2:
+ area_or_point_out = area_or_point1
+ else:
+ area_or_point_out = None
+ msg = (
+ 'One raster has a pixel interpretation "Area" and the other "Point". To silence this warning, '
+ "either correct the pixel interpretation of one raster, or deactivate "
+ 'warnings of pixel interpretation with geoutils.config["warn_area_or_point"]=False.'
+ )
+ if config["warn_area_or_point"]:
+ warnings.warn(message=msg, category=UserWarning)
+
+ return area_or_point_out
+
+
+# Function to set the default nodata values for any given dtype
+# Similar to GDAL for int types, but without absurdly long nodata values for floats.
+# For unsigned types, the maximum value is chosen (with a max of 99999).
+# For signed types, the minimum value is chosen (with a min of -99999).
+def _default_nodata(dtype: DTypeLike) -> int:
+ """
+ Set the default nodata value for any given dtype, when this is not provided.
+ """
+ default_nodata_lookup = {
+ "uint8": 255,
+ "int8": -128,
+ "uint16": 65535,
+ "int16": -32768,
+ "uint32": 99999,
+ "int32": -99999,
+ "float16": -99999,
+ "float32": -99999,
+ "float64": -99999,
+ "float128": -99999,
+ "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility
+ }
+ # Check argument dtype is as expected
+ if not isinstance(dtype, (str, np.dtype, type)):
+ raise TypeError(f"dtype {dtype} not understood.")
+
+ # Convert numpy types to string
+ if isinstance(dtype, type):
+ dtype = np.dtype(dtype).name
+
+ # Convert np.dtype to string
+ if isinstance(dtype, np.dtype):
+ dtype = dtype.name
+
+ if dtype in default_nodata_lookup.keys():
+ return default_nodata_lookup[dtype]
+ else:
+ raise NotImplementedError(f"No default nodata value set for dtype {dtype}.")
+
+
+def _cast_nodata(out_dtype: DTypeLike, nodata: int | float | None) -> int | float | None:
+ """
+ Cast nodata value for output data type to default nodata if incompatible.
+
+ :param out_dtype: Dtype of output array.
+ :param nodata: Nodata value.
+
+ :return: Cast nodata value.
+ """
+
+ if out_dtype == bool:
+ nodata = None
+ if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, out_dtype):
+ nodata = _default_nodata(out_dtype)
+ else:
+ nodata = nodata
+
+ return nodata
diff --git a/geoutils/raster/geotransformations.py b/geoutils/raster/geotransformations.py
new file mode 100644
index 00000000..e43e51a5
--- /dev/null
+++ b/geoutils/raster/geotransformations.py
@@ -0,0 +1,546 @@
+"""
+Functionalities for geotransformations of raster objects.
+"""
+
+from __future__ import annotations
+
+import os
+import warnings
+from typing import Any, Iterable, Literal
+
+import affine
+import numpy as np
+import rasterio as rio
+from rasterio.crs import CRS
+from rasterio.enums import Resampling
+
+import geoutils as gu
+from geoutils._typing import DTypeLike, MArrayNum
+from geoutils.raster.georeferencing import (
+ _cast_pixel_interpretation,
+ _default_nodata,
+ _res,
+)
+
+
+def _resampling_method_from_str(method_str: str) -> rio.enums.Resampling:
+ """Get a rasterio resampling method from a string representation, e.g. "cubic_spline"."""
+ # Try to match the string version of the resampling method with a rio Resampling enum name
+ for method in rio.enums.Resampling:
+ if method.name == method_str:
+ resampling_method = method
+ break
+ # If no match was found, raise an error.
+ else:
+ raise ValueError(
+ f"'{method_str}' is not a valid rasterio.enums.Resampling method. "
+ f"Valid methods: {[method.name for method in rio.enums.Resampling]}"
+ )
+ return resampling_method
+
+
+##############
+# 1/ REPROJECT
+##############
+
+
+def _user_input_reproject(
+ source_raster: gu.Raster,
+ ref: gu.Raster,
+ crs: CRS | str | int | None,
+ res: float | Iterable[float] | None,
+ bounds: dict[str, float] | rio.coords.BoundingBox | None,
+ nodata: int | float | None,
+ dtype: DTypeLike | None,
+ force_source_nodata: int | float | None,
+) -> tuple[
+ CRS, DTypeLike, int | float | None, int | float | None, float | Iterable[float] | None, rio.coords.BoundingBox
+]:
+ """Check all user inputs of reproject."""
+
+ # --- Sanity checks on inputs and defaults -- #
+ # Check that either ref or crs is provided
+ if ref is not None and crs is not None:
+ raise ValueError("Either of `ref` or `crs` must be set. Not both.")
+ # If none are provided, simply preserve the CRS
+ elif ref is None and crs is None:
+ crs = source_raster.crs
+
+ # Set output dtype
+ if dtype is None:
+ # Warning: this will not work for multiple bands with different dtypes
+ dtype = source_raster.dtype
+
+ # --- Set source nodata if provided -- #
+ if force_source_nodata is None:
+ src_nodata = source_raster.nodata
+ else:
+ src_nodata = force_source_nodata
+ # Raise warning if a different nodata value exists for this raster than the forced one (not None)
+ if source_raster.nodata is not None:
+ warnings.warn(
+ "Forcing source nodata value of {} despite an existing nodata value of {} in the raster. "
+ "To silence this warning, use self.set_nodata() before reprojection instead of forcing.".format(
+ force_source_nodata, source_raster.nodata
+ )
+ )
+
+ # --- Set destination nodata if provided -- #
+ # This is needed in areas not covered by the input data.
+ # If None, will use GeoUtils' default, as rasterio's default is unknown, hence cannot be handled properly.
+ if nodata is None:
+ nodata = source_raster.nodata
+ if nodata is None:
+ nodata = _default_nodata(dtype)
+ # If nodata is already being used, raise a warning.
+ # TODO: for uint8, if all values are used, apply rio.warp to mask to identify invalid values
+ if not source_raster.is_loaded:
+ warnings.warn(
+ f"For reprojection, nodata must be set. Setting default nodata to {nodata}. You may "
+ f"set a different nodata with `nodata`."
+ )
+
+ elif nodata in source_raster.data:
+ warnings.warn(
+ f"For reprojection, nodata must be set. Default chosen value {nodata} exists in "
+ f"self.data. This may have unexpected consequences. Consider setting a different nodata with "
+ f"self.set_nodata()."
+ )
+
+ # Create a BoundingBox if required
+ if bounds is not None:
+ if not isinstance(bounds, rio.coords.BoundingBox):
+ bounds = rio.coords.BoundingBox(
+ bounds["left"],
+ bounds["bottom"],
+ bounds["right"],
+ bounds["top"],
+ )
+
+ # Case a raster is provided as reference
+ if ref is not None:
+ # Check that ref type is either str, Raster or rasterio data set
+ # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45
+ if isinstance(ref, gu.Raster):
+ # Raise a warning if the reference is a raster that has a different pixel interpretation
+ _cast_pixel_interpretation(source_raster.area_or_point, ref.area_or_point)
+ ds_ref = ref
+ elif isinstance(ref, str):
+ if not os.path.exists(ref):
+ raise ValueError("Reference raster does not exist.")
+ ds_ref = gu.Raster(ref, load_data=False)
+ else:
+ raise TypeError("Type of ref not understood, must be path to file (str), Raster.")
+
+ # Read reprojecting params from ref raster
+ crs = ds_ref.crs
+ res = ds_ref.res
+ bounds = ds_ref.bounds
+ else:
+ # Determine target CRS
+ crs = CRS.from_user_input(crs)
+ res = res
+
+ return crs, dtype, src_nodata, nodata, res, bounds
+
+
+def _get_target_georeferenced_grid(
+ raster: gu.Raster,
+ crs: CRS | str | int | None = None,
+ grid_size: tuple[int, int] | None = None,
+ res: int | float | Iterable[float] | None = None,
+ bounds: dict[str, float] | rio.coords.BoundingBox | None = None,
+) -> tuple[affine.Affine, tuple[int, int]]:
+ """
+ Derive the georeferencing parameters (transform, size) for the target grid.
+
+ Needed to reproject a raster to a different grid (resolution or size, bounds) and/or
+ coordinate reference system (CRS).
+
+ If requested bounds are incompatible with output resolution (would result in non integer number of pixels),
+ the bounds are rounded up to the nearest compatible value.
+
+ :param crs: Destination coordinate reference system as a string or EPSG. Defaults to this raster's CRS.
+ :param grid_size: Destination size as (ncol, nrow). Mutually exclusive with ``res``.
+ :param res: Destination resolution (pixel size) in units of destination CRS. Single value or (xres, yres).
+ Mutually exclusive with ``size``.
+ :param bounds: Destination bounds as a Rasterio bounding box, or a dictionary containing left, bottom,
+ right, top bounds in the destination CRS.
+
+ :returns: Calculated transform and size.
+ """
+ # --- Input sanity checks --- #
+ # check size and res are not both set
+ if (grid_size is not None) and (res is not None):
+ raise ValueError("size and res both specified. Specify only one.")
+
+ # Set CRS to input CRS by default
+ if crs is None:
+ crs = raster.crs
+
+ if grid_size is None:
+ width, height = None, None
+ else:
+ width, height = grid_size
+
+ # Convert bounds to BoundingBox
+ if bounds is not None:
+ if not isinstance(bounds, rio.coords.BoundingBox):
+ bounds = rio.coords.BoundingBox(
+ bounds["left"],
+ bounds["bottom"],
+ bounds["right"],
+ bounds["top"],
+ )
+
+ # If all georeferences are the same as input, skip calculating because of issue in
+ # rio.warp.calculate_default_transform (https://github.com/rasterio/rasterio/issues/3010)
+ if (
+ (crs == raster.crs)
+ & ((grid_size is None) | ((height == raster.shape[0]) & (width == raster.shape[1])))
+ & ((res is None) | np.all(np.array(res) == raster.res))
+ & ((bounds is None) | (bounds == raster.bounds))
+ ):
+ return raster.transform, raster.shape[::-1]
+
+ # --- First, calculate default transform ignoring any change in bounds --- #
+ tmp_transform, tmp_width, tmp_height = rio.warp.calculate_default_transform(
+ raster.crs,
+ crs,
+ raster.width,
+ raster.height,
+ left=raster.bounds.left,
+ right=raster.bounds.right,
+ top=raster.bounds.top,
+ bottom=raster.bounds.bottom,
+ resolution=res,
+ dst_width=width,
+ dst_height=height,
+ )
+
+ # If no bounds specified, can directly use output of rio.warp.calculate_default_transform
+ if bounds is None:
+ dst_size = (tmp_width, tmp_height)
+ dst_transform = tmp_transform
+
+ # --- Second, crop to requested bounds --- #
+ else:
+ # If output size and bounds are known, can use rio.transform.from_bounds to get dst_transform
+ if grid_size is not None:
+ dst_transform = rio.transform.from_bounds(
+ bounds.left, bounds.bottom, bounds.right, bounds.top, grid_size[0], grid_size[1]
+ )
+ dst_size = grid_size
+
+ else:
+ # Otherwise, need to calculate the new output size, rounded to nearest integer
+ ref_win = rio.windows.from_bounds(*list(bounds), tmp_transform).round_lengths()
+ dst_size = (int(ref_win.width), int(ref_win.height))
+
+ if res is not None:
+ # In this case, we force output resolution
+ if isinstance(res, tuple):
+ dst_transform = rio.transform.from_origin(bounds.left, bounds.top, res[0], res[1])
+ else:
+ dst_transform = rio.transform.from_origin(bounds.left, bounds.top, res, res)
+ else:
+ # In this case, we force output bounds
+ dst_transform = rio.transform.from_bounds(
+ bounds.left, bounds.bottom, bounds.right, bounds.top, dst_size[0], dst_size[1]
+ )
+
+ return dst_transform, dst_size
+
+
+def _get_reproj_params(
+ source_raster: gu.Raster,
+ crs: CRS,
+ res: float | Iterable[float] | None,
+ grid_size: tuple[int, int] | None,
+ bounds: dict[str, float] | rio.coords.BoundingBox | None,
+ dtype: DTypeLike,
+ src_nodata: int | float | None,
+ nodata: int | float | None,
+ resampling: Resampling | str,
+) -> dict[str, Any]:
+ """Get all reprojection parameters."""
+
+ # First, set basic reprojection options
+ reproj_kwargs = {
+ "src_transform": source_raster.transform,
+ "src_crs": source_raster.crs,
+ "resampling": resampling if isinstance(resampling, Resampling) else _resampling_method_from_str(resampling),
+ "src_nodata": src_nodata,
+ "dst_nodata": nodata,
+ }
+
+ # Second, determine target transform and grid size
+ transform, grid_size = _get_target_georeferenced_grid(
+ source_raster, crs=crs, grid_size=grid_size, res=res, bounds=bounds
+ )
+
+ # Finally, update reprojection options accordingly
+ reproj_kwargs.update({"dst_transform": transform})
+ data = np.ones((source_raster.count, grid_size[1], grid_size[0]), dtype=dtype)
+ reproj_kwargs.update({"destination": data})
+ reproj_kwargs.update({"dst_crs": crs})
+
+ return reproj_kwargs
+
+
+def _is_reproj_needed(src_shape: tuple[int, int], reproj_kwargs: dict[str, Any]) -> bool:
+ """Check if reprojection is actually needed based on transformation parameters."""
+
+ src_transform = reproj_kwargs["src_transform"]
+ transform = reproj_kwargs["dst_transform"]
+ src_crs = reproj_kwargs["src_crs"]
+ crs = reproj_kwargs["dst_crs"]
+ grid_size = reproj_kwargs["destination"].shape[1:][::-1]
+ src_res = _res(src_transform)
+ res = _res(transform)
+
+ # Caution, grid_size is (width, height) while shape is (height, width)
+ return all(
+ [
+ (transform == src_transform) or (transform is None),
+ (crs == src_crs) or (crs is None),
+ (grid_size == src_shape[::-1]) or (grid_size is None),
+ np.all(np.array(res) == src_res) or (res is None),
+ ]
+ )
+
+
+def _reproject(
+ source_raster: gu.Raster,
+ ref: gu.Raster,
+ crs: CRS | str | int | None = None,
+ res: float | Iterable[float] | None = None,
+ grid_size: tuple[int, int] | None = None,
+ bounds: dict[str, float] | rio.coords.BoundingBox | None = None,
+ nodata: int | float | None = None,
+ dtype: DTypeLike | None = None,
+ resampling: Resampling | str = Resampling.bilinear,
+ force_source_nodata: int | float | None = None,
+ silent: bool = False,
+ n_threads: int = 0,
+ memory_limit: int = 64,
+) -> tuple[bool, MArrayNum | None, affine.Affine | None, CRS | None, int | float | None]:
+ """
+ Reproject raster. See Raster.reproject() for details.
+ """
+
+ # 1/ Process user input
+ crs, dtype, src_nodata, nodata, res, bounds = _user_input_reproject(
+ source_raster=source_raster,
+ ref=ref,
+ crs=crs,
+ bounds=bounds,
+ res=res,
+ nodata=nodata,
+ dtype=dtype,
+ force_source_nodata=force_source_nodata,
+ )
+
+ # 2/ Derive georeferencing parameters for reprojection (transform, grid size)
+ reproj_kwargs = _get_reproj_params(
+ source_raster=source_raster,
+ crs=crs,
+ res=res,
+ grid_size=grid_size,
+ bounds=bounds,
+ dtype=dtype,
+ src_nodata=src_nodata,
+ nodata=nodata,
+ resampling=resampling,
+ )
+
+ # 3/ Check if reprojection is needed, otherwise return source raster with warning
+ if _is_reproj_needed(src_shape=source_raster.shape, reproj_kwargs=reproj_kwargs):
+ if (nodata == src_nodata) or (nodata is None):
+ if not silent:
+ warnings.warn("Output projection, bounds and grid size are identical -> returning self (not a copy!)")
+ return True, None, None, None, None
+
+ elif nodata is not None:
+ if not silent:
+ warnings.warn(
+ "Only nodata is different, consider using the 'set_nodata()' method instead'\
+ ' -> returning self (not a copy!)"
+ )
+ return True, None, None, None, None
+
+ # 4/ Perform reprojection
+
+ # --- Set the performance keywords --- #
+ if n_threads == 0:
+ # Default to cpu count minus one. If the cpu count is undefined, num_threads will be 1
+ cpu_count = os.cpu_count() or 2
+ num_threads = cpu_count - 1
+ else:
+ num_threads = n_threads
+ reproj_kwargs.update({"num_threads": num_threads, "warp_mem_limit": memory_limit})
+
+ # --- Run the reprojection of data --- #
+ # If data is loaded, reproject the numpy array directly
+ if source_raster.is_loaded:
+ # All masked values must be set to a nodata value for rasterio's reproject to work properly
+ # TODO: another option is to apply rio.warp.reproject to the mask to identify invalid pixels
+ if src_nodata is None and np.sum(source_raster.data.mask) > 0:
+ raise ValueError(
+ "No nodata set, set one for the raster with self.set_nodata() or use a temporary one "
+ "with `force_source_nodata`."
+ )
+
+ # Mask not taken into account by rasterio, need to fill with src_nodata
+ data, transformed = rio.warp.reproject(source_raster.data.filled(src_nodata), **reproj_kwargs)
+
+ # If not, uses the dataset instead
+ else:
+ data = [] # type: ignore
+ for k in range(source_raster.count):
+ with rio.open(source_raster.filename) as ds:
+ band = rio.band(ds, k + 1)
+ band, transformed = rio.warp.reproject(band, **reproj_kwargs)
+ data.append(band.squeeze())
+
+ data = np.array(data)
+
+ # Enforce output type
+ data = np.ma.masked_array(data.astype(dtype), fill_value=nodata)
+
+ if nodata is not None:
+ data.mask = data == nodata
+
+ # Check for funny business.
+ if reproj_kwargs["dst_transform"] is not None:
+ assert reproj_kwargs["dst_transform"] == transformed
+
+ return False, data, transformed, crs, nodata
+
+
+#########
+# 2/ CROP
+#########
+
+
+def _crop(
+ source_raster: gu.Raster,
+ crop_geom: gu.Raster | gu.Vector | list[float] | tuple[float, ...],
+ mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
+) -> tuple[MArrayNum, affine.Affine]:
+ """Crop raster. See details in Raster.crop()."""
+
+ assert mode in [
+ "match_extent",
+ "match_pixel",
+ ], "mode must be one of 'match_pixel', 'match_extent'"
+
+ if isinstance(crop_geom, (gu.Raster, gu.Vector)):
+ # For another Vector or Raster, we reproject the bounding box in the same CRS as self
+ xmin, ymin, xmax, ymax = crop_geom.get_bounds_projected(out_crs=source_raster.crs)
+ if isinstance(crop_geom, gu.Raster):
+ # Raise a warning if the reference is a raster that has a different pixel interpretation
+ _cast_pixel_interpretation(source_raster.area_or_point, crop_geom.area_or_point)
+ elif isinstance(crop_geom, (list, tuple)):
+ xmin, ymin, xmax, ymax = crop_geom
+ else:
+ raise ValueError("cropGeom must be a Raster, Vector, or list of coordinates.")
+
+ if mode == "match_pixel":
+ # Finding the intersection of requested bounds and original bounds, cropped to image shape
+ ref_win = rio.windows.from_bounds(xmin, ymin, xmax, ymax, transform=source_raster.transform)
+ self_win = rio.windows.from_bounds(*source_raster.bounds, transform=source_raster.transform).crop(
+ *source_raster.shape
+ )
+ final_window = ref_win.intersection(self_win).round_lengths().round_offsets()
+
+ # Update bounds and transform accordingly
+ new_xmin, new_ymin, new_xmax, new_ymax = rio.windows.bounds(final_window, transform=source_raster.transform)
+ tfm = rio.transform.from_origin(new_xmin, new_ymax, *source_raster.res)
+
+ if source_raster.is_loaded:
+ # In case data is loaded on disk, can extract directly from np array
+ (rowmin, rowmax), (colmin, colmax) = final_window.toranges()
+
+ if source_raster.count == 1:
+ crop_img = source_raster.data[rowmin:rowmax, colmin:colmax]
+ else:
+ crop_img = source_raster.data[:, rowmin:rowmax, colmin:colmax]
+ else:
+
+ assert source_raster._disk_shape is not None # This should not be the case, sanity check to make mypy happy
+
+ # If data was not loaded, and self's transform was updated (e.g. due to downsampling) need to
+ # get the Window corresponding to on disk data
+ ref_win_disk = rio.windows.from_bounds(
+ new_xmin, new_ymin, new_xmax, new_ymax, transform=source_raster._disk_transform
+ )
+ self_win_disk = rio.windows.from_bounds(
+ *source_raster.bounds, transform=source_raster._disk_transform
+ ).crop(*source_raster._disk_shape[1:])
+ final_window_disk = ref_win_disk.intersection(self_win_disk).round_lengths().round_offsets()
+
+ # Round up to downsampling size, to match __init__
+ final_window_disk = rio.windows.round_window_to_full_blocks(
+ final_window_disk, ((source_raster._downsample, source_raster._downsample),)
+ )
+
+ # Load data for "on_disk" window but out_shape matching in-memory transform -> enforce downsampling
+ # AD (24/04/24): Note that the same issue as #447 occurs here when final_window_disk extends beyond
+ # self's bounds. Using option `boundless=True` solves the issue but causes other tests to fail
+ # This should be fixed with #447 and previous line would be obsolete.
+ with rio.open(source_raster.filename) as raster:
+ crop_img = raster.read(
+ indexes=source_raster._bands,
+ masked=source_raster._masked,
+ window=final_window_disk,
+ out_shape=(final_window.height, final_window.width),
+ )
+
+ # Squeeze first axis for single-band
+ if crop_img.ndim == 3 and crop_img.shape[0] == 1:
+ crop_img = crop_img.squeeze(axis=0)
+
+ else:
+ bbox = rio.coords.BoundingBox(left=xmin, bottom=ymin, right=xmax, top=ymax)
+ out_rst = source_raster.reproject(bounds=bbox) # should we instead raise an issue and point to reproject?
+ crop_img = out_rst.data
+ tfm = out_rst.transform
+
+ return crop_img, tfm
+
+
+##############
+# 3/ TRANSLATE
+##############
+
+
+def _translate(
+ transform: affine.Affine,
+ xoff: float,
+ yoff: float,
+ distance_unit: Literal["georeferenced", "pixel"] = "georeferenced",
+) -> affine.Affine:
+ """
+ Translate geotransform horizontally, either in pixels or georeferenced units.
+
+ :param transform: Input geotransform.
+ :param xoff: Translation x offset.
+ :param yoff: Translation y offset.
+ :param distance_unit: Distance unit, either 'georeferenced' (default) or 'pixel'.
+
+ :return: Translated transform.
+ """
+
+ if distance_unit not in ["georeferenced", "pixel"]:
+ raise ValueError("Argument 'distance_unit' should be either 'pixel' or 'georeferenced'.")
+
+ # Get transform
+ dx, b, xmin, d, dy, ymax = list(transform)[:6]
+
+ # Convert pixel offsets to georeferenced units
+ if distance_unit == "pixel":
+ xoff *= dx
+ yoff *= abs(dy) # dy is negative
+
+ return rio.transform.Affine(dx, b, xmin + xoff, d, dy, ymax + yoff)
diff --git a/geoutils/raster/multiraster.py b/geoutils/raster/multiraster.py
index 838f332d..73851f9b 100644
--- a/geoutils/raster/multiraster.py
+++ b/geoutils/raster/multiraster.py
@@ -1,4 +1,5 @@
"""Multiple rasters tools."""
+
from __future__ import annotations
import warnings
@@ -11,9 +12,9 @@
import geoutils as gu
from geoutils._typing import NDArrayNum
-from geoutils.misc import resampling_method_from_str
-from geoutils.raster import Raster, RasterType, get_array_and_mask
-from geoutils.raster.raster import _default_nodata
+from geoutils.raster.array import _get_array_and_mask
+from geoutils.raster.geotransformations import _resampling_method_from_str
+from geoutils.raster.raster import RasterType, _default_nodata
def load_multiple_rasters(
@@ -141,7 +142,7 @@ def stack_rasters(
"""
# Check resampling method
if isinstance(resampling_method, str):
- resampling_method = resampling_method_from_str(resampling_method)
+ resampling_method = _resampling_method_from_str(resampling_method)
# Check raster has a single band
if any(r.count > 1 for r in rasters):
@@ -193,7 +194,7 @@ def stack_rasters(
# Optionally calculate difference
if diff:
diff_to_ref = (reference_raster.data - reprojected_raster.data).squeeze()
- diff_to_ref, _ = get_array_and_mask(diff_to_ref)
+ diff_to_ref, _ = _get_array_and_mask(diff_to_ref)
data.append(diff_to_ref)
else:
# img_data, _ = get_array_and_mask(reprojected_raster.data.squeeze())
@@ -228,7 +229,7 @@ def stack_rasters(
def merge_rasters(
rasters: list[RasterType],
- reference: int | Raster = 0,
+ reference: int | RasterType = 0,
merge_algorithm: Callable | list[Callable] = np.nanmean, # type: ignore
resampling_method: str | rio.enums.Resampling = "bilinear",
use_ref_bounds: bool = False,
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 4c4fe501..69dfcd1b 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1,16 +1,16 @@
"""
-geoutils.raster provides a toolset for working with raster data.
+Module for Raster class.
"""
+
from __future__ import annotations
import math
-import os
import pathlib
import warnings
from collections import abc
from contextlib import ExitStack
from math import floor
-from typing import IO, Any, Callable, Iterable, TypeVar, overload
+from typing import IO, Any, Callable, TypeVar, overload
import affine
import geopandas as gpd
@@ -18,7 +18,6 @@
import matplotlib.pyplot as plt
import numpy as np
import rasterio as rio
-import rasterio.warp
import rasterio.windows
import rioxarray
import xarray as xr
@@ -27,11 +26,8 @@
from packaging.version import Version
from rasterio.crs import CRS
from rasterio.enums import Resampling
-from rasterio.features import shapes
from rasterio.plot import show as rshow
-from scipy.ndimage import distance_transform_edt
-import geoutils.vector as gv
from geoutils._config import config
from geoutils._typing import (
ArrayLike,
@@ -42,6 +38,13 @@
NDArrayNum,
Number,
)
+from geoutils.interface.distance import _proximity_from_vector_or_raster
+from geoutils.interface.interpolate import _interp_points
+from geoutils.interface.raster_point import (
+ _raster_to_pointcloud,
+ _regular_pointcloud_to_raster,
+)
+from geoutils.interface.raster_vector import _polygonize
from geoutils.misc import deprecate
from geoutils.projtools import (
_get_bounds_projected,
@@ -49,18 +52,20 @@
_get_utm_ups_crs,
reproject_from_latlon,
)
-from geoutils.raster.array import get_mask_from_array
from geoutils.raster.georeferencing import (
_bounds,
+ _cast_nodata,
+ _cast_pixel_interpretation,
_coords,
+ _default_nodata,
_ij2xy,
_outside_image,
_res,
_xy2ij,
)
-from geoutils.raster.interpolate import _interp_points
+from geoutils.raster.geotransformations import _crop, _reproject, _translate
from geoutils.raster.sampling import subsample_array
-from geoutils.vector import Vector
+from geoutils.vector.vector import Vector
# If python38 or above, Literal is builtin. Otherwise, use typing_extensions
try:
@@ -137,46 +142,6 @@
]
handled_array_funcs = _HANDLED_FUNCTIONS_1NIN + _HANDLED_FUNCTIONS_2NIN
-
-# Function to set the default nodata values for any given dtype
-# Similar to GDAL for int types, but without absurdly long nodata values for floats.
-# For unsigned types, the maximum value is chosen (with a max of 99999).
-# For signed types, the minimum value is chosen (with a min of -99999).
-def _default_nodata(dtype: DTypeLike) -> int:
- """
- Set the default nodata value for any given dtype, when this is not provided.
- """
- default_nodata_lookup = {
- "uint8": 255,
- "int8": -128,
- "uint16": 65535,
- "int16": -32768,
- "uint32": 99999,
- "int32": -99999,
- "float16": -99999,
- "float32": -99999,
- "float64": -99999,
- "float128": -99999,
- "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility
- }
- # Check argument dtype is as expected
- if not isinstance(dtype, (str, np.dtype, type)):
- raise TypeError(f"dtype {dtype} not understood.")
-
- # Convert numpy types to string
- if isinstance(dtype, type):
- dtype = np.dtype(dtype).name
-
- # Convert np.dtype to string
- if isinstance(dtype, np.dtype):
- dtype = dtype.name
-
- if dtype in default_nodata_lookup.keys():
- return default_nodata_lookup[dtype]
- else:
- raise NotImplementedError(f"No default nodata value set for dtype {dtype}.")
-
-
# Set default attributes to be kept from rasterio's DatasetReader
_default_rio_attrs = [
"bounds",
@@ -260,195 +225,6 @@ def _load_rio(
return data
-def _get_reproject_params(
- raster: RasterType,
- crs: CRS | str | int | None = None,
- grid_size: tuple[int, int] | None = None,
- res: int | float | abc.Iterable[float] | None = None,
- bounds: dict[str, float] | rio.coords.BoundingBox | None = None,
-) -> tuple[Affine, tuple[int, int]]:
- """
- Returns the parameters (transform, size) needed to reproject a raster to a different grid (resolution or
- size, bounds) and/or coordinate reference system (CRS).
-
- If requested bounds are incompatible with output resolution (would result in non integer number of pixels),
- the bounds are rounded up to the nearest compatible value.
-
- :param crs: Destination coordinate reference system as a string or EPSG. Defaults to this raster's CRS.
- :param grid_size: Destination size as (ncol, nrow). Mutually exclusive with ``res``.
- :param res: Destination resolution (pixel size) in units of destination CRS. Single value or (xres, yres).
- Mutually exclusive with ``size``.
- :param bounds: Destination bounds as a Rasterio bounding box, or a dictionary containing left, bottom,
- right, top bounds in the destination CRS.
-
- :returns: Calculated transform and size.
- """
- # --- Input sanity checks --- #
- # check size and res are not both set
- if (grid_size is not None) and (res is not None):
- raise ValueError("size and res both specified. Specify only one.")
-
- # Set CRS to input CRS by default
- if crs is None:
- crs = raster.crs
-
- if grid_size is None:
- width, height = None, None
- else:
- width, height = grid_size
-
- # Convert bounds to BoundingBox
- if bounds is not None:
- if not isinstance(bounds, rio.coords.BoundingBox):
- bounds = rio.coords.BoundingBox(
- bounds["left"],
- bounds["bottom"],
- bounds["right"],
- bounds["top"],
- )
-
- # If all georeferences are the same as input, skip calculating because of issue in
- # rio.warp.calculate_default_transform (https://github.com/rasterio/rasterio/issues/3010)
- if (
- (crs == raster.crs)
- & ((grid_size is None) | ((height == raster.shape[0]) & (width == raster.shape[1])))
- & ((res is None) | np.all(np.array(res) == raster.res))
- & ((bounds is None) | (bounds == raster.bounds))
- ):
- return raster.transform, raster.shape[::-1]
-
- # --- First, calculate default transform ignoring any change in bounds --- #
- tmp_transform, tmp_width, tmp_height = rio.warp.calculate_default_transform(
- raster.crs,
- crs,
- raster.width,
- raster.height,
- left=raster.bounds.left,
- right=raster.bounds.right,
- top=raster.bounds.top,
- bottom=raster.bounds.bottom,
- resolution=res,
- dst_width=width,
- dst_height=height,
- )
-
- # If no bounds specified, can directly use output of rio.warp.calculate_default_transform
- if bounds is None:
- dst_size = (tmp_width, tmp_height)
- dst_transform = tmp_transform
-
- # --- Second, crop to requested bounds --- #
- else:
- # If output size and bounds are known, can use rio.transform.from_bounds to get dst_transform
- if grid_size is not None:
- dst_transform = rio.transform.from_bounds(
- bounds.left, bounds.bottom, bounds.right, bounds.top, grid_size[0], grid_size[1]
- )
- dst_size = grid_size
-
- else:
- # Otherwise, need to calculate the new output size, rounded to nearest integer
- ref_win = rio.windows.from_bounds(*list(bounds), tmp_transform).round_lengths()
- dst_size = (int(ref_win.width), int(ref_win.height))
-
- if res is not None:
- # In this case, we force output resolution
- if isinstance(res, tuple):
- dst_transform = rio.transform.from_origin(bounds.left, bounds.top, res[0], res[1])
- else:
- dst_transform = rio.transform.from_origin(bounds.left, bounds.top, res, res)
- else:
- # In this case, we force output bounds
- dst_transform = rio.transform.from_bounds(
- bounds.left, bounds.bottom, bounds.right, bounds.top, dst_size[0], dst_size[1]
- )
-
- return dst_transform, dst_size
-
-
-def _cast_pixel_interpretation(
- area_or_point1: Literal["Area", "Point"] | None, area_or_point2: Literal["Area", "Point"] | None
-) -> Literal["Area", "Point"] | None:
- """
- Cast two pixel interpretations and warn if not castable.
-
- Casts to:
- - "Area" if both are "Area",
- - "Point" if both are "Point",
- - None if any of the interpretation is None, or
- - None if one is "Area" and the other "Point" (and raises a warning).
- """
-
- # If one is None, cast to None
- if area_or_point1 is None or area_or_point2 is None:
- area_or_point_out = None
- # If both are equal and not None
- elif area_or_point1 == area_or_point2:
- area_or_point_out = area_or_point1
- else:
- area_or_point_out = None
- msg = (
- 'One raster has a pixel interpretation "Area" and the other "Point". To silence this warning, '
- "either correct the pixel interpretation of one raster, or deactivate "
- 'warnings of pixel interpretation with geoutils.config["warn_area_or_point"]=False.'
- )
- if config["warn_area_or_point"]:
- warnings.warn(message=msg, category=UserWarning)
-
- return area_or_point_out
-
-
-def _cast_nodata(out_dtype: DTypeLike, nodata: int | float | None) -> int | float | None:
- """
- Cast nodata value for output data type to default nodata if incompatible.
-
- :param out_dtype: Dtype of output array.
- :param nodata: Nodata value.
-
- :return: Cast nodata value.
- """
-
- if out_dtype == bool:
- nodata = None
- if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, out_dtype):
- nodata = _default_nodata(out_dtype)
- else:
- nodata = nodata
-
- return nodata
-
-
-def _shift_transform(
- transform: affine.Affine,
- xoff: float,
- yoff: float,
- distance_unit: Literal["georeferenced", "pixel"] = "georeferenced",
-) -> affine.Affine:
- """
- Shift geotransform horizontally, either in pixels or georeferenced units.
-
- :param transform: Input geotransform.
- :param xoff: Translation x offset.
- :param yoff: Translation y offset.
- :param distance_unit: Distance unit, either 'georeferenced' (default) or 'pixel'.
-
- :return: Shifted transform.
- """
-
- if distance_unit not in ["georeferenced", "pixel"]:
- raise ValueError("Argument 'distance_unit' should be either 'pixel' or 'georeferenced'.")
-
- # Get transform
- dx, b, xmin, d, dy, ymax = list(transform)[:6]
-
- # Convert pixel offsets to georeferenced units
- if distance_unit == "pixel":
- xoff *= dx
- yoff *= abs(dy) # dy is negative
-
- return rio.transform.Affine(dx, b, xmin + xoff, d, dy, ymax + yoff)
-
-
def _cast_numeric_array_raster(
raster: RasterType, other: RasterType | NDArrayNum | Number, operation_name: str
) -> tuple[MArrayNum, MArrayNum | NDArrayNum | Number, float | int | None, Literal["Area", "Point"] | None]:
@@ -567,12 +343,9 @@ class Raster:
def __init__(
self,
- filename_or_dataset: str
- | pathlib.Path
- | RasterType
- | rio.io.DatasetReader
- | rio.io.MemoryFile
- | dict[str, Any],
+ filename_or_dataset: (
+ str | pathlib.Path | RasterType | rio.io.DatasetReader | rio.io.MemoryFile | dict[str, Any]
+ ),
bands: int | list[int] | None = None,
load_data: bool = False,
downsample: Number = 1,
@@ -1658,18 +1431,15 @@ def __ge__(self: RasterType, other: RasterType | NDArrayNum | Number) -> RasterT
@overload
def astype(
self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: Literal[False] = False
- ) -> RasterType:
- ...
+ ) -> RasterType: ...
@overload
- def astype(self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: Literal[True]) -> None:
- ...
+ def astype(self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: Literal[True]) -> None: ...
@overload
def astype(
self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, *, inplace: bool = False
- ) -> RasterType | None:
- ...
+ ) -> RasterType | None: ...
def astype(
self: RasterType, dtype: DTypeLike, convert_nodata: bool = True, inplace: bool = False
@@ -2068,12 +1838,10 @@ def set_mask(self, mask: NDArrayBool | Mask) -> None:
self.data[mask_arr > 0] = np.ma.masked
@overload
- def info(self, stats: bool = False, *, verbose: Literal[True] = ...) -> None:
- ...
+ def info(self, stats: bool = False, *, verbose: Literal[True] = ...) -> None: ...
@overload
- def info(self, stats: bool = False, *, verbose: Literal[False]) -> str:
- ...
+ def info(self, stats: bool = False, *, verbose: Literal[False]) -> str: ...
def info(self, stats: bool = False, verbose: bool = True) -> None | str:
"""
@@ -2169,12 +1937,10 @@ def georeferenced_grid_equal(self: RasterType, raster: RasterType) -> bool:
return all([self.shape == raster.shape, self.transform == raster.transform, self.crs == raster.crs])
@overload
- def get_nanarray(self, return_mask: Literal[False] = False) -> NDArrayNum:
- ...
+ def get_nanarray(self, return_mask: Literal[False] = False) -> NDArrayNum: ...
@overload
- def get_nanarray(self, return_mask: Literal[True]) -> tuple[NDArrayNum, NDArrayBool]:
- ...
+ def get_nanarray(self, return_mask: Literal[True]) -> tuple[NDArrayNum, NDArrayBool]: ...
def get_nanarray(self, return_mask: bool = False) -> NDArrayNum | tuple[NDArrayNum, NDArrayBool]:
"""
@@ -2417,8 +2183,7 @@ def crop(
mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
*,
inplace: Literal[False] = False,
- ) -> RasterType:
- ...
+ ) -> RasterType: ...
@overload
def crop(
@@ -2427,8 +2192,7 @@ def crop(
mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
*,
inplace: Literal[True],
- ) -> None:
- ...
+ ) -> None: ...
@overload
def crop(
@@ -2437,8 +2201,7 @@ def crop(
mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
*,
inplace: bool = False,
- ) -> RasterType | None:
- ...
+ ) -> RasterType | None: ...
def crop(
self: RasterType,
@@ -2464,80 +2227,8 @@ def crop(
:returns: A new raster (or None if inplace).
"""
- assert mode in [
- "match_extent",
- "match_pixel",
- ], "mode must be one of 'match_pixel', 'match_extent'"
-
- if isinstance(crop_geom, (Raster, Vector)):
- # For another Vector or Raster, we reproject the bounding box in the same CRS as self
- xmin, ymin, xmax, ymax = crop_geom.get_bounds_projected(out_crs=self.crs)
- if isinstance(crop_geom, Raster):
- # Raise a warning if the reference is a raster that has a different pixel interpretation
- _cast_pixel_interpretation(self.area_or_point, crop_geom.area_or_point)
- elif isinstance(crop_geom, (list, tuple)):
- xmin, ymin, xmax, ymax = crop_geom
- else:
- raise ValueError("cropGeom must be a Raster, Vector, or list of coordinates.")
-
- if mode == "match_pixel":
- # Finding the intersection of requested bounds and original bounds, cropped to image shape
- ref_win = rio.windows.from_bounds(xmin, ymin, xmax, ymax, transform=self.transform)
- self_win = rio.windows.from_bounds(*self.bounds, transform=self.transform).crop(*self.shape)
- final_window = ref_win.intersection(self_win).round_lengths().round_offsets()
-
- # Update bounds and transform accordingly
- new_xmin, new_ymin, new_xmax, new_ymax = rio.windows.bounds(final_window, transform=self.transform)
- tfm = rio.transform.from_origin(new_xmin, new_ymax, *self.res)
-
- if self.is_loaded:
- # In case data is loaded on disk, can extract directly from np array
- (rowmin, rowmax), (colmin, colmax) = final_window.toranges()
-
- if self.count == 1:
- crop_img = self.data[rowmin:rowmax, colmin:colmax]
- else:
- crop_img = self.data[:, rowmin:rowmax, colmin:colmax]
- else:
- assert self._disk_shape is not None # This should not be the case, sanity check to make mypy happy
-
- # If data was not loaded, and self's transform was updated (e.g. due to downsampling) need to
- # get the Window corresponding to on disk data
- ref_win_disk = rio.windows.from_bounds(
- new_xmin, new_ymin, new_xmax, new_ymax, transform=self._disk_transform
- )
- self_win_disk = rio.windows.from_bounds(*self.bounds, transform=self._disk_transform).crop(
- *self._disk_shape[1:]
- )
- final_window_disk = ref_win_disk.intersection(self_win_disk).round_lengths().round_offsets()
-
- # Round up to downsampling size, to match __init__
- final_window_disk = rio.windows.round_window_to_full_blocks(
- final_window_disk, ((self._downsample, self._downsample),)
- )
-
- # Load data for "on_disk" window but out_shape matching in-memory transform -> enforce downsampling
- # AD (24/04/24): Note that the same issue as #447 occurs here when final_window_disk extends beyond
- # self's bounds. Using option `boundless=True` solves the issue but causes other tests to fail
- # This should be fixed with #447 and previous line would be obsolete.
- with rio.open(self.filename) as raster:
- crop_img = raster.read(
- indexes=self._bands,
- masked=self._masked,
- window=final_window_disk,
- out_shape=(final_window.height, final_window.width),
- )
-
- # Squeeze first axis for single-band
- if crop_img.ndim == 3 and crop_img.shape[0] == 1:
- crop_img = crop_img.squeeze(axis=0)
-
- else:
- bbox = rio.coords.BoundingBox(left=xmin, bottom=ymin, right=xmax, top=ymax)
- out_rst = self.reproject(bounds=bbox) # should we instead raise an issue and point to reproject?
- crop_img = out_rst.data
- tfm = out_rst.transform
+ crop_img, tfm = _crop(source_raster=self, crop_geom=crop_geom, mode=mode)
if inplace:
self._data = crop_img
@@ -2564,8 +2255,7 @@ def reproject(
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
- ) -> RasterType:
- ...
+ ) -> RasterType: ...
@overload
def reproject(
@@ -2584,8 +2274,7 @@ def reproject(
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
- ) -> None:
- ...
+ ) -> None: ...
@overload
def reproject(
@@ -2604,8 +2293,7 @@ def reproject(
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
- ) -> RasterType | None:
- ...
+ ) -> RasterType | None: ...
def reproject(
self: RasterType,
@@ -2632,7 +2320,6 @@ def reproject(
Any resampling algorithm implemented in Rasterio can be passed as a string.
-
:param ref: Reference raster to match resolution, bounds and CRS.
:param crs: Destination coordinate reference system as a string or EPSG. If ``ref`` not set,
defaults to this raster's CRS.
@@ -2656,185 +2343,42 @@ def reproject(
:returns: Reprojected raster (or None if inplace).
"""
- # --- Sanity checks on inputs and defaults -- #
- # Check that either ref or crs is provided
- if ref is not None and crs is not None:
- raise ValueError("Either of `ref` or `crs` must be set. Not both.")
- # If none are provided, simply preserve the CRS
- elif ref is None and crs is None:
- crs = self.crs
-
- # Set output dtype
- if dtype is None:
- # Warning: this will not work for multiple bands with different dtypes
- dtype = self.dtype
-
- # --- Set source nodata if provided -- #
- if force_source_nodata is None:
- src_nodata = self.nodata
- else:
- src_nodata = force_source_nodata
- # Raise warning if a different nodata value exists for this raster than the forced one (not None)
- if self.nodata is not None:
- warnings.warn(
- "Forcing source nodata value of {} despite an existing nodata value of {} in the raster. "
- "To silence this warning, use self.set_nodata() before reprojection instead of forcing.".format(
- force_source_nodata, self.nodata
- )
- )
- # --- Set destination nodata if provided -- #
- # This is needed in areas not covered by the input data.
- # If None, will use GeoUtils' default, as rasterio's default is unknown, hence cannot be handled properly.
- if nodata is None:
- nodata = self.nodata
- if nodata is None:
- nodata = _default_nodata(dtype)
- # If nodata is already being used, raise a warning.
- # TODO: for uint8, if all values are used, apply rio.warp to mask to identify invalid values
- if not self.is_loaded:
- warnings.warn(
- f"For reprojection, nodata must be set. Setting default nodata to {nodata}. You may "
- f"set a different nodata with `nodata`."
- )
-
- elif nodata in self.data:
- warnings.warn(
- f"For reprojection, nodata must be set. Default chosen value {nodata} exists in "
- f"self.data. This may have unexpected consequences. Consider setting a different nodata with "
- f"self.set_nodata()."
- )
-
- # Create a BoundingBox if required
- if bounds is not None:
- if not isinstance(bounds, rio.coords.BoundingBox):
- bounds = rio.coords.BoundingBox(
- bounds["left"],
- bounds["bottom"],
- bounds["right"],
- bounds["top"],
- )
+ # Reproject
+ return_copy, data, transformed, crs, nodata = _reproject(
+ source_raster=self,
+ ref=ref,
+ crs=crs,
+ res=res,
+ grid_size=grid_size,
+ bounds=bounds,
+ nodata=nodata,
+ dtype=dtype,
+ resampling=resampling,
+ force_source_nodata=force_source_nodata,
+ silent=silent,
+ n_threads=n_threads,
+ memory_limit=memory_limit,
+ )
- from geoutils.misc import resampling_method_from_str
-
- # --- Basic reprojection options, needed in all cases. --- #
- reproj_kwargs = {
- "src_transform": self.transform,
- "src_crs": self.crs,
- "resampling": resampling if isinstance(resampling, Resampling) else resampling_method_from_str(resampling),
- "src_nodata": src_nodata,
- "dst_nodata": nodata,
- }
-
- # --- Calculate output georeferences (transform, grid size)
-
- # Case a raster is provided as reference
- if ref is not None:
- # Check that ref type is either str, Raster or rasterio data set
- # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45
- if isinstance(ref, Raster):
- # Raise a warning if the reference is a raster that has a different pixel interpretation
- _cast_pixel_interpretation(self.area_or_point, ref.area_or_point)
- ds_ref = ref
- elif isinstance(ref, str):
- if not os.path.exists(ref):
- raise ValueError("Reference raster does not exist.")
- ds_ref = Raster(ref, load_data=False)
+ # If return copy is True (target georeferenced grid was the same as input)
+ if return_copy:
+ if inplace:
+ return None
else:
- raise TypeError("Type of ref not understood, must be path to file (str), Raster.")
-
- # Read reprojecting params from ref raster
- crs = ds_ref.crs
- res = ds_ref.res
- bounds = ds_ref.bounds
- else:
- # Determine target CRS
- crs = CRS.from_user_input(crs)
-
- # Determine target transform and grid size
- transform, grid_size = _get_reproject_params(self, crs=crs, grid_size=grid_size, res=res, bounds=bounds)
-
- # Update reprojection options accordingly
- reproj_kwargs.update({"dst_transform": transform})
- data = np.ones((self.count, grid_size[1], grid_size[0]), dtype=dtype)
- reproj_kwargs.update({"destination": data})
- reproj_kwargs.update({"dst_crs": crs})
-
- # --- Check that reprojection is actually needed --- #
- # Caution, grid_size is (width, height) while shape is (height, width)
- if all(
- [
- (transform == self.transform) or (transform is None),
- (crs == self.crs) or (crs is None),
- (grid_size == self.shape[::-1]) or (grid_size is None),
- np.all(np.array(res) == self.res) or (res is None),
- ]
- ):
- if (nodata == self.nodata) or (nodata is None):
- if not silent:
- warnings.warn(
- "Output projection, bounds and grid size are identical -> returning self (not a copy!)"
- )
- return self
-
- elif nodata is not None:
- if not silent:
- warnings.warn(
- "Only nodata is different, consider using the 'set_nodata()' method instead'\
- ' -> returning self (not a copy!)"
- )
return self
- # --- Set the performance keywords --- #
- if n_threads == 0:
- # Default to cpu count minus one. If the cpu count is undefined, num_threads will be 1
- cpu_count = os.cpu_count() or 2
- num_threads = cpu_count - 1
- else:
- num_threads = n_threads
- reproj_kwargs.update({"num_threads": num_threads, "warp_mem_limit": memory_limit})
-
- # --- Run the reprojection of data --- #
- # If data is loaded, reproject the numpy array directly
- if self.is_loaded:
- # All masked values must be set to a nodata value for rasterio's reproject to work properly
- # TODO: another option is to apply rio.warp.reproject to the mask to identify invalid pixels
- if src_nodata is None and np.sum(self.data.mask) > 0:
- raise ValueError(
- "No nodata set, set one for the raster with self.set_nodata() or use a temporary one "
- "with `force_source_nodata`."
- )
-
- # Mask not taken into account by rasterio, need to fill with src_nodata
- data, transformed = rio.warp.reproject(self.data.filled(src_nodata), **reproj_kwargs)
-
- # If not, uses the dataset instead
- else:
- data = [] # type: ignore
- for k in range(self.count):
- with rio.open(self.filename) as ds:
- band = rio.band(ds, k + 1)
- band, transformed = rio.warp.reproject(band, **reproj_kwargs)
- data.append(band.squeeze())
-
- data = np.array(data)
-
- # Enforce output type
- data = np.ma.masked_array(data.astype(dtype), fill_value=nodata)
-
- if nodata is not None:
- data.mask = data == nodata
-
- # Check for funny business.
- if transform is not None:
- assert transform == transformed
+ # To make MyPy happy without overload for _reproject (as it might re-structured soon anyway)
+ assert data is not None
+ assert transformed is not None
+ assert crs is not None
# Write results to a new Raster.
if inplace:
# Order is important here, because calling self.data will use nodata to mask the array properly
self._crs = crs
self._nodata = nodata
- self._transform = transform
+ self._transform = transformed
# A little trick to force the right shape of data in, then update the mask properly through the data setter
self._data = data.squeeze()
self.data = data
@@ -2850,8 +2394,7 @@ def translate(
distance_unit: Literal["georeferenced"] | Literal["pixel"] = "georeferenced",
*,
inplace: Literal[False] = False,
- ) -> RasterType:
- ...
+ ) -> RasterType: ...
@overload
def translate(
@@ -2861,8 +2404,7 @@ def translate(
distance_unit: Literal["georeferenced"] | Literal["pixel"] = "georeferenced",
*,
inplace: Literal[True],
- ) -> None:
- ...
+ ) -> None: ...
@overload
def translate(
@@ -2872,8 +2414,7 @@ def translate(
distance_unit: Literal["georeferenced"] | Literal["pixel"] = "georeferenced",
*,
inplace: bool = False,
- ) -> RasterType | None:
- ...
+ ) -> RasterType | None: ...
def translate(
self: RasterType,
@@ -2883,27 +2424,27 @@ def translate(
inplace: bool = False,
) -> RasterType | None:
"""
- Shift a raster by a (x,y) offset.
+ Translate a raster by a (x,y) offset.
- The shifting only updates the geotransform (no resampling is performed).
+ The translation only updates the geotransform (no resampling is performed).
:param xoff: Translation x offset.
:param yoff: Translation y offset.
:param distance_unit: Distance unit, either 'georeferenced' (default) or 'pixel'.
:param inplace: Whether to modify the raster in-place.
- :returns: Shifted raster (or None if inplace).
+ :returns: Translated raster (or None if inplace).
"""
- shifted_transform = _shift_transform(self.transform, xoff=xoff, yoff=yoff, distance_unit=distance_unit)
+ translated_transform = _translate(self.transform, xoff=xoff, yoff=yoff, distance_unit=distance_unit)
if inplace:
- # Overwrite transform by shifted transform
- self.transform = shifted_transform
+ # Overwrite transform by translated transform
+ self.transform = translated_transform
return None
else:
raster_copy = self.copy()
- raster_copy.transform = shifted_transform
+ raster_copy.transform = translated_transform
return raster_copy
def save(
@@ -3256,10 +2797,10 @@ def plot(
# Set colorbar min/max values (needed for ScalarMappable)
if vmin is None:
- vmin = np.nanmin(data)
+ vmin = float(np.nanmin(data))
if vmax is None:
- vmax = np.nanmax(data)
+ vmax = float(np.nanmax(data))
# Make sure they are numbers, to avoid mpl error
try:
@@ -3725,8 +3266,7 @@ def to_pointcloud(
as_array: Literal[False] = False,
random_state: int | np.random.Generator | None = None,
force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul",
- ) -> NDArrayNum:
- ...
+ ) -> NDArrayNum: ...
@overload
def to_pointcloud(
@@ -3741,8 +3281,7 @@ def to_pointcloud(
as_array: Literal[True],
random_state: int | np.random.Generator | None = None,
force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul",
- ) -> Vector:
- ...
+ ) -> Vector: ...
@overload
def to_pointcloud(
@@ -3757,8 +3296,7 @@ def to_pointcloud(
as_array: bool = False,
random_state: int | np.random.Generator | None = None,
force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul",
- ) -> NDArrayNum | Vector:
- ...
+ ) -> NDArrayNum | Vector: ...
def to_pointcloud(
self,
@@ -3818,146 +3356,19 @@ def to_pointcloud(
:returns: A point cloud, or array of the shape (N, 2 + count) where N is the sample count.
"""
- # Input checks
-
- # Main data column checks
- if not isinstance(data_column_name, str):
- raise ValueError("Data column name must be a string.")
- if not (isinstance(data_band, int) and data_band >= 1 and data_band <= self.count):
- raise ValueError(
- f"Data band number must be an integer between 1 and the total number of bands ({self.count})."
- )
-
- # Rename data column if a different band is selected but the name is still default
- if data_band != 1 and data_column_name == "b1":
- data_column_name = "b" + str(data_band)
-
- # Auxiliary data columns checks
- if auxiliary_column_names is not None and auxiliary_data_bands is None:
- raise ValueError("Passing auxiliary column names requires passing auxiliary data band numbers as well.")
- if auxiliary_data_bands is not None:
- if not (
- isinstance(auxiliary_data_bands, Iterable) and all(isinstance(b, int) for b in auxiliary_data_bands)
- ):
- raise ValueError("Auxiliary data band number must be an iterable containing only integers.")
- if any((1 > b or self.count < b) for b in auxiliary_data_bands):
- raise ValueError(
- f"Auxiliary data band numbers must be between 1 and the total number of bands ({self.count})."
- )
- if data_band in auxiliary_data_bands:
- raise ValueError(
- f"Main data band {data_band} should not be listed in auxiliary data bands {auxiliary_data_bands}."
- )
-
- # Ensure auxiliary column name is defined if auxiliary data bands is not None
- if auxiliary_column_names is not None:
- if not (
- isinstance(auxiliary_column_names, Iterable)
- and all(isinstance(b, str) for b in auxiliary_column_names)
- ):
- raise ValueError("Auxiliary column names must be an iterable containing only strings.")
- if not len(auxiliary_column_names) == len(auxiliary_data_bands):
- raise ValueError(
- f"Length of auxiliary column name and data band numbers should be the same, "
- f"found {len(auxiliary_column_names)} and {len(auxiliary_data_bands)} respectively."
- )
-
- else:
- auxiliary_column_names = [f"b{i}" for i in auxiliary_data_bands]
-
- # Define bigger list with all bands and names
- all_bands = [data_band] + auxiliary_data_bands
- all_column_names = [data_column_name] + auxiliary_column_names
-
- else:
- all_bands = [data_band]
- all_column_names = [data_column_name]
-
- # If subsample is the entire array, load it to optimize speed
- if subsample == 1 and not self.is_loaded:
- self.load(bands=all_bands)
-
- # Band indexes in the array are band number minus one
- all_indexes = [b - 1 for b in all_bands]
-
- # We do 2D subsampling on the data band only, regardless of valid masks on other bands
- if skip_nodata:
- if self.is_loaded:
- if self.count == 1:
- self_mask = get_mask_from_array(
- self.data
- ) # This is to avoid the case where the mask is just "False"
- else:
- self_mask = get_mask_from_array(
- self.data[data_band - 1, :, :]
- ) # This is to avoid the case where the mask is just "False"
- valid_mask = ~self_mask
-
- # Load only mask of valid data from disk if array not loaded
- else:
- valid_mask = ~self._load_only_mask(bands=data_band)
- # If we are not skipping nodata values, valid mask is everywhere
- else:
- if self.count == 1:
- valid_mask = np.ones(self.data.shape, dtype=bool)
- else:
- valid_mask = np.ones(self.data[0, :].shape, dtype=bool)
-
- # Get subsample on valid mask
- # Build a low memory boolean masked array with invalid values masked to pass to subsampling
- ma_valid = np.ma.masked_array(data=np.ones(np.shape(valid_mask), dtype=bool), mask=~valid_mask)
- # Take a subsample within the valid values
- indices = subsample_array(array=ma_valid, subsample=subsample, random_state=random_state, return_indices=True)
-
- # If the Raster is loaded, pick from the data while ignoring the mask
- if self.is_loaded:
- if self.count == 1:
- pixel_data = self.data[indices[0], indices[1]]
- else:
- # TODO: Combining both indexes at once could reduce memory usage?
- pixel_data = self.data[all_indexes, :][:, indices[0], indices[1]]
-
- # Otherwise use rasterio.sample to load only requested pixels
- else:
- # Extract the coordinates at subsampled pixels with valid data
- # To extract data, we always use "upper left" which rasterio interprets as the exact raster coordinates
- # Further below we redefine output coordinates based on point interpretation
- x_coords, y_coords = (np.array(a) for a in self.ij2xy(indices[0], indices[1], force_offset="ul"))
-
- with rio.open(self.filename) as raster:
- # Rasterio uses indexes (starts at 1)
- pixel_data = np.array(list(raster.sample(zip(x_coords, y_coords), indexes=all_bands))).T
-
- # At this point there should not be any nodata anymore, so we can transform everything to normal array
- if np.ma.isMaskedArray(pixel_data):
- pixel_data = pixel_data.data
-
- # If nodata values were not skipped, convert them to NaNs and change data type
- if skip_nodata is False:
- pixel_data = pixel_data.astype("float32")
- pixel_data[pixel_data == self.nodata] = np.nan
-
- # Now we force the coordinates we define for the point cloud, according to pixel interpretation
- x_coords_2, y_coords_2 = (
- np.array(a) for a in self.ij2xy(indices[0], indices[1], force_offset=force_pixel_offset)
+ return _raster_to_pointcloud(
+ source_raster=self,
+ data_column_name=data_column_name,
+ data_band=data_band,
+ auxiliary_data_bands=auxiliary_data_bands,
+ auxiliary_column_names=auxiliary_column_names,
+ subsample=subsample,
+ skip_nodata=skip_nodata,
+ as_array=as_array,
+ random_state=random_state,
+ force_pixel_offset=force_pixel_offset,
)
- if not as_array:
- points = Vector(
- gpd.GeoDataFrame(
- pixel_data.T,
- columns=all_column_names,
- geometry=gpd.points_from_xy(x_coords_2, y_coords_2),
- crs=self.crs,
- )
- )
- return points
- else:
- # Merge the coordinates and pixel data an array of N x K
- # This has the downside of converting all the data to the same data type
- points_arr = np.vstack((x_coords_2.reshape(1, -1), y_coords_2.reshape(1, -1), pixel_data)).T
- return points_arr
-
@classmethod
def from_pointcloud_regular(
cls: type[RasterType],
@@ -3986,62 +3397,17 @@ def from_pointcloud_regular(
:param area_or_point: Whether to set the pixel interpretation of the raster to "Area" or "Point".
"""
- # Get transform and shape from input
- if grid_coords is not None:
-
- # Input checks
- if (
- not isinstance(grid_coords, tuple)
- or not (isinstance(grid_coords[0], np.ndarray) and grid_coords[0].ndim == 1)
- or not (isinstance(grid_coords[1], np.ndarray) and grid_coords[1].ndim == 1)
- ):
- raise TypeError("Input grid coordinates must be 1D arrays.")
-
- diff_x = np.diff(grid_coords[0])
- diff_y = np.diff(grid_coords[1])
-
- if not all(diff_x == diff_x[0]) and all(diff_y == diff_y[0]):
- raise ValueError("Grid coordinates must be regular (equally spaced, independently along X and Y).")
-
- # Build transform from min X, max Y and step in both
- out_transform = rio.transform.from_origin(
- np.min(grid_coords[0]), np.max(grid_coords[1]), diff_x[0], diff_y[0]
- )
- # Y is first axis, X is second axis
- out_shape = (len(grid_coords[1]), len(grid_coords[0]))
-
- elif transform is not None and shape is not None:
-
- out_transform = transform
- out_shape = shape
-
- else:
- raise ValueError("Either grid coordinates or both geotransform and shape must be provided.")
-
- # Create raster from inputs, with placeholder data for now
- dtype = pointcloud[data_column_name].dtype
- out_nodata = nodata if not None else _default_nodata(dtype)
- arr = np.ones(out_shape, dtype=dtype)
- raster_arr = cls.from_array(
- data=arr, transform=out_transform, crs=pointcloud.crs, nodata=out_nodata, area_or_point=area_or_point
- )
-
- # Get indexes of point cloud coordinates in the raster, forcing no shift
- i, j = raster_arr.xy2ij(
- x=pointcloud.geometry.x.values, y=pointcloud.geometry.y.values, shift_area_or_point=False
+ arr, transform, crs, nodata, aop = _regular_pointcloud_to_raster(
+ pointcloud=pointcloud,
+ grid_coords=grid_coords,
+ transform=transform,
+ shape=shape,
+ nodata=nodata,
+ data_column_name=data_column_name,
+ area_or_point=area_or_point,
)
- # If coordinates are not integer type (forced in xy2ij), then some points are not falling on exact coordinates
- if not np.issubdtype(i.dtype, np.integer) or not np.issubdtype(i.dtype, np.integer):
- raise ValueError("Some point cloud coordinates differ from the grid coordinates.")
-
- # Set values
- mask = np.ones(np.shape(arr), dtype=bool)
- mask[i, j] = False
- arr[i, j] = pointcloud[data_column_name].values
- raster_arr.data = np.ma.masked_array(data=arr, mask=mask)
-
- return raster_arr
+ return cls.from_array(data=arr, transform=transform, crs=crs, nodata=nodata, area_or_point=area_or_point)
def polygonize(
self,
@@ -4059,60 +3425,7 @@ def polygonize(
:returns: Vector containing the polygonized geometries associated to target values.
"""
- # Mask a unique value set by a number
- if isinstance(target_values, (int, float, np.integer, np.floating)):
- if np.sum(self.data == target_values) == 0:
- raise ValueError(f"no pixel with in_value {target_values}")
-
- bool_msk = np.array(self.data == target_values).astype(np.uint8)
-
- # Mask values within boundaries set by a tuple
- elif isinstance(target_values, tuple):
- if np.sum((self.data > target_values[0]) & (self.data < target_values[1])) == 0:
- raise ValueError(f"no pixel with in_value between {target_values[0]} and {target_values[1]}")
-
- bool_msk = ((self.data > target_values[0]) & (self.data < target_values[1])).astype(np.uint8)
-
- # Mask specific values set by a sequence
- elif isinstance(target_values, list) or isinstance(target_values, np.ndarray):
- if np.sum(np.isin(self.data, np.array(target_values))) == 0:
- raise ValueError("no pixel with in_value " + ", ".join(map("{}".format, target_values)))
-
- bool_msk = np.isin(self.data, np.array(target_values)).astype("uint8")
-
- # Mask all valid values
- elif target_values == "all":
- # Using getmaskarray is necessary in case .data.mask is nomask (False)
- bool_msk = (~np.ma.getmaskarray(self.data)).astype("uint8")
-
- else:
- raise ValueError("in_value must be a number, a tuple or a sequence")
-
- # GeoPandas.from_features() only supports certain dtypes, we find the best common dtype to optimize memory usage
- # TODO: this should be a function independent of polygonize, reused in several places
- gpd_dtypes = ["uint8", "uint16", "int16", "int32", "float32"]
- list_common_dtype_index = []
- for gpd_type in gpd_dtypes:
- polygonize_dtype = np.promote_types(gpd_type, self.dtype)
- if str(polygonize_dtype) in gpd_dtypes:
- list_common_dtype_index.append(gpd_dtypes.index(gpd_type))
- if len(list_common_dtype_index) == 0:
- final_dtype = "float32"
- else:
- final_dtype_index = min(list_common_dtype_index)
- final_dtype = gpd_dtypes[final_dtype_index]
-
- results = (
- {"properties": {"raster_value": v}, "geometry": s}
- for i, (s, v) in enumerate(shapes(self.data.astype(final_dtype), mask=bool_msk, transform=self.transform))
- )
-
- gdf = gpd.GeoDataFrame.from_features(list(results))
- gdf.insert(0, data_column_name, range(0, 0 + len(gdf)))
- gdf = gdf.set_geometry(col="geometry")
- gdf = gdf.set_crs(self.crs)
-
- return gv.Vector(gdf)
+ return _polygonize(source_raster=self, target_values=target_values, data_column_name=data_column_name)
def proximity(
self,
@@ -4143,7 +3456,7 @@ def proximity(
:return: Proximity distances raster.
"""
- proximity = proximity_from_vector_or_raster(
+ proximity = _proximity_from_vector_or_raster(
raster=self,
vector=vector,
target_values=target_values,
@@ -4169,8 +3482,7 @@ def subsample(
return_indices: Literal[False] = False,
*,
random_state: int | np.random.Generator | None = None,
- ) -> NDArrayNum:
- ...
+ ) -> NDArrayNum: ...
@overload
def subsample(
@@ -4179,8 +3491,7 @@ def subsample(
return_indices: Literal[True],
*,
random_state: int | np.random.Generator | None = None,
- ) -> tuple[NDArrayNum, ...]:
- ...
+ ) -> tuple[NDArrayNum, ...]: ...
@overload
def subsample(
@@ -4188,8 +3499,7 @@ def subsample(
subsample: float | int,
return_indices: bool = False,
random_state: int | np.random.Generator | None = None,
- ) -> NDArrayNum | tuple[NDArrayNum, ...]:
- ...
+ ) -> NDArrayNum | tuple[NDArrayNum, ...]: ...
def subsample(
self,
@@ -4329,8 +3639,7 @@ def reproject(
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
- ) -> Mask:
- ...
+ ) -> Mask: ...
@overload
def reproject(
@@ -4349,8 +3658,7 @@ def reproject(
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
- ) -> None:
- ...
+ ) -> None: ...
@overload
def reproject(
@@ -4369,8 +3677,7 @@ def reproject(
silent: bool = False,
n_threads: int = 0,
memory_limit: int = 64,
- ) -> Mask | None:
- ...
+ ) -> Mask | None: ...
def reproject(
self: Mask,
@@ -4440,8 +3747,7 @@ def crop(
mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
*,
inplace: Literal[False] = False,
- ) -> Mask:
- ...
+ ) -> Mask: ...
@overload
def crop(
@@ -4450,8 +3756,7 @@ def crop(
mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
*,
inplace: Literal[True],
- ) -> None:
- ...
+ ) -> None: ...
@overload
def crop(
@@ -4460,8 +3765,7 @@ def crop(
mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel",
*,
inplace: bool = False,
- ) -> Mask | None:
- ...
+ ) -> Mask | None: ...
def crop(
self: Mask,
@@ -4590,83 +3894,3 @@ def __invert__(self: Mask) -> Mask:
"""Bitwise inversion of a mask."""
return self.copy(~self.data)
-
-
-# -----------------------------------------
-# Additional stand-alone utility functions
-# -----------------------------------------
-
-
-def proximity_from_vector_or_raster(
- raster: Raster,
- vector: Vector | None = None,
- target_values: list[float] | None = None,
- geometry_type: str = "boundary",
- in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both",
- distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced",
-) -> NDArrayNum:
- """
- (This function is defined here as mostly raster-based, but used in a class method for both Raster and Vector)
- Proximity to a Raster's target values if no Vector is provided, otherwise to a Vector's geometry type
- rasterized on the Raster.
-
- :param raster: Raster to burn the proximity grid on.
- :param vector: Vector for which to compute the proximity to geometry,
- if not provided computed on the Raster target pixels.
- :param target_values: (Only with a Raster) List of target values to use for the proximity,
- defaults to all non-zero values.
- :param geometry_type: (Only with a Vector) Type of geometry to use for the proximity, defaults to 'boundary'.
- :param in_or_out: (Only with a Vector) Compute proximity only 'in' or 'out'-side the geometry, or 'both'.
- :param distance_unit: Distance unit, either 'georeferenced' or 'pixel'.
- """
-
- # 1/ First, if there is a vector input, we rasterize the geometry type
- # (works with .boundary that is a LineString (.exterior exists, but is a LinearRing)
- if vector is not None:
-
- # TODO: Only when using centroid... Maybe we should leave this operation to the user anyway?
- warnings.filterwarnings("ignore", message="Geometry is in a geographic CRS.*")
-
- # We create a geodataframe with the geometry type
- boundary_shp = gpd.GeoDataFrame(geometry=vector.ds.__getattr__(geometry_type), crs=vector.crs)
- # We mask the pixels that make up the geometry type
- mask_boundary = Vector(boundary_shp).create_mask(raster, as_array=True)
-
- else:
- # We mask target pixels
- if target_values is not None:
- mask_boundary = np.logical_or.reduce([raster.get_nanarray() == target_val for target_val in target_values])
- # Otherwise, all non-zero values are considered targets
- else:
- mask_boundary = raster.get_nanarray().astype(bool)
-
- # 2/ Now, we compute the distance matrix relative to the masked geometry type
- if distance_unit.lower() == "georeferenced":
- sampling: int | tuple[float | int, float | int] = raster.res
- elif distance_unit.lower() == "pixel":
- sampling = 1
- else:
- raise ValueError('Distance unit must be either "georeferenced" or "pixel".')
-
- # If not all pixels are targets, then we compute the distance
- non_targets = np.count_nonzero(mask_boundary)
- if non_targets > 0:
- proximity = distance_transform_edt(~mask_boundary, sampling=sampling)
- # Otherwise, pass an array full of nodata
- else:
- proximity = np.ones(np.shape(mask_boundary)) * np.nan
-
- # 3/ If there was a vector input, apply the in_and_out argument to optionally mask inside/outside
- if vector is not None:
- if in_or_out == "both":
- pass
- elif in_or_out in ["in", "out"]:
- mask_polygon = Vector(vector.ds).create_mask(raster, as_array=True)
- if in_or_out == "in":
- proximity[~mask_polygon] = 0
- else:
- proximity[mask_polygon] = 0
- else:
- raise ValueError('The type of proximity must be one of "in", "out" or "both".')
-
- return proximity
diff --git a/geoutils/raster/sampling.py b/geoutils/raster/sampling.py
index 34aa557d..a4559403 100644
--- a/geoutils/raster/sampling.py
+++ b/geoutils/raster/sampling.py
@@ -7,7 +7,7 @@
import numpy as np
from geoutils._typing import MArrayNum, NDArrayNum
-from geoutils.raster.array import get_mask_from_array
+from geoutils.raster.array import _get_mask_from_array
@overload
@@ -17,8 +17,7 @@ def subsample_array(
return_indices: Literal[False] = False,
*,
random_state: int | np.random.Generator | None = None,
-) -> NDArrayNum:
- ...
+) -> NDArrayNum: ...
@overload
@@ -28,8 +27,7 @@ def subsample_array(
return_indices: Literal[True],
*,
random_state: int | np.random.Generator | None = None,
-) -> tuple[NDArrayNum, ...]:
- ...
+) -> tuple[NDArrayNum, ...]: ...
@overload
@@ -38,8 +36,7 @@ def subsample_array(
subsample: float | int,
return_indices: bool = False,
random_state: int | np.random.Generator | None = None,
-) -> NDArrayNum | tuple[NDArrayNum, ...]:
- ...
+) -> NDArrayNum | tuple[NDArrayNum, ...]: ...
def subsample_array(
@@ -63,7 +60,7 @@ def subsample_array(
rng = np.random.default_rng(random_state)
# Remove invalid values and flatten array
- mask = get_mask_from_array(array) # -> need to remove .squeeze in get_mask
+ mask = _get_mask_from_array(array) # -> need to remove .squeeze in get_mask
valids = np.argwhere(~mask.flatten()).squeeze()
# Get number of points to extract
@@ -184,7 +181,7 @@ def subdivide_array(shape: tuple[int, ...], count: int) -> NDArrayNum:
# Generate a small grid of indices, with the same unique count as 'count'
rect = _get_closest_rectangle(count)
- small_indices = np.pad(np.arange(count), np.prod(rect) - count, mode="edge")[: np.prod(rect)].reshape(rect)
+ small_indices = np.pad(np.arange(count), np.prod(rect) - count, mode="edge")[: int(np.prod(rect))].reshape(rect)
# Upscale the grid to fit the output shape using nearest neighbour scaling.
indices = skimage.transform.resize(small_indices, shape, order=0, preserve_range=True).astype(int)
diff --git a/geoutils/raster/satimg.py b/geoutils/raster/satimg.py
index 181ad5a0..79871dbb 100644
--- a/geoutils/raster/satimg.py
+++ b/geoutils/raster/satimg.py
@@ -1,6 +1,7 @@
"""
geoutils.satimg provides a toolset for working with satellite data.
"""
+
from __future__ import annotations
import datetime as dt
@@ -101,7 +102,7 @@ def parse_metadata_from_fn(fname: str) -> list[Any]:
else:
attrs = (None,) * 6
- # if the form is only XX.ext (only the first versions of SRTM had a naming that... bad (simplfied?))
+ # if the form is only XX.ext (only the first versions of SRTM had a naming that... bad (simplified?))
elif os.path.splitext(os.path.basename(fname))[1] == ".hgt":
attrs = (
"SRTM",
diff --git a/geoutils/vector/__init__.py b/geoutils/vector/__init__.py
new file mode 100644
index 00000000..dd198200
--- /dev/null
+++ b/geoutils/vector/__init__.py
@@ -0,0 +1,3 @@
+from geoutils.vector.geometric import * # noqa
+from geoutils.vector.geotransformations import * # noqa
+from geoutils.vector.vector import Vector, VectorType # noqa
diff --git a/geoutils/vector/geometric.py b/geoutils/vector/geometric.py
new file mode 100644
index 00000000..a30404a6
--- /dev/null
+++ b/geoutils/vector/geometric.py
@@ -0,0 +1,212 @@
+"""Functionalities to manipulate vector geometries."""
+
+from __future__ import annotations
+
+import warnings
+
+import geopandas as gpd
+import matplotlib.pyplot as plt
+import numpy as np
+import shapely
+from scipy.spatial import Voronoi
+from shapely.geometry.polygon import Polygon
+
+import geoutils as gu
+from geoutils.projtools import _get_utm_ups_crs, bounds2poly
+
+
+def _buffer_metric(gdf: gpd.GeoDataFrame, buffer_size: float) -> gu.Vector:
+ """
+ Metric buffering. See Vector.buffer_metric() for details.
+ """
+
+ crs_utm_ups = _get_utm_ups_crs(df=gdf)
+
+ # Reproject the shapefile in the local UTM
+ ds_utm = gdf.to_crs(crs=crs_utm_ups)
+
+ # Buffer the shapefile
+ ds_buffered = ds_utm.buffer(distance=buffer_size)
+ del ds_utm
+
+ # Revert-project the shapefile in the original CRS
+ ds_buffered_origproj = ds_buffered.to_crs(crs=gdf.crs)
+ del ds_buffered
+
+ # Return a Vector object of the buffered GeoDataFrame
+ # TODO: Clarify what is conserved in the GeoSeries and what to pass the GeoDataFrame to not lose any attributes
+ vector_buffered = gu.Vector(gpd.GeoDataFrame(geometry=ds_buffered_origproj.geometry, crs=gdf.crs))
+
+ return vector_buffered
+
+
+def _buffer_without_overlap(
+ gdf: gpd.GeoDataFrame, buffer_size: int | float, metric: bool = True, plot: bool = False
+) -> gu.Vector:
+ """See Vector.buffer_without_overlap() for details."""
+
+ # Project in local UTM if metric is True
+ if metric:
+ crs_utm_ups = _get_utm_ups_crs(df=gdf)
+ gdf = gdf.to_crs(crs=crs_utm_ups)
+ else:
+ gdf = gdf
+
+ # Dissolve all geometries into one
+ merged = gdf.dissolve()
+
+ # Add buffer around geometries
+ merged_buffer = merged.buffer(buffer_size)
+
+ # Extract only the buffered area
+ buffer = merged_buffer.difference(merged)
+
+ # Crop Voronoi polygons to bound geometry and add missing polygons
+ bound_poly = bounds2poly(gdf)
+ bound_poly = bound_poly.buffer(buffer_size)
+ voronoi_all = _generate_voronoi_with_bounds(gdf, bound_poly)
+ if plot:
+ plt.figure(figsize=(16, 4))
+ ax1 = plt.subplot(141)
+ voronoi_all.plot(ax=ax1)
+ gdf.plot(fc="none", ec="k", ax=ax1)
+ ax1.set_title("Voronoi polygons, cropped")
+
+ # Extract Voronoi polygons only within the buffer area
+ voronoi_diff = voronoi_all.intersection(buffer.geometry[0])
+
+ # Split all polygons, and join attributes of original geometries into the Voronoi polygons
+ # Splitting, i.e. explode, is needed when Voronoi generate MultiPolygons that may extend over several features.
+ voronoi_gdf = gpd.GeoDataFrame(geometry=voronoi_diff.explode(index_parts=True)) # requires geopandas>=0.10
+ joined_voronoi = gpd.tools.sjoin(gdf, voronoi_gdf, how="right")
+
+ # Plot results -> some polygons are duplicated
+ if plot:
+ ax2 = plt.subplot(142, sharex=ax1, sharey=ax1)
+ joined_voronoi.plot(ax=ax2, column="index_left", alpha=0.5, ec="k")
+ gdf.plot(ax=ax2, column=gdf.index.values)
+ ax2.set_title("Buffer with duplicated polygons")
+
+ # Find non unique Voronoi polygons, and retain only first one
+ _, indexes = np.unique(joined_voronoi.index, return_index=True)
+ unique_voronoi = joined_voronoi.iloc[indexes]
+
+ # Plot results -> unique polygons only
+ if plot:
+ ax3 = plt.subplot(143, sharex=ax1, sharey=ax1)
+ unique_voronoi.plot(ax=ax3, column="index_left", alpha=0.5, ec="k")
+ gdf.plot(ax=ax3, column=gdf.index.values)
+ ax3.set_title("Buffer with unique polygons")
+
+ # Dissolve all polygons by original index
+ merged_voronoi = unique_voronoi.dissolve(by="index_left")
+
+ # Plot
+ if plot:
+ ax4 = plt.subplot(144, sharex=ax1, sharey=ax1)
+ gdf.plot(ax=ax4, column=gdf.index.values)
+ merged_voronoi.plot(column=merged_voronoi.index.values, ax=ax4, alpha=0.5)
+ ax4.set_title("Final buffer")
+ plt.show()
+
+ # Reverse-project to the original CRS if metric is True
+ if metric:
+ merged_voronoi = merged_voronoi.to_crs(crs=gdf.crs)
+
+ return gu.Vector(merged_voronoi)
+
+
+def _extract_vertices(gdf: gpd.GeoDataFrame) -> list[list[tuple[float, float]]]:
+ r"""
+ Function to extract the exterior vertices of all shapes within a gpd.GeoDataFrame.
+
+ :param gdf: The GeoDataFrame from which the vertices need to be extracted.
+
+ :returns: A list containing a list of (x, y) positions of the vertices. The length of the primary list is equal
+ to the number of geometries inside gdf, and length of each sublist is the number of vertices in the geometry.
+ """
+ vertices = []
+ # Loop on all geometries within gdf
+ for geom in gdf.geometry:
+ # Extract geometry exterior(s)
+ if geom.geom_type == "MultiPolygon":
+ exteriors = [p.exterior for p in geom.geoms]
+ elif geom.geom_type == "Polygon":
+ exteriors = [geom.exterior]
+ elif geom.geom_type == "LineString":
+ exteriors = [geom]
+ elif geom.geom_type == "MultiLineString":
+ exteriors = list(geom.geoms)
+ else:
+ raise NotImplementedError(f"Geometry type {geom.geom_type} not implemented.")
+
+ vertices.extend([list(ext.coords) for ext in exteriors])
+
+ return vertices
+
+
+def _generate_voronoi_polygons(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+ """
+ Generate Voronoi polygons (tessellation) from the vertices of all geometries in a GeoDataFrame.
+
+ Uses scipy.spatial.voronoi.
+
+ :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons.
+
+ :returns: A GeoDataFrame containing the Voronoi polygons.
+ """
+ # Extract the coordinates of the vertices of all geometries in gdf
+ vertices = _extract_vertices(gdf)
+ coords = np.concatenate(vertices)
+
+ # Create the Voronoi diagram and extract ridges
+ vor = Voronoi(coords)
+ lines = [shapely.geometry.LineString(vor.vertices[line]) for line in vor.ridge_vertices if -1 not in line]
+ polys = list(shapely.ops.polygonize(lines))
+ if len(polys) == 0:
+ raise ValueError("Invalid geometry, cannot generate finite Voronoi polygons")
+
+ # Convert into GeoDataFrame
+ voronoi = gpd.GeoDataFrame(geometry=gpd.GeoSeries(polys))
+ voronoi.crs = gdf.crs
+
+ return voronoi
+
+
+def _generate_voronoi_with_bounds(gdf: gpd.GeoDataFrame, bound_poly: Polygon) -> gpd.GeoDataFrame:
+ """
+ Generate Voronoi polygons that are bounded by the polygon bound_poly, to avoid Voronoi polygons that extend \
+far beyond the original geometry.
+
+ Voronoi polygons are created using generate_voronoi_polygons, cropped to the extent of bound_poly and gaps \
+are filled with new polygons.
+
+ :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons.
+ :param: A shapely Polygon to be used for bounding the Voronoi diagrams.
+
+ :returns: A GeoDataFrame containing the Voronoi polygons.
+ """
+ # Create Voronoi polygons
+ voronoi = _generate_voronoi_polygons(gdf)
+
+ # Crop Voronoi polygons to input bound_poly extent
+ voronoi_crop = voronoi.intersection(bound_poly)
+ voronoi_crop = gpd.GeoDataFrame(geometry=voronoi_crop) # convert to DataFrame
+
+ # Dissolve all Voronoi polygons and subtract from bounds to get gaps
+ voronoi_merged = voronoi_crop.dissolve()
+ bound_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(bound_poly))
+ bound_gdf.crs = gdf.crs
+ gaps = bound_gdf.difference(voronoi_merged)
+
+ # Merge cropped Voronoi with gaps, if not empty, otherwise return cropped Voronoi
+ with warnings.catch_warnings():
+ warnings.filterwarnings("ignore", "Geometry is in a geographic CRS. Results from 'area' are likely incorrect.")
+ tot_area = np.sum(gaps.area.values)
+
+ if not tot_area == 0:
+ voronoi_all = gpd.GeoDataFrame(geometry=list(voronoi_crop.geometry) + list(gaps.geometry))
+ voronoi_all.crs = gdf.crs
+ return voronoi_all
+ else:
+ return voronoi_crop
diff --git a/geoutils/vector/geotransformations.py b/geoutils/vector/geotransformations.py
new file mode 100644
index 00000000..a6c8a92f
--- /dev/null
+++ b/geoutils/vector/geotransformations.py
@@ -0,0 +1,55 @@
+"""Functionalities for geotransformations of vectors."""
+
+from __future__ import annotations
+
+import os
+
+import geopandas as gpd
+import pyogrio
+import rasterio as rio
+from rasterio.crs import CRS
+
+import geoutils as gu
+
+
+def _reproject(
+ gdf: gpd.GeoDataFrame,
+ ref: gu.Raster | rio.io.DatasetReader | gu.Vector | gpd.GeoDataFrame | str | None = None,
+ crs: CRS | str | int | None = None,
+) -> gpd.GeoDataFrame:
+ """Reproject a vector. See Vector.reproject() for more details."""
+
+ # Check that either ref or crs is provided
+ if (ref is not None and crs is not None) or (ref is None and crs is None):
+ raise ValueError("Either of `ref` or `crs` must be set. Not both.")
+
+ # Case a raster or vector is provided as reference
+ if ref is not None:
+ # Check that ref type is either str, Raster or rasterio data set
+ # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45
+ if isinstance(ref, (gu.Raster, gu.Vector)):
+ ds_ref = ref
+ elif isinstance(ref, (rio.io.DatasetReader, gpd.GeoDataFrame)):
+ ds_ref = ref
+ elif isinstance(ref, str):
+ if not os.path.exists(ref):
+ raise ValueError("Reference raster or vector path does not exist.")
+ try:
+ ds_ref = gu.Raster(ref, load_data=False)
+ except rio.errors.RasterioIOError:
+ try:
+ ds_ref = gu.Vector(ref)
+ except pyogrio.errors.DataSourceError:
+ raise ValueError("Could not open raster or vector with rasterio or pyogrio.")
+ else:
+ raise TypeError("Type of ref must be string path to file, Raster or Vector.")
+
+ # Read reprojecting params from ref raster
+ crs = ds_ref.crs
+ else:
+ # Determine user-input target CRS
+ crs = CRS.from_user_input(crs)
+
+ new_ds = gdf.to_crs(crs=crs)
+
+ return new_ds
diff --git a/geoutils/vector.py b/geoutils/vector/vector.py
similarity index 77%
rename from geoutils/vector.py
rename to geoutils/vector/vector.py
index f4c61ab8..7d563b0b 100644
--- a/geoutils/vector.py
+++ b/geoutils/vector/vector.py
@@ -1,13 +1,12 @@
"""
-geoutils.vectortools provides a toolset for working with vector data.
+Module for Vector class.
"""
+
from __future__ import annotations
-import os
import pathlib
import warnings
from collections import abc
-from numbers import Number
from os import PathLike
from typing import (
Any,
@@ -25,28 +24,25 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-import pyogrio
import rasterio as rio
-import rasterio.errors
-import shapely
from geopandas.testing import assert_geodataframe_equal
from mpl_toolkits.axes_grid1 import make_axes_locatable
from pandas._typing import WriteBuffer
-from rasterio import features, warp
from rasterio.crs import CRS
-from scipy.spatial import Voronoi
from shapely.geometry.base import BaseGeometry
-from shapely.geometry.polygon import Polygon
import geoutils as gu
from geoutils._typing import NDArrayBool, NDArrayNum
+from geoutils.interface.distance import _proximity_from_vector_or_raster
+from geoutils.interface.raster_vector import _create_mask, _rasterize
from geoutils.misc import copy_doc
from geoutils.projtools import (
_get_bounds_projected,
_get_footprint_projected,
_get_utm_ups_crs,
- bounds2poly,
)
+from geoutils.vector.geometric import _buffer_metric, _buffer_without_overlap
+from geoutils.vector.geotransformations import _reproject
# This is a generic Vector-type (if subclasses are made, this will change appropriately)
VectorType = TypeVar("VectorType", bound="Vector")
@@ -148,12 +144,10 @@ def __str__(self) -> str:
return str(self.ds.__str__())
@overload
- def info(self, verbose: Literal[True] = ...) -> None:
- ...
+ def info(self, verbose: Literal[True] = ...) -> None: ...
@overload
- def info(self, verbose: Literal[False]) -> str:
- ...
+ def info(self, verbose: Literal[False]) -> str: ...
def info(self, verbose: bool = True) -> str | None:
"""
@@ -323,7 +317,7 @@ def _override_gdf_output(
"""Parse outputs of GeoPandas functions to facilitate object manipulation."""
# Raise error if output is not treated separately, should appear in tests
- if not isinstance(other, (gpd.GeoDataFrame, gpd.GeoDataFrame, pd.Series, BaseGeometry)):
+ if not isinstance(other, (gpd.GeoDataFrame, pd.Series, BaseGeometry)):
raise ValueError("Not implemented. This error should only be raised in tests.")
# If a GeoDataFrame is the output, return it
@@ -652,7 +646,7 @@ def sjoin(self, df: Vector | gpd.GeoDataFrame, *args: Any, **kwargs: Any) -> Vec
else:
gdf = df
- return self._override_gdf_output(self.ds.sjoin(df=gdf, *args, **kwargs))
+ return self._override_gdf_output(self.ds.sjoin(gdf, *args, **kwargs))
@copy_doc(gpd.GeoDataFrame, "Vector")
def sjoin_nearest(
@@ -994,8 +988,7 @@ def crop(
clip: bool,
*,
inplace: Literal[False] = False,
- ) -> VectorType:
- ...
+ ) -> VectorType: ...
@overload
def crop(
@@ -1004,8 +997,7 @@ def crop(
clip: bool,
*,
inplace: Literal[True],
- ) -> None:
- ...
+ ) -> None: ...
@overload
def crop(
@@ -1014,8 +1006,7 @@ def crop(
clip: bool,
*,
inplace: bool = False,
- ) -> VectorType | None:
- ...
+ ) -> VectorType | None: ...
def crop(
self: VectorType,
@@ -1070,8 +1061,7 @@ def reproject(
crs: CRS | str | int | None = None,
*,
inplace: Literal[False] = False,
- ) -> Vector:
- ...
+ ) -> Vector: ...
@overload
def reproject(
@@ -1080,8 +1070,7 @@ def reproject(
crs: CRS | str | int | None = None,
*,
inplace: Literal[True],
- ) -> None:
- ...
+ ) -> None: ...
@overload
def reproject(
@@ -1090,8 +1079,7 @@ def reproject(
crs: CRS | str | int | None = None,
*,
inplace: bool = False,
- ) -> Vector | None:
- ...
+ ) -> Vector | None: ...
def reproject(
self: Vector,
@@ -1117,38 +1105,7 @@ def reproject(
:returns: Reprojected vector (or None if inplace).
"""
- # Check that either ref or crs is provided
- if (ref is not None and crs is not None) or (ref is None and crs is None):
- raise ValueError("Either of `ref` or `crs` must be set. Not both.")
-
- # Case a raster or vector is provided as reference
- if ref is not None:
- # Check that ref type is either str, Raster or rasterio data set
- # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45
- if isinstance(ref, (gu.Raster, gu.Vector)):
- ds_ref = ref
- elif isinstance(ref, (rio.io.DatasetReader, gpd.GeoDataFrame)):
- ds_ref = ref
- elif isinstance(ref, str):
- if not os.path.exists(ref):
- raise ValueError("Reference raster or vector path does not exist.")
- try:
- ds_ref = gu.Raster(ref, load_data=False)
- except rasterio.errors.RasterioIOError:
- try:
- ds_ref = Vector(ref)
- except pyogrio.errors.DataSourceError:
- raise ValueError("Could not open raster or vector with rasterio or pyogrio.")
- else:
- raise TypeError("Type of ref must be string path to file, Raster or Vector.")
-
- # Read reprojecting params from ref raster
- crs = ds_ref.crs
- else:
- # Determine user-input target CRS
- crs = CRS.from_user_input(crs)
-
- new_ds = self.ds.to_crs(crs=crs)
+ new_ds = _reproject(gdf=self.ds, ref=ref, crs=crs)
if inplace:
self.ds = new_ds
@@ -1164,8 +1121,7 @@ def translate(
zoff: float = 0.0,
*,
inplace: Literal[False] = False,
- ) -> VectorType:
- ...
+ ) -> VectorType: ...
@overload
def translate(
@@ -1175,8 +1131,7 @@ def translate(
zoff: float = 0.0,
*,
inplace: Literal[True],
- ) -> None:
- ...
+ ) -> None: ...
@overload
def translate(
@@ -1186,8 +1141,7 @@ def translate(
zoff: float = 0.0,
*,
inplace: bool = False,
- ) -> VectorType | None:
- ...
+ ) -> VectorType | None: ...
def translate(
self: VectorType,
@@ -1231,8 +1185,7 @@ def create_mask(
buffer: int | float | np.integer[Any] | np.floating[Any] = 0,
*,
as_array: Literal[False] = False,
- ) -> gu.Mask:
- ...
+ ) -> gu.Mask: ...
@overload
def create_mask(
@@ -1245,8 +1198,7 @@ def create_mask(
buffer: int | float | np.integer[Any] | np.floating[Any] = 0,
*,
as_array: Literal[True],
- ) -> NDArrayNum:
- ...
+ ) -> NDArrayNum: ...
def create_mask(
self,
@@ -1280,76 +1232,9 @@ def create_mask(
:returns: A Mask object contain a boolean array
"""
- # If no raster given, use provided dimensions
- if raster is None:
- # At minimum, xres must be set
- if xres is None:
- raise ValueError("At least raster or xres must be set.")
- if yres is None:
- yres = xres
-
- # By default, use self's CRS and bounds
- if crs is None:
- crs = self.ds.crs
- if bounds is None:
- bounds_shp = True
- bounds = self.ds.total_bounds
- else:
- bounds_shp = False
-
- # Calculate raster shape
- left, bottom, right, top = bounds
- height = abs((right - left) / xres)
- width = abs((top - bottom) / yres)
-
- if width % 1 != 0 or height % 1 != 0:
- # Only warn if the bounds were provided, and not derived from the vector
- if not bounds_shp:
- warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.")
-
- width = int(np.round(width))
- height = int(np.round(height))
- out_shape = (height, width)
-
- # Calculate raster transform
- transform = rio.transform.from_bounds(left, bottom, right, top, width, height)
-
- # otherwise use directly raster's dimensions
- elif isinstance(raster, gu.Raster):
- out_shape = raster.shape
- transform = raster.transform
- crs = raster.crs
- bounds = raster.bounds
- else:
- raise TypeError("Raster must be a geoutils.Raster or None.")
-
- # Copying GeoPandas dataframe before applying changes
- gdf = self.ds.copy()
-
- # Crop vector geometries to avoid issues when reprojecting
- left, bottom, right, top = bounds # type: ignore
- x1, y1, x2, y2 = warp.transform_bounds(crs, gdf.crs, left, bottom, right, top)
- gdf = gdf.cx[x1:x2, y1:y2]
-
- # Reproject vector into raster CRS
- gdf = gdf.to_crs(crs)
-
- # Create a buffer around the features
- if not isinstance(buffer, (int, float, np.number)):
- raise TypeError(f"Buffer must be a number, currently set to {type(buffer).__name__}.")
- if buffer != 0:
- gdf.geometry = [geom.buffer(buffer) for geom in gdf.geometry]
- elif buffer == 0:
- pass
-
- # Rasterize geometry
- mask = features.rasterize(
- shapes=gdf.geometry, fill=0, out_shape=out_shape, transform=transform, default_value=1, dtype="uint8"
- ).astype("bool")
-
- # Force output mask to be of same dimension as input raster
- if raster is not None:
- mask = mask.reshape((raster.count, raster.height, raster.width)) # type: ignore
+ mask, transform, crs = _create_mask(
+ gdf=self.ds, raster=raster, crs=crs, xres=xres, yres=yres, bounds=bounds, buffer=buffer, as_array=as_array
+ )
# Return output as mask or as array
if as_array:
@@ -1393,84 +1278,16 @@ def rasterize(
:returns: Raster or mask containing the burned geometries.
"""
- if (raster is not None) and (crs is not None):
- raise ValueError("Only one of raster or crs can be provided.")
-
- # Reproject vector into requested CRS or rst CRS first, if needed
- # This has to be done first so that width/height calculated below are correct!
- if crs is None:
- crs = self.ds.crs
-
- if raster is not None:
- crs = raster.crs # type: ignore
-
- vect = self.ds.to_crs(crs)
-
- # If no raster given, now use provided dimensions
- if raster is None:
- # At minimum, xres must be set
- if xres is None:
- raise ValueError("At least raster or xres must be set.")
- if yres is None:
- yres = xres
-
- # By default, use self's bounds
- if bounds is None:
- bounds = vect.total_bounds
-
- # Calculate raster shape
- left, bottom, right, top = bounds
- width = abs((right - left) / xres)
- height = abs((top - bottom) / yres)
-
- if width % 1 != 0 or height % 1 != 0:
- warnings.warn("Bounds not a multiple of xres/yres, use rounded bounds.")
-
- width = int(np.round(width))
- height = int(np.round(height))
- out_shape = (height, width)
-
- # Calculate raster transform
- transform = rio.transform.from_bounds(left, bottom, right, top, width, height)
-
- # otherwise use directly raster's dimensions
- else:
- out_shape = raster.shape # type: ignore
- transform = raster.transform # type: ignore
-
- # Set default burn value, index from 1 to len(self.ds)
- if in_value is None:
- in_value = self.ds.index + 1
-
- # Rasterize geometry
- if isinstance(in_value, abc.Iterable):
- if len(in_value) != len(vect.geometry): # type: ignore
- raise ValueError(
- "in_value must have same length as self.ds.geometry, currently {} != {}".format(
- len(in_value), len(vect.geometry) # type: ignore
- )
- )
-
- out_geom = ((geom, value) for geom, value in zip(vect.geometry, in_value))
-
- mask = features.rasterize(shapes=out_geom, fill=out_value, out_shape=out_shape, transform=transform)
-
- elif isinstance(in_value, Number):
- mask = features.rasterize(
- shapes=vect.geometry, fill=out_value, out_shape=out_shape, transform=transform, default_value=in_value
- )
- else:
- raise ValueError("in_value must be a single number or an iterable with same length as self.ds.geometry")
-
- # We return a mask if there is a single value to burn and this value is 1
- if isinstance(in_value, (int, np.integer, float, np.floating)) and in_value == 1:
- output = gu.Mask.from_array(data=mask, transform=transform, crs=crs, nodata=None)
-
- # Otherwise we return a Raster if there are several values to burn
- else:
- output = gu.Raster.from_array(data=mask, transform=transform, crs=crs, nodata=None)
-
- return output
+ return _rasterize(
+ gdf=self.ds,
+ raster=raster,
+ crs=crs,
+ xres=xres,
+ yres=yres,
+ bounds=bounds,
+ in_value=in_value,
+ out_value=out_value,
+ )
@classmethod
def from_bounds_projected(
@@ -1557,7 +1374,7 @@ def proximity(
raster = gu.Raster.from_array(data=np.zeros((1000, 1000)), transform=transform, crs=self.crs)
- proximity = gu.raster.raster.proximity_from_vector_or_raster(
+ proximity = _proximity_from_vector_or_raster(
raster=raster, vector=self, geometry_type=geometry_type, in_or_out=in_or_out, distance_unit=distance_unit
)
@@ -1582,24 +1399,7 @@ def buffer_metric(self, buffer_size: float) -> Vector:
:return: Buffered shapefile.
"""
- crs_utm_ups = _get_utm_ups_crs(df=self.ds)
-
- # Reproject the shapefile in the local UTM
- ds_utm = self.ds.to_crs(crs=crs_utm_ups)
-
- # Buffer the shapefile
- ds_buffered = ds_utm.buffer(distance=buffer_size)
- del ds_utm
-
- # Revert-project the shapefile in the original CRS
- ds_buffered_origproj = ds_buffered.to_crs(crs=self.ds.crs)
- del ds_buffered
-
- # Return a Vector object of the buffered GeoDataFrame
- # TODO: Clarify what is conserved in the GeoSeries and what to pass the GeoDataFrame to not lose any attributes
- vector_buffered = Vector(gpd.GeoDataFrame(geometry=ds_buffered_origproj.geometry, crs=self.ds.crs))
-
- return vector_buffered
+ return _buffer_metric(gdf=self.ds, buffer_size=buffer_size)
def get_bounds_projected(self, out_crs: CRS, densify_points: int = 5000) -> rio.coords.BoundingBox:
"""
@@ -1684,173 +1484,4 @@ def buffer_without_overlap(self, buffer_size: int | float, metric: bool = True,
>>> plt.plot() # doctest: +SKIP
"""
- # Project in local UTM if metric is True
- if metric:
- crs_utm_ups = _get_utm_ups_crs(df=self.ds)
- gdf = self.ds.to_crs(crs=crs_utm_ups)
- else:
- gdf = self.ds
-
- # Dissolve all geometries into one
- merged = gdf.dissolve()
-
- # Add buffer around geometries
- merged_buffer = merged.buffer(buffer_size)
-
- # Extract only the buffered area
- buffer = merged_buffer.difference(merged)
-
- # Crop Voronoi polygons to bound geometry and add missing polygons
- bound_poly = bounds2poly(gdf)
- bound_poly = bound_poly.buffer(buffer_size)
- voronoi_all = generate_voronoi_with_bounds(gdf, bound_poly)
- if plot:
- plt.figure(figsize=(16, 4))
- ax1 = plt.subplot(141)
- voronoi_all.plot(ax=ax1)
- gdf.plot(fc="none", ec="k", ax=ax1)
- ax1.set_title("Voronoi polygons, cropped")
-
- # Extract Voronoi polygons only within the buffer area
- voronoi_diff = voronoi_all.intersection(buffer.geometry[0])
-
- # Split all polygons, and join attributes of original geometries into the Voronoi polygons
- # Splitting, i.e. explode, is needed when Voronoi generate MultiPolygons that may extend over several features.
- voronoi_gdf = gpd.GeoDataFrame(geometry=voronoi_diff.explode(index_parts=True)) # requires geopandas>=0.10
- joined_voronoi = gpd.tools.sjoin(gdf, voronoi_gdf, how="right")
-
- # Plot results -> some polygons are duplicated
- if plot:
- ax2 = plt.subplot(142, sharex=ax1, sharey=ax1)
- joined_voronoi.plot(ax=ax2, column="index_left", alpha=0.5, ec="k")
- gdf.plot(ax=ax2, column=gdf.index.values)
- ax2.set_title("Buffer with duplicated polygons")
-
- # Find non unique Voronoi polygons, and retain only first one
- _, indexes = np.unique(joined_voronoi.index, return_index=True)
- unique_voronoi = joined_voronoi.iloc[indexes]
-
- # Plot results -> unique polygons only
- if plot:
- ax3 = plt.subplot(143, sharex=ax1, sharey=ax1)
- unique_voronoi.plot(ax=ax3, column="index_left", alpha=0.5, ec="k")
- gdf.plot(ax=ax3, column=gdf.index.values)
- ax3.set_title("Buffer with unique polygons")
-
- # Dissolve all polygons by original index
- merged_voronoi = unique_voronoi.dissolve(by="index_left")
-
- # Plot
- if plot:
- ax4 = plt.subplot(144, sharex=ax1, sharey=ax1)
- gdf.plot(ax=ax4, column=gdf.index.values)
- merged_voronoi.plot(column=merged_voronoi.index.values, ax=ax4, alpha=0.5)
- ax4.set_title("Final buffer")
- plt.show()
-
- # Reverse-project to the original CRS if metric is True
- if metric:
- merged_voronoi = merged_voronoi.to_crs(crs=self.crs)
-
- return Vector(merged_voronoi)
-
-
-# -----------------------------------------
-# Additional stand-alone utility functions
-# -----------------------------------------
-
-
-def extract_vertices(gdf: gpd.GeoDataFrame) -> list[list[tuple[float, float]]]:
- r"""
- Function to extract the exterior vertices of all shapes within a gpd.GeoDataFrame.
-
- :param gdf: The GeoDataFrame from which the vertices need to be extracted.
-
- :returns: A list containing a list of (x, y) positions of the vertices. The length of the primary list is equal
- to the number of geometries inside gdf, and length of each sublist is the number of vertices in the geometry.
- """
- vertices = []
- # Loop on all geometries within gdf
- for geom in gdf.geometry:
- # Extract geometry exterior(s)
- if geom.geom_type == "MultiPolygon":
- exteriors = [p.exterior for p in geom.geoms]
- elif geom.geom_type == "Polygon":
- exteriors = [geom.exterior]
- elif geom.geom_type == "LineString":
- exteriors = [geom]
- elif geom.geom_type == "MultiLineString":
- exteriors = list(geom.geoms)
- else:
- raise NotImplementedError(f"Geometry type {geom.geom_type} not implemented.")
-
- vertices.extend([list(ext.coords) for ext in exteriors])
-
- return vertices
-
-
-def generate_voronoi_polygons(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
- """
- Generate Voronoi polygons (tessellation) from the vertices of all geometries in a GeoDataFrame.
-
- Uses scipy.spatial.voronoi.
-
- :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons.
-
- :returns: A GeoDataFrame containing the Voronoi polygons.
- """
- # Extract the coordinates of the vertices of all geometries in gdf
- vertices = extract_vertices(gdf)
- coords = np.concatenate(vertices)
-
- # Create the Voronoi diagram and extract ridges
- vor = Voronoi(coords)
- lines = [shapely.geometry.LineString(vor.vertices[line]) for line in vor.ridge_vertices if -1 not in line]
- polys = list(shapely.ops.polygonize(lines))
- if len(polys) == 0:
- raise ValueError("Invalid geometry, cannot generate finite Voronoi polygons")
-
- # Convert into GeoDataFrame
- voronoi = gpd.GeoDataFrame(geometry=gpd.GeoSeries(polys))
- voronoi.crs = gdf.crs
-
- return voronoi
-
-
-def generate_voronoi_with_bounds(gdf: gpd.GeoDataFrame, bound_poly: Polygon) -> gpd.GeoDataFrame:
- """
- Generate Voronoi polygons that are bounded by the polygon bound_poly, to avoid Voronoi polygons that extend \
-far beyond the original geometry.
-
- Voronoi polygons are created using generate_voronoi_polygons, cropped to the extent of bound_poly and gaps \
-are filled with new polygons.
-
- :param: The GeoDataFrame from whose vertices are used for the Voronoi polygons.
- :param: A shapely Polygon to be used for bounding the Voronoi diagrams.
-
- :returns: A GeoDataFrame containing the Voronoi polygons.
- """
- # Create Voronoi polygons
- voronoi = generate_voronoi_polygons(gdf)
-
- # Crop Voronoi polygons to input bound_poly extent
- voronoi_crop = voronoi.intersection(bound_poly)
- voronoi_crop = gpd.GeoDataFrame(geometry=voronoi_crop) # convert to DataFrame
-
- # Dissolve all Voronoi polygons and subtract from bounds to get gaps
- voronoi_merged = voronoi_crop.dissolve()
- bound_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(bound_poly))
- bound_gdf.crs = gdf.crs
- gaps = bound_gdf.difference(voronoi_merged)
-
- # Merge cropped Voronoi with gaps, if not empty, otherwise return cropped Voronoi
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore", "Geometry is in a geographic CRS. Results from 'area' are likely incorrect.")
- tot_area = np.sum(gaps.area.values)
-
- if not tot_area == 0:
- voronoi_all = gpd.GeoDataFrame(geometry=list(voronoi_crop.geometry) + list(gaps.geometry))
- voronoi_all.crs = gdf.crs
- return voronoi_all
- else:
- return voronoi_crop
+ return _buffer_without_overlap(self.ds, buffer_size=buffer_size, metric=metric, plot=plot)
diff --git a/setup.py b/setup.py
index eb7bae29..7f7e78e1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
"""This file now only serves for backward-compatibility for routines explicitly calling python setup.py"""
+
from setuptools import setup
setup()
diff --git a/tests/test_config.py b/tests/test_config.py
index 1803c1b9..ed1f1f05 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,4 +1,5 @@
"""Test configuration file."""
+
import geoutils as gu
diff --git a/tests/test_doc.py b/tests/test_doc.py
index cbbe3ff1..753880d9 100644
--- a/tests/test_doc.py
+++ b/tests/test_doc.py
@@ -1,4 +1,5 @@
"""Functions to test the documentation."""
+
import os
import platform
import shutil
diff --git a/tests/test_examples.py b/tests/test_examples.py
index a9e4d54b..fb6cad81 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -1,6 +1,7 @@
"""
Test the example files used for testing and documentation
"""
+
import hashlib
import warnings
diff --git a/tests/test_interface/test_distance.py b/tests/test_interface/test_distance.py
new file mode 100644
index 00000000..c2c1641d
--- /dev/null
+++ b/tests/test_interface/test_distance.py
@@ -0,0 +1,230 @@
+"""Test distance functions at the interface of raster and vectors."""
+
+from __future__ import annotations
+
+import os
+import tempfile
+import warnings
+
+import numpy as np
+import pytest
+import rasterio as rio
+
+import geoutils as gu
+from geoutils._typing import NDArrayNum
+
+
+def run_gdal_proximity(
+ input_raster: gu.Raster, target_values: list[float] | None, distunits: str = "GEO"
+) -> NDArrayNum:
+ """Run GDAL's ComputeProximity and return the read numpy array."""
+ # Rasterio strongly recommends against importing gdal along rio, so this is done here instead.
+ from osgeo import gdal, gdalconst
+
+ gdal.UseExceptions()
+
+ # Initiate empty GDAL raster for proximity output
+ drv = gdal.GetDriverByName("MEM")
+ proxy_ds = drv.Create("", input_raster.shape[1], input_raster.shape[0], 1, gdal.GetDataTypeByName("Float32"))
+ proxy_ds.GetRasterBand(1).SetNoDataValue(-9999)
+
+ # Save input in temporary file to read with GDAL
+ # (avoids the nightmare of setting nodata, transform, crs in GDAL format...)
+ with tempfile.TemporaryDirectory() as temp_dir:
+ temp_path = os.path.join(temp_dir, "input.tif")
+ input_raster.save(temp_path)
+ ds_raster_in = gdal.Open(temp_path, gdalconst.GA_ReadOnly)
+
+ # Define GDAL options
+ proximity_options = ["DISTUNITS=" + distunits]
+ if target_values is not None:
+ proximity_options.insert(0, "VALUES=" + ",".join([str(tgt) for tgt in target_values]))
+
+ # Compute proximity
+ gdal.ComputeProximity(ds_raster_in.GetRasterBand(1), proxy_ds.GetRasterBand(1), proximity_options)
+ # Save array
+ proxy_array = proxy_ds.GetRasterBand(1).ReadAsArray().astype("float32")
+ proxy_array[proxy_array == -9999] = np.nan
+
+ # Close GDAL datasets
+ proxy_ds = None
+ ds_raster_in = None
+
+ return proxy_array
+
+
+class TestDistance:
+
+ landsat_b4_path = gu.examples.get_path("everest_landsat_b4")
+ landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped")
+ everest_outlines_path = gu.examples.get_path("everest_rgi_outlines")
+ aster_dem_path = gu.examples.get_path("exploradores_aster_dem")
+
+ def test_proximity_vector(self) -> None:
+ """
+ The core functionality is already tested against GDAL in test_raster: just verify the vector-specific behaviour.
+ #TODO: add an artificial test as well (mirroring TODO in test_raster)
+ """
+
+ vector = gu.Vector(self.everest_outlines_path)
+
+ # -- Test 1: with a Raster provided --
+ raster1 = gu.Raster(self.landsat_b4_crop_path)
+ prox1 = vector.proximity(raster=raster1)
+
+ # The proximity should have the same extent, resolution and CRS
+ assert raster1.georeferenced_grid_equal(prox1)
+
+ # With the base geometry
+ vector.proximity(raster=raster1, geometry_type="geometry")
+
+ # With another geometry option
+ vector.proximity(raster=raster1, geometry_type="centroid")
+
+ # With only inside proximity
+ vector.proximity(raster=raster1, in_or_out="in")
+
+ # -- Test 2: with no Raster provided, just grid size --
+
+ # Default grid size
+ vector.proximity()
+
+ # With specific grid size
+ vector.proximity(size=(100, 100))
+
+ # Test all options, with both an artificial Raster (that has all target values) and a real Raster
+
+ @pytest.mark.parametrize("distunits", ["GEO", "PIXEL"]) # type: ignore
+ # 0 and 1,2,3 are especially useful for the artificial Raster, and 112 for the real Raster
+ @pytest.mark.parametrize("target_values", [[1, 2, 3], [0], [112], None]) # type: ignore
+ @pytest.mark.parametrize(
+ "raster",
+ [
+ gu.Raster(landsat_b4_path),
+ gu.Raster.from_array(
+ np.arange(25, dtype="int32").reshape(5, 5), transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326
+ ),
+ ],
+ ) # type: ignore
+ def test_proximity_raster_against_gdal(
+ self, distunits: str, target_values: list[float] | None, raster: gu.Raster
+ ) -> None:
+ """Test that proximity matches the results of GDAL for any parameter."""
+
+ # TODO: When adding new rasters for tests, specify warning only for Landsat
+ warnings.filterwarnings("ignore", message="Setting default nodata -99999 to mask non-finite values *")
+
+ # We generate proximity with GDAL and GeoUtils
+ gdal_proximity = run_gdal_proximity(raster, target_values=target_values, distunits=distunits)
+ # We translate distunits GDAL option into its GeoUtils equivalent
+ if distunits == "GEO":
+ distance_unit = "georeferenced"
+ else:
+ distance_unit = "pixel"
+ geoutils_proximity = (
+ raster.proximity(distance_unit=distance_unit, target_values=target_values)
+ .data.data.squeeze()
+ .astype("float32")
+ )
+
+ # The results should be the same in all cases
+ try:
+ # In some cases, the proximity differs slightly (generally <1%) for complex settings
+ # (Landsat Raster with target of 112)
+ # It looks like GDAL might not have the right value,
+ # so this particular case is treated differently in tests
+ if target_values is not None and target_values[0] == 112 and raster.filename is not None:
+ # Get index and number of not almost equal point (tolerance of 10-4)
+ ind_not_almost_equal = np.abs(gdal_proximity - geoutils_proximity) > 1e-04
+ nb_not_almost_equal = np.count_nonzero(ind_not_almost_equal)
+ # Check that this is a minority of points (less than 0.5%)
+ assert nb_not_almost_equal < 0.005 * raster.width * raster.height
+
+ # Replace these exceptions by zero in both
+ gdal_proximity[ind_not_almost_equal] = 0.0
+ geoutils_proximity[ind_not_almost_equal] = 0.0
+ # Check that all the rest is almost equal
+ assert np.allclose(gdal_proximity, geoutils_proximity, atol=1e-04, equal_nan=True)
+
+ # Otherwise, results are exactly equal
+ else:
+ assert np.array_equal(gdal_proximity, geoutils_proximity, equal_nan=True)
+
+ # For debugging
+ except Exception as exception:
+ import matplotlib.pyplot as plt
+
+ # Plotting the xdem and GDAL attributes for comparison (plotting "diff" can also help debug)
+ plt.subplot(121)
+ plt.imshow(gdal_proximity)
+ # plt.imshow(np.abs(gdal_proximity - geoutils_proximity)>0.1)
+ plt.colorbar()
+ plt.subplot(122)
+ plt.imshow(geoutils_proximity)
+ # plt.imshow(raster.data.data == 112)
+ plt.colorbar()
+ plt.show()
+
+ # ind_not_equal = np.abs(gdal_proximity - geoutils_proximity)>0.1
+ # print(gdal_proximity[ind_not_equal])
+ # print(geoutils_proximity[ind_not_equal])
+
+ raise exception
+
+ def test_proximity_raster_parameters(self) -> None:
+ """
+ Test that new (different to GDAL's) proximity parameters run.
+ No need to test the results specifically, as those rely entirely on the previous test with GDAL,
+ and tests in rasterize and shapely.
+ #TODO: Maybe add one test with an artificial vector to check it works as intended
+ """
+
+ # -- Test 1: with self's Raster alone --
+ raster1 = gu.Raster(self.landsat_b4_path)
+ prox1 = raster1.proximity()
+
+ # The raster should have the same extent, resolution and CRS
+ assert raster1.georeferenced_grid_equal(prox1)
+
+ # It should change with target values specified
+ prox2 = raster1.proximity(target_values=[255])
+ assert not np.array_equal(prox1.data, prox2.data)
+
+ # -- Test 2: with a vector provided --
+ vector = gu.Vector(self.everest_outlines_path)
+
+ # With default options (boundary geometry)
+ raster1.proximity(vector=vector)
+
+ # With the base geometry
+ raster1.proximity(vector=vector, geometry_type="geometry")
+
+ # With another geometry option
+ raster1.proximity(vector=vector, geometry_type="centroid")
+
+ # With only inside proximity
+ raster1.proximity(vector=vector, in_or_out="in")
+
+ # Paths to example data
+
+ # Mask without nodata
+ mask_landsat_b4 = gu.Raster(landsat_b4_path) > 125
+ # Mask with nodata
+ mask_aster_dem = gu.Raster(aster_dem_path) > 2000
+ # Mask from an outline
+ mask_everest = gu.Vector(everest_outlines_path).create_mask(gu.Raster(landsat_b4_path))
+
+ @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
+ def test_proximity_mask(self, mask: gu.Mask) -> None:
+ mask_orig = mask.copy()
+ # Run default
+ rast = mask.proximity()
+ # Check the dtype of the original mask was properly reconverted
+ assert mask.data.dtype == bool
+ # Check the original mask was not modified during reprojection
+ assert mask_orig.raster_equal(mask)
+
+ # Check that output is cast back into a raster
+ assert isinstance(rast, gu.Raster)
+ # A mask is a raster, so also need to check this
+ assert not isinstance(rast, gu.Mask)
diff --git a/tests/test_pointcloud.py b/tests/test_interface/test_gridding.py
similarity index 98%
rename from tests/test_pointcloud.py
rename to tests/test_interface/test_gridding.py
index 484f8eee..35371f25 100644
--- a/tests/test_pointcloud.py
+++ b/tests/test_interface/test_gridding.py
@@ -7,7 +7,7 @@
from shapely import geometry
from geoutils import Raster
-from geoutils.pointcloud import _grid_pointcloud
+from geoutils.interface.gridding import _grid_pointcloud
class TestPointCloud:
diff --git a/tests/test_raster/test_interpolate.py b/tests/test_interface/test_interpolate.py
similarity index 99%
rename from tests/test_raster/test_interpolate.py
rename to tests/test_interface/test_interpolate.py
index d2c743d7..87e5c126 100644
--- a/tests/test_raster/test_interpolate.py
+++ b/tests/test_interface/test_interpolate.py
@@ -11,13 +11,13 @@
import geoutils as gu
from geoutils import examples
-from geoutils.projtools import reproject_to_latlon
-from geoutils.raster.interpolate import (
+from geoutils.interface.interpolate import (
_get_dist_nodata_spread,
_interp_points,
_interpn_interpolator,
method_to_order,
)
+from geoutils.projtools import reproject_to_latlon
class TestInterpolate:
@@ -81,7 +81,7 @@ def test_interpn_interpolator_accuracy(
@pytest.mark.parametrize("tag_aop", [None, "Area", "Point"]) # type: ignore
@pytest.mark.parametrize("shift_aop", [True, False]) # type: ignore
- def test_interp_points__synthetic(self, tag_aop: str | None, shift_aop: bool) -> None:
+ def test_interp_points__synthetic(self, tag_aop: Literal["Area", "Point"] | None, shift_aop: bool) -> None:
"""
Test interp_points function with synthetic data:
diff --git a/tests/test_interface/test_raster_point.py b/tests/test_interface/test_raster_point.py
new file mode 100644
index 00000000..3b63bb59
--- /dev/null
+++ b/tests/test_interface/test_raster_point.py
@@ -0,0 +1,238 @@
+"""Tests for raster-point interfacing."""
+
+from __future__ import annotations
+
+import re
+
+import numpy as np
+import pytest
+import rasterio as rio
+
+import geoutils as gu
+from geoutils import examples
+
+
+class TestRasterPointInterface:
+
+ # Paths to example data
+ landsat_b4_path = examples.get_path("everest_landsat_b4")
+ landsat_rgb_path = examples.get_path("everest_landsat_rgb")
+ aster_dem_path = examples.get_path("exploradores_aster_dem")
+
+ def test_to_pointcloud(self) -> None:
+ """Test to_pointcloud method."""
+
+ # 1/ Single band synthetic data
+
+ # Create a small raster to test point sampling on
+ img_arr = np.arange(25, dtype="int32").reshape(5, 5)
+ img0 = gu.Raster.from_array(img_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326)
+
+ # Sample the whole raster (fraction==1)
+ points = img0.to_pointcloud()
+ points_arr = img0.to_pointcloud(as_array=True)
+
+ # Check output types
+ assert isinstance(points, gu.Vector)
+ assert isinstance(points_arr, np.ndarray)
+
+ # Check that both outputs (array or vector) are fully consistent, order matters here
+ assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0])
+ assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1])
+ assert np.array_equal(points.ds["b1"].values, points_arr[:, 2])
+
+ # Validate that 25 points were sampled (equating to img1.height * img1.width) with x, y, and band0 values.
+ assert points_arr.shape == (25, 3)
+ assert points.ds.shape == (25, 2) # One less column here due to geometry storing X and Y
+ # Check that X, Y and Z arrays are equal to raster array input independently of value order
+ x_coords, y_coords = img0.ij2xy(i=np.arange(0, 5), j=np.arange(0, 5))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5)))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5)))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_arr.ravel()))
+
+ # Check that subsampling works properly
+ points_arr = img0.to_pointcloud(subsample=0.2, as_array=True)
+ assert points_arr.shape == (5, 3)
+
+ # All values should be between 0 and 25
+ assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25)
+
+ # 2/ Multi-band synthetic data
+ img_arr = np.arange(25, dtype="int32").reshape(5, 5)
+ img_3d_arr = np.stack((img_arr, 25 + img_arr, 50 + img_arr), axis=0)
+ img3d = gu.Raster.from_array(img_3d_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326)
+
+ # Sample the whole raster (fraction==1)
+ points = img3d.to_pointcloud(auxiliary_data_bands=[2, 3])
+ points_arr = img3d.to_pointcloud(as_array=True, auxiliary_data_bands=[2, 3])
+
+ # Check equality between both output types
+ assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0])
+ assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1])
+ assert np.array_equal(points.ds["b1"].values, points_arr[:, 2])
+ assert np.array_equal(points.ds["b2"].values, points_arr[:, 3])
+ assert np.array_equal(points.ds["b3"].values, points_arr[:, 4])
+
+ # Check it is the right data
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5)))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5)))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_3d_arr[0, :, :].ravel()))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 3])), np.sort(img_3d_arr[1, :, :].ravel()))
+ assert np.array_equal(np.sort(np.asarray(points_arr[:, 4])), np.sort(img_3d_arr[2, :, :].ravel()))
+
+ # With a subsample
+ points_arr = img3d.to_pointcloud(as_array=True, subsample=10, auxiliary_data_bands=[2, 3])
+ assert points_arr.shape == (10, 5)
+
+ # Check the values are still good
+ assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25)
+ assert all(25 <= points_arr[:, 3]) and all(points_arr[:, 3] < 50)
+ assert all(50 <= points_arr[:, 4]) and all(points_arr[:, 4] < 75)
+
+ # 3/ Single-band real raster with nodata values
+ img1 = gu.Raster(self.aster_dem_path)
+
+ # Get a large sample to ensure they should be some NaNs normally
+ points_arr = img1.to_pointcloud(subsample=10000, as_array=True, random_state=42)
+ points = img1.to_pointcloud(subsample=10000, random_state=42)
+
+ # This should not load the image
+ assert not img1.is_loaded
+
+ # The subsampled values should be valid and the right shape
+ assert points_arr.shape == (10000, 3)
+ assert points.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y
+ assert all(np.isfinite(points_arr[:, 2]))
+
+ # The output should respect the default band naming and the input CRS
+ assert np.array_equal(points.ds.columns, ["b1", "geometry"])
+ assert points.crs == img1.crs
+
+ # Try setting the band name
+ points = img1.to_pointcloud(data_column_name="lol", subsample=10)
+ assert np.array_equal(points.ds.columns, ["lol", "geometry"])
+
+ # Keeping the nodata values
+ points_invalid = img1.to_pointcloud(subsample=10000, random_state=42, skip_nodata=False)
+
+ # The subsampled values should not all be valid and the right shape
+ assert points_invalid.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y
+ assert any(~np.isfinite(points_invalid["b1"].values))
+
+ # 4/ Multi-band real raster
+ img2 = gu.Raster(self.landsat_rgb_path)
+
+ # By default only loads a single band without loading
+ points_arr = img2.to_pointcloud(subsample=10, as_array=True)
+ points = img2.to_pointcloud(subsample=10)
+
+ assert points_arr.shape == (10, 3)
+ assert points.ds.shape == (10, 2) # One less column here due to geometry storing X and Y
+ assert not img2.is_loaded
+
+ # Storing auxiliary bands
+ points_arr = img2.to_pointcloud(subsample=10, as_array=True, auxiliary_data_bands=[2, 3])
+ points = img2.to_pointcloud(subsample=10, auxiliary_data_bands=[2, 3])
+ assert points_arr.shape == (10, 5)
+ assert points.ds.shape == (10, 4) # One less column here due to geometry storing X and Y
+ assert not img2.is_loaded
+ assert np.array_equal(points.ds.columns, ["b1", "b2", "b3", "geometry"])
+
+ # Try setting the column name of a specific band while storing all
+ points = img2.to_pointcloud(subsample=10, data_column_name="yes", data_band=2, auxiliary_data_bands=[1, 3])
+ assert np.array_equal(points.ds.columns, ["yes", "b1", "b3", "geometry"])
+
+ # 5/ Error raising
+ with pytest.raises(ValueError, match="Data column name must be a string.*"):
+ img1.to_pointcloud(data_column_name=1) # type: ignore
+ with pytest.raises(
+ ValueError,
+ match=re.escape("Data band number must be an integer between 1 and the total number of bands (3)."),
+ ):
+ img2.to_pointcloud(data_band=4)
+ with pytest.raises(
+ ValueError, match="Passing auxiliary column names requires passing auxiliary data band numbers as well."
+ ):
+ img2.to_pointcloud(auxiliary_column_names=["a"])
+ with pytest.raises(
+ ValueError, match="Auxiliary data band number must be an iterable containing only integers."
+ ):
+ img2.to_pointcloud(auxiliary_data_bands=[1, 2.5]) # type: ignore
+ img2.to_pointcloud(auxiliary_data_bands="lol") # type: ignore
+ with pytest.raises(
+ ValueError,
+ match=re.escape("Auxiliary data band numbers must be between 1 and the total number of bands (3)."),
+ ):
+ img2.to_pointcloud(auxiliary_data_bands=[0])
+ img2.to_pointcloud(auxiliary_data_bands=[4])
+ with pytest.raises(
+ ValueError, match=re.escape("Main data band 1 should not be listed in auxiliary data bands [1, 2].")
+ ):
+ img2.to_pointcloud(auxiliary_data_bands=[1, 2])
+ with pytest.raises(ValueError, match="Auxiliary column names must be an iterable containing only strings."):
+ img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", 1])
+ with pytest.raises(
+ ValueError, match="Length of auxiliary column name and data band numbers should be the same*"
+ ):
+ img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", "lol2", "lol3"])
+
+ def test_from_pointcloud(self) -> None:
+ """Test from_pointcloud method."""
+
+ # 1/ Create a small raster to test point sampling on
+ shape = (5, 5)
+ nodata = 100
+ img_arr = np.arange(np.prod(shape), dtype="int32").reshape(shape)
+ transform = rio.transform.from_origin(0, 5, 1, 1)
+ img1 = gu.Raster.from_array(img_arr, transform=transform, crs=4326, nodata=nodata)
+
+ # Check both inputs work (grid coords or transform+shape) on a subsample
+ pc1 = img1.to_pointcloud(subsample=10)
+ img1_sub = gu.Raster.from_pointcloud_regular(pc1, transform=transform, shape=shape)
+
+ grid_coords1 = img1.coords(grid=False)
+ img1_sub2 = gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1)
+
+ assert img1_sub.raster_equal(img1_sub2)
+
+ # Check that number of valid values are equal to point cloud size
+ assert np.count_nonzero(~img1_sub.data.mask) == 10
+
+ # With no subsampling, should get the exact same raster back
+ pc1_full = img1.to_pointcloud()
+ img1_full = gu.Raster.from_pointcloud_regular(pc1_full, transform=transform, shape=shape, nodata=nodata)
+ assert img1.raster_equal(img1_full, warn_failure_reason=True)
+
+ # 2/ Single-band real raster with nodata values
+ img2 = gu.Raster(self.aster_dem_path)
+ nodata = img2.nodata
+ transform = img2.transform
+ shape = img2.shape
+
+ # Check both inputs work (grid coords or transform+shape) on a subsample
+ pc2 = img2.to_pointcloud(subsample=10000, random_state=42)
+ img2_sub = gu.Raster.from_pointcloud_regular(pc2, transform=transform, shape=shape, nodata=nodata)
+
+ grid_coords2 = img2.coords(grid=False)
+ img2_sub2 = gu.Raster.from_pointcloud_regular(pc2, grid_coords=grid_coords2, nodata=nodata)
+
+ assert img2_sub.raster_equal(img2_sub2, warn_failure_reason=True)
+
+ # Check that number of valid values are equal to point cloud size
+ assert np.count_nonzero(~img2_sub.data.mask) == 10000
+
+ # With no subsampling, should get the exact same raster back
+ pc2_full = img2.to_pointcloud()
+ img2_full = gu.Raster.from_pointcloud_regular(pc2_full, transform=transform, shape=shape, nodata=nodata)
+ assert img2.raster_equal(img2_full, warn_failure_reason=True, strict_masked=False)
+
+ # 3/ Error raising
+ with pytest.raises(TypeError, match="Input grid coordinates must be 1D arrays.*"):
+ gu.Raster.from_pointcloud_regular(pc1, grid_coords=(1, "lol")) # type: ignore
+ with pytest.raises(ValueError, match="Grid coordinates must be regular*"):
+ grid_coords1[0][0] += 1
+ gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) # type: ignore
+ with pytest.raises(
+ ValueError, match="Either grid coordinates or both geotransform and shape must be provided."
+ ):
+ gu.Raster.from_pointcloud_regular(pc1)
diff --git a/tests/test_interface/test_raster_vector.py b/tests/test_interface/test_raster_vector.py
new file mode 100644
index 00000000..54005fcd
--- /dev/null
+++ b/tests/test_interface/test_raster_vector.py
@@ -0,0 +1,257 @@
+"""Tests for raster-vector interfacing."""
+
+from __future__ import annotations
+
+import warnings
+
+import geopandas as gpd
+import numpy as np
+import pytest
+from scipy.ndimage import binary_erosion
+from shapely import LineString, MultiLineString, MultiPolygon, Polygon
+
+import geoutils as gu
+from geoutils import examples
+
+GLACIER_OUTLINES_URL = "http://public.data.npolar.no/cryoclim/CryoClim_GAO_SJ_1990.zip"
+
+
+class TestRasterVectorInterface:
+
+ # Create a synthetic vector file with a square of size 1, started at position (10, 10)
+ poly1 = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)])
+ gdf = gpd.GeoDataFrame({"geometry": [poly1]}, crs="EPSG:4326")
+ vector = gu.Vector(gdf)
+
+ # Same with a square started at position (5, 5)
+ poly2 = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)])
+ gdf = gpd.GeoDataFrame({"geometry": [poly2]}, crs="EPSG:4326")
+ vector2 = gu.Vector(gdf)
+
+ # Create a multipolygon with both
+ multipoly = MultiPolygon([poly1, poly2])
+ gdf = gpd.GeoDataFrame({"geometry": [multipoly]}, crs="EPSG:4326")
+ vector_multipoly = gu.Vector(gdf)
+
+ # Create a synthetic vector file with a square of size 5, started at position (8, 8)
+ poly3 = Polygon([(8, 8), (13, 8), (13, 13), (8, 13)])
+ gdf = gpd.GeoDataFrame({"geometry": [poly3]}, crs="EPSG:4326")
+ vector_5 = gu.Vector(gdf)
+
+ # Create a synthetic LineString geometry
+ lines = LineString([(10, 10), (11, 10), (11, 11)])
+ gdf = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326")
+ vector_lines = gu.Vector(gdf)
+
+ # Create a synthetic MultiLineString geometry
+ multilines = MultiLineString([[(10, 10), (11, 10), (11, 11)], [(5, 5), (6, 5), (6, 6)]])
+ gdf = gpd.GeoDataFrame({"geometry": [multilines]}, crs="EPSG:4326")
+ vector_multilines = gu.Vector(gdf)
+
+ def test_create_mask(self) -> None:
+ """
+ Test Vector.create_mask.
+ """
+ # First with given res and bounds -> Should be a 21 x 21 array with 0 everywhere except center pixel
+ vector = self.vector.copy()
+ out_mask = vector.create_mask(xres=1, bounds=(0, 0, 21, 21), as_array=True)
+ ref_mask = np.zeros((21, 21), dtype="bool")
+ ref_mask[10, 10] = True
+ assert out_mask.shape == (21, 21)
+ assert np.all(ref_mask == out_mask)
+
+ # Check that vector has not been modified by accident
+ assert vector.bounds == self.vector.bounds
+ assert len(vector.ds) == len(self.vector.ds)
+ assert vector.crs == self.vector.crs
+
+ # Then with a gu.Raster as reference, single band
+ rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326")
+ out_mask = vector.create_mask(rst, as_array=True)
+ assert out_mask.shape == (21, 21)
+
+ # With gu.Raster, 2 bands -> fails...
+ # rst = gu.Raster.from_array(np.zeros((2, 21, 21)), transform=(1., 0., 0., 0., -1., 21.), crs='EPSG:4326')
+ # out_mask = vector.create_mask(rst)
+
+ # Test that buffer = 0 works
+ out_mask_buff = vector.create_mask(rst, buffer=0, as_array=True)
+ assert np.all(ref_mask == out_mask_buff)
+
+ # Test that buffer > 0 works
+ rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326")
+ out_mask = vector.create_mask(rst, as_array=True)
+ for buffer in np.arange(1, 8):
+ out_mask_buff = vector.create_mask(rst, buffer=buffer, as_array=True)
+ diff = out_mask_buff & ~out_mask
+ assert np.count_nonzero(diff) > 0
+ # Difference between masks should always be thinner than buffer + 1
+ eroded_diff = binary_erosion(diff.squeeze(), np.ones((buffer + 1, buffer + 1)))
+ assert np.count_nonzero(eroded_diff) == 0
+
+ # Test that buffer < 0 works
+ vector_5 = self.vector_5
+ out_mask = vector_5.create_mask(rst, as_array=True)
+ for buffer in np.arange(-1, -3, -1):
+ out_mask_buff = vector_5.create_mask(rst, buffer=buffer, as_array=True)
+ diff = ~out_mask_buff & out_mask
+ assert np.count_nonzero(diff) > 0
+ # Difference between masks should always be thinner than buffer + 1
+ eroded_diff = binary_erosion(diff.squeeze(), np.ones((abs(buffer) + 1, abs(buffer) + 1)))
+ assert np.count_nonzero(eroded_diff) == 0
+
+ # Check that no warning is raised when creating a mask with a xres not multiple of vector bounds
+ mask = vector.create_mask(xres=1.01)
+
+ # Check that by default, create_mask returns a Mask
+ assert isinstance(mask, gu.Mask)
+
+ # Check that an error is raised if xres is not passed
+ with pytest.raises(ValueError, match="At least raster or xres must be set."):
+ vector.create_mask()
+
+ # Check that an error is raised if buffer is the wrong type
+ with pytest.raises(TypeError, match="Buffer must be a number, currently set to str."):
+ vector.create_mask(rst, buffer="lol") # type: ignore
+
+ # If the raster has the wrong type
+ with pytest.raises(TypeError, match="Raster must be a geoutils.Raster or None."):
+ vector.create_mask("lol") # type: ignore
+
+ # Check that a warning is raised if the bounds were passed specifically by the user
+ with pytest.warns(UserWarning):
+ vector.create_mask(xres=1.01, bounds=(0, 0, 21, 21))
+
+ landsat_b4_path = examples.get_path("everest_landsat_b4")
+ landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped")
+ everest_outlines_path = gu.examples.get_path("everest_rgi_outlines")
+ aster_dem_path = gu.examples.get_path("exploradores_aster_dem")
+ aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines")
+ glacier_outlines = gu.Vector(GLACIER_OUTLINES_URL)
+
+ def test_rasterize_proj(self) -> None:
+ # Capture the warning on resolution not matching exactly bounds
+ with pytest.warns(UserWarning):
+ burned = self.glacier_outlines.rasterize(xres=3000)
+
+ assert burned.shape[0] == 146
+ assert burned.shape[1] == 115
+
+ def test_rasterize_unproj(self) -> None:
+ """Test rasterizing an EPSG:3426 dataset into a projection."""
+
+ vct = gu.Vector(self.everest_outlines_path)
+ rst = gu.Raster(self.landsat_b4_crop_path)
+
+ # Use Web Mercator at 30 m.
+ # Capture the warning on resolution not matching exactly bounds
+ with pytest.warns(UserWarning):
+ burned = vct.rasterize(xres=30, crs=3857)
+
+ assert burned.shape[0] == 1251
+ assert burned.shape[1] == 1522
+
+ # Typically, rasterize returns a raster
+ burned_in2_out1 = vct.rasterize(raster=rst, in_value=2, out_value=1)
+ assert isinstance(burned_in2_out1, gu.Raster)
+
+ # For an in_value of 1 and out_value of 0 (default), it returns a mask
+ burned_mask = vct.rasterize(raster=rst, in_value=1)
+ assert isinstance(burned_mask, gu.Mask)
+
+ # Check that rasterizing with in_value=1 is the same as creating a mask
+ assert burned_mask.raster_equal(vct.create_mask(raster=rst))
+
+ # The two rasterization should match
+ assert np.all(burned_in2_out1[burned_mask] == 2)
+ assert np.all(burned_in2_out1[~burned_mask] == 1)
+
+ # Check that errors are raised
+ with pytest.raises(ValueError, match="Only one of raster or crs can be provided."):
+ vct.rasterize(raster=rst, crs=3857)
+
+ @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore
+ def test_polygonize(self, example: str) -> None:
+ """Test that polygonize doesn't raise errors."""
+
+ img = gu.Raster(example)
+
+ # -- Test 1: basic functioning of polygonize --
+
+ # Get unique value for image and the corresponding area
+ value = np.unique(img)[0]
+ pixel_area = np.count_nonzero(img.data == value) * img.res[0] * img.res[1]
+
+ # Polygonize the raster for this value, and compute the total area
+ polygonized = img.polygonize(target_values=value)
+ polygon_area = polygonized.ds.area.sum()
+
+ # Check that these two areas are approximately equal
+ assert polygon_area == pytest.approx(pixel_area)
+ assert isinstance(polygonized, gu.Vector)
+ assert polygonized.crs == img.crs
+
+ # Check default name of data column, and that defining a custom name works the same
+ assert "id" in polygonized.ds.columns
+ polygonized2 = img.polygonize(target_values=value, data_column_name="myname")
+ assert "myname" in polygonized2.ds.columns
+ assert np.array_equal(polygonized2.ds["myname"].values, polygonized.ds["id"].values)
+
+ # -- Test 2: data types --
+
+ # Check that polygonize works as expected for any input dtype (e.g. float64 being not supported by GeoPandas)
+ for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64"]:
+ img_dtype = img.copy()
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", category=UserWarning, message="dtype conversion will result in a " "loss of information.*"
+ )
+ warnings.filterwarnings(
+ "ignore",
+ category=UserWarning,
+ message="Unmasked values equal to the nodata value found in data array.*",
+ )
+ img_dtype = img_dtype.astype(dtype)
+ value = np.unique(img_dtype)[0]
+ img_dtype.polygonize(target_values=value)
+
+ # And for a boolean object, such as a mask
+ mask = img > value
+ mask.polygonize(target_values=1)
+
+
+class TestMaskVectorInterface:
+
+ # Paths to example data
+ landsat_b4_path = examples.get_path("everest_landsat_b4")
+ landsat_rgb_path = examples.get_path("everest_landsat_rgb")
+ everest_outlines_path = examples.get_path("everest_rgi_outlines")
+ aster_dem_path = examples.get_path("exploradores_aster_dem")
+
+ # Mask without nodata
+ mask_landsat_b4 = gu.Raster(landsat_b4_path) > 125
+ # Mask with nodata
+ mask_aster_dem = gu.Raster(aster_dem_path) > 2000
+ # Mask from an outline
+ mask_everest = gu.Vector(everest_outlines_path).create_mask(gu.Raster(landsat_b4_path))
+
+ @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
+ def test_polygonize(self, mask: gu.Mask) -> None:
+ mask_orig = mask.copy()
+ # Run default
+ vect = mask.polygonize()
+ # Check the dtype of the original mask was properly reconverted
+ assert mask.data.dtype == bool
+ # Check the original mask was not modified during polygonizing
+ assert mask_orig.raster_equal(mask)
+
+ # Check the output is cast into a vector
+ assert isinstance(vect, gu.Vector)
+
+ # Run with zero as target
+ vect = mask.polygonize(target_values=0)
+ assert isinstance(vect, gu.Vector)
+
+ # Check a warning is raised when using a non-boolean value
+ with pytest.warns(UserWarning, match="In-value converted to 1 for polygonizing boolean mask."):
+ mask.polygonize(target_values=2)
diff --git a/tests/test_pointcloud/test_pointcloud.py b/tests/test_pointcloud/test_pointcloud.py
new file mode 100644
index 00000000..ec26e5d3
--- /dev/null
+++ b/tests/test_pointcloud/test_pointcloud.py
@@ -0,0 +1 @@
+"""Test for future PointCloud class."""
diff --git a/tests/test_projtools.py b/tests/test_projtools.py
index a10baa7e..b3157210 100644
--- a/tests/test_projtools.py
+++ b/tests/test_projtools.py
@@ -1,6 +1,7 @@
"""
Test projtools
"""
+
import os.path
import geopandas as gpd
diff --git a/tests/test_raster/test_array.py b/tests/test_raster/test_array.py
index 54a6c876..1e02c612 100644
--- a/tests/test_raster/test_array.py
+++ b/tests/test_raster/test_array.py
@@ -1,4 +1,5 @@
"""Test array tools."""
+
from __future__ import annotations
import warnings
@@ -8,6 +9,11 @@
import rasterio as rio
import geoutils as gu
+from geoutils.raster.array import (
+ _get_array_and_mask,
+ _get_valid_extent,
+ _get_xy_rotated,
+)
class TestArray:
@@ -15,7 +21,7 @@ class TestArray:
@pytest.mark.parametrize(
"mask_and_viewable",
[
- (None, True), # An ndarray with no mask should support views
+ (None, True), # A ndarray with no mask should support views
(False, True), # A masked array with an empty mask should support views
([True, False, False, False], False), # A masked array with an occupied mask should not support views.
([False, False, False, False], True), # A masked array with an empty occupied mask should support views.
@@ -53,13 +59,13 @@ def test_get_array_and_mask(
# Validate that incorrect shapes raise the correct error.
if not check_should_pass:
with pytest.raises(ValueError, match="Invalid array shape given"):
- gu.raster.get_array_and_mask(array, check_shape=True)
+ _get_array_and_mask(array, check_shape=True)
# Stop the test here as the failure is now validated.
return
# Get a copy of the array and check its shape (it should always pass at this point)
- arr, _ = gu.raster.get_array_and_mask(array, copy=True, check_shape=True)
+ arr, _ = _get_array_and_mask(array, copy=True, check_shape=True)
# Validate that the array is a copy
assert not np.shares_memory(arr, array)
@@ -76,7 +82,7 @@ def test_get_array_and_mask(
warnings.simplefilter("always")
# Try to create a view.
- arr_view, mask = gu.raster.get_array_and_mask(array, copy=False)
+ arr_view, mask = _get_array_and_mask(array, copy=False)
# If it should be possible, validate that there were no warnings.
if view_should_be_possible:
@@ -102,21 +108,21 @@ def test_get_valid_extent(self) -> None:
# For no invalid values, the function should return the edges
# For the array
- assert (0, 4, 0, 4) == gu.raster.get_valid_extent(arr)
+ assert (0, 4, 0, 4) == _get_valid_extent(arr)
# For the masked-array
- assert (0, 4, 0, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 4, 0, 4) == _get_valid_extent(mask_ma)
# 1/ First column:
# If we mask it in the masked array
mask_ma[0, :] = np.ma.masked
- assert (1, 4, 0, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (1, 4, 0, 4) == _get_valid_extent(mask_ma)
# If we changed the array to NaNs
arr[0, :] = np.nan
- assert (1, 4, 0, 4) == gu.raster.get_valid_extent(arr)
+ assert (1, 4, 0, 4) == _get_valid_extent(arr)
mask_ma.data[0, :] = np.nan
mask_ma.mask = False
- assert (1, 4, 0, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (1, 4, 0, 4) == _get_valid_extent(mask_ma)
# 2/ First row:
arr = np.ones(shape=(5, 5))
@@ -124,14 +130,14 @@ def test_get_valid_extent(self) -> None:
mask_ma = np.ma.masked_array(data=arr, mask=arr_mask)
# If we mask it in the masked array
mask_ma[:, 0] = np.ma.masked
- assert (0, 4, 1, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 4, 1, 4) == _get_valid_extent(mask_ma)
# If we changed the array to NaNs
arr[:, 0] = np.nan
- assert (0, 4, 1, 4) == gu.raster.get_valid_extent(arr)
+ assert (0, 4, 1, 4) == _get_valid_extent(arr)
mask_ma.data[:, 0] = np.nan
mask_ma.mask = False
- assert (0, 4, 1, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 4, 1, 4) == _get_valid_extent(mask_ma)
# 3/ Last column:
arr = np.ones(shape=(5, 5))
@@ -140,14 +146,14 @@ def test_get_valid_extent(self) -> None:
# If we mask it in the masked array
mask_ma[-1, :] = np.ma.masked
- assert (0, 3, 0, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 3, 0, 4) == _get_valid_extent(mask_ma)
# If we changed the array to NaNs
arr[-1, :] = np.nan
- assert (0, 3, 0, 4) == gu.raster.get_valid_extent(arr)
+ assert (0, 3, 0, 4) == _get_valid_extent(arr)
mask_ma.data[-1, :] = np.nan
mask_ma.mask = False
- assert (0, 3, 0, 4) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 3, 0, 4) == _get_valid_extent(mask_ma)
# 4/ Last row:
arr = np.ones(shape=(5, 5))
@@ -156,14 +162,14 @@ def test_get_valid_extent(self) -> None:
# If we mask it in the masked array
mask_ma[:, -1] = np.ma.masked
- assert (0, 4, 0, 3) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 4, 0, 3) == _get_valid_extent(mask_ma)
# If we changed the array to NaNs
arr[:, -1] = np.nan
- assert (0, 4, 0, 3) == gu.raster.get_valid_extent(arr)
+ assert (0, 4, 0, 3) == _get_valid_extent(arr)
mask_ma.data[:, -1] = np.nan
mask_ma.mask = False
- assert (0, 4, 0, 3) == gu.raster.get_valid_extent(mask_ma)
+ assert (0, 4, 0, 3) == _get_valid_extent(mask_ma)
def test_get_xy_rotated(self) -> None:
"""Check the function to rotate array."""
@@ -178,27 +184,27 @@ def test_get_xy_rotated(self) -> None:
xx, yy = r1.coords(grid=True, force_offset="ll")
# Rotating the coordinates 90 degrees should be the same as rotating the array
- xx90, yy90 = gu.raster.get_xy_rotated(r1, along_track_angle=90)
+ xx90, yy90 = _get_xy_rotated(r1, along_track_angle=90)
assert np.allclose(np.rot90(xx90), xx)
assert np.allclose(np.rot90(yy90), yy)
# Same for 180 degrees
- xx180, yy180 = gu.raster.get_xy_rotated(r1, along_track_angle=180)
+ xx180, yy180 = _get_xy_rotated(r1, along_track_angle=180)
assert np.allclose(np.rot90(xx180, k=2), xx)
assert np.allclose(np.rot90(yy180, k=2), yy)
# Same for 270 degrees
- xx270, yy270 = gu.raster.get_xy_rotated(r1, along_track_angle=270)
+ xx270, yy270 = _get_xy_rotated(r1, along_track_angle=270)
assert np.allclose(np.rot90(xx270, k=3), xx)
assert np.allclose(np.rot90(yy270, k=3), yy)
# 360 degrees should get us back on our feet
- xx360, yy360 = gu.raster.get_xy_rotated(r1, along_track_angle=360)
+ xx360, yy360 = _get_xy_rotated(r1, along_track_angle=360)
assert np.allclose(xx360, xx)
assert np.allclose(yy360, yy)
# Test that the values make sense for 45 degrees
- xx45, yy45 = gu.raster.get_xy_rotated(r1, along_track_angle=45)
+ xx45, yy45 = _get_xy_rotated(r1, along_track_angle=45)
# Should have zero on the upper left corner for xx
assert xx45[0, 0] == pytest.approx(0)
# Then a multiple of sqrt2 along each dimension
@@ -209,4 +215,4 @@ def test_get_xy_rotated(self) -> None:
# Finally, yy should be rotated by 90
assert np.allclose(np.rot90(xx45), yy45)
- xx, yy = gu.raster.get_xy_rotated(r1, along_track_angle=90)
+ xx, yy = _get_xy_rotated(r1, along_track_angle=90)
diff --git a/tests/test_raster/test_geotransformations.py b/tests/test_raster/test_geotransformations.py
new file mode 100644
index 00000000..becf2d5e
--- /dev/null
+++ b/tests/test_raster/test_geotransformations.py
@@ -0,0 +1,777 @@
+"""Test for geotransformations of raster objects."""
+
+from __future__ import annotations
+
+import re
+import warnings
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pytest
+import rasterio as rio
+
+import geoutils as gu
+from geoutils import examples
+from geoutils.raster.geotransformations import _resampling_method_from_str
+from geoutils.raster.raster import _default_nodata
+
+DO_PLOT = False
+
+
+class TestRasterGeotransformations:
+
+ landsat_b4_path = examples.get_path("everest_landsat_b4")
+ landsat_b4_crop_path = examples.get_path("everest_landsat_b4_cropped")
+ landsat_rgb_path = examples.get_path("everest_landsat_rgb")
+ everest_outlines_path = examples.get_path("everest_rgi_outlines")
+ aster_dem_path = examples.get_path("exploradores_aster_dem")
+ aster_outlines_path = examples.get_path("exploradores_rgi_outlines")
+
+ def test_resampling_str(self) -> None:
+ """Test that resampling methods can be given as strings instead of rio enums."""
+ warnings.simplefilter("error")
+ assert _resampling_method_from_str("nearest") == rio.enums.Resampling.nearest # noqa
+ assert _resampling_method_from_str("cubic_spline") == rio.enums.Resampling.cubic_spline # noqa
+
+ # Check that odd strings return the appropriate error.
+ try:
+ _resampling_method_from_str("CUBIC_SPLINE") # noqa
+ except ValueError as exception:
+ if "not a valid rasterio.enums.Resampling method" not in str(exception):
+ raise exception
+
+ img1 = gu.Raster(self.landsat_b4_path)
+ img2 = gu.Raster(self.landsat_b4_crop_path)
+ # Set img2 pixel interpretation as "Point" to match "img1" and avoid any warnings
+ img2.set_area_or_point("Point", shift_area_or_point=False)
+ img1.set_nodata(0)
+ img2.set_nodata(0)
+
+ # Resample the rasters using a new resampling method and see that the string and enum gives the same result.
+ img3a = img1.reproject(img2, resampling="q1")
+ img3b = img1.reproject(img2, resampling=rio.enums.Resampling.q1)
+ assert img3a.raster_equal(img3b)
+
+ test_data = [[landsat_b4_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]]
+
+ @pytest.mark.parametrize("data", test_data) # type: ignore
+ def test_crop(self, data: list[str]) -> None:
+ """Test for crop method, also called by square brackets through __getitem__"""
+
+ raster_path, outlines_path = data
+ r = gu.Raster(raster_path)
+
+ # -- Test with crop_geom being a list/tuple -- ##
+ crop_geom: list[float] = list(r.bounds)
+
+ # Test unloaded inplace cropping conserves the shape
+ r.crop(crop_geom=[crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]], inplace=True)
+ assert len(r.data.shape) == 2
+
+ r = gu.Raster(raster_path)
+
+ # Test with same bounds -> should be the same #
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert r_cropped.raster_equal(r)
+
+ # - Test cropping each side by a random integer of pixels - #
+ rng = np.random.default_rng(42)
+ rand_int = rng.integers(1, min(r.shape) - 1)
+
+ # Left
+ crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert list(r_cropped.bounds) == crop_geom2
+ assert np.array_equal(r.data[:, rand_int:].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[:, rand_int:].mask, r_cropped.data.mask)
+
+ # Right
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * r.res[0], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert list(r_cropped.bounds) == crop_geom2
+ assert np.array_equal(r.data[:, :-rand_int].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[:, :-rand_int].mask, r_cropped.data.mask)
+
+ # Bottom
+ crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(r.res[1]), crop_geom[2], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert list(r_cropped.bounds) == crop_geom2
+ assert np.array_equal(r.data[:-rand_int, :].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[:-rand_int, :].mask, r_cropped.data.mask)
+
+ # Top
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(r.res[1])]
+ r_cropped = r.crop(crop_geom2)
+ assert list(r_cropped.bounds) == crop_geom2
+ assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data, equal_nan=True)
+ assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask)
+
+ # Same but tuple
+ crop_geom3: tuple[float, float, float, float] = (
+ crop_geom[0],
+ crop_geom[1],
+ crop_geom[2],
+ crop_geom[3] - rand_int * r.res[0],
+ )
+ r_cropped = r.crop(crop_geom3)
+ assert list(r_cropped.bounds) == list(crop_geom3)
+ assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask)
+
+ # -- Test with crop_geom being a Raster -- #
+ r_cropped2 = r.crop(r_cropped)
+ assert r_cropped2.raster_equal(r_cropped)
+
+ # Check that bound reprojection is done automatically if the CRS differ
+ with warnings.catch_warnings():
+ warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*")
+
+ r_cropped_reproj = r_cropped.reproject(crs=3857)
+ r_cropped3 = r.crop(r_cropped_reproj)
+
+ # Original CRS bounds can be deformed during transformation, but result should be equivalent to this
+ r_cropped4 = r.crop(crop_geom=r_cropped_reproj.get_bounds_projected(out_crs=r.crs))
+ assert r_cropped3.raster_equal(r_cropped4)
+
+ # -- Test with inplace=True -- #
+ r_copy = r.copy()
+ r_copy.crop(r_cropped, inplace=True)
+ assert r_copy.raster_equal(r_cropped)
+
+ # - Test cropping each side with a non integer pixel, mode='match_pixel' - #
+ rand_float = rng.integers(1, min(r.shape) - 1) + 0.25
+
+ # left
+ crop_geom2 = [crop_geom[0] + rand_float * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float)
+ assert np.array_equal(r.data[:, int(rand_float) :].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[:, int(rand_float) :].mask, r_cropped.data.mask)
+
+ # right
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_float * r.res[0], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float)
+ assert np.array_equal(r.data[:, : -int(rand_float)].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[:, : -int(rand_float)].mask, r_cropped.data.mask)
+
+ # bottom
+ crop_geom2 = [crop_geom[0], crop_geom[1] + rand_float * abs(r.res[1]), crop_geom[2], crop_geom[3]]
+ r_cropped = r.crop(crop_geom2)
+ assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float)
+ assert np.array_equal(r.data[: -int(rand_float), :].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[: -int(rand_float), :].mask, r_cropped.data.mask)
+
+ # top
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_float * abs(r.res[1])]
+ r_cropped = r.crop(crop_geom2)
+ assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float)
+ assert np.array_equal(r.data[int(rand_float) :, :].data, r_cropped.data.data, equal_nan=True)
+ assert np.array_equal(r.data[int(rand_float) :, :].mask, r_cropped.data.mask)
+
+ # -- Test with mode='match_extent' -- #
+ # Test all sides at once, with rand_float less than half the smallest extent
+ # The cropped extent should exactly match the requested extent, res will be changed accordingly
+ rand_float = rng.integers(1, min(r.shape) / 2 - 1) + 0.25
+ crop_geom2 = [
+ crop_geom[0] + rand_float * r.res[0],
+ crop_geom[1] + rand_float * abs(r.res[1]),
+ crop_geom[2] - rand_float * r.res[0],
+ crop_geom[3] - rand_float * abs(r.res[1]),
+ ]
+
+ # Filter warning about nodata not set in reprojection (because match_extent triggers reproject)
+ with warnings.catch_warnings():
+ warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*")
+ r_cropped = r.crop(crop_geom2, mode="match_extent")
+
+ assert list(r_cropped.bounds) == crop_geom2
+ # The change in resolution should be less than what would occur with +/- 1 pixel
+ assert np.all(
+ abs(np.array(r.res) - np.array(r_cropped.res)) < np.array(r.res) / np.array(r_cropped.shape)[::-1]
+ )
+
+ # Filter warning about nodata not set in reprojection (because match_extent triggers reproject)
+ with warnings.catch_warnings():
+ warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*")
+ r_cropped2 = r.crop(r_cropped, mode="match_extent")
+ assert r_cropped2.raster_equal(r_cropped)
+
+ # -- Test with crop_geom being a Vector -- #
+ outlines = gu.Vector(outlines_path)
+
+ # First, we reproject manually the outline
+ outlines_reproj = gu.Vector(outlines.ds.to_crs(r.crs))
+ r_cropped = r.crop(outlines_reproj)
+
+ # Calculate intersection of the two bounding boxes and make sure crop has same bounds
+ win_outlines = rio.windows.from_bounds(*outlines_reproj.bounds, transform=r.transform)
+ win_raster = rio.windows.from_bounds(*r.bounds, transform=r.transform)
+ final_window = win_outlines.intersection(win_raster).round_lengths().round_offsets()
+ new_bounds = rio.windows.bounds(final_window, transform=r.transform)
+ assert list(r_cropped.bounds) == list(new_bounds)
+
+ # Second, we check that bound reprojection is done automatically if the CRS differ
+ r_cropped2 = r.crop(outlines)
+ assert list(r_cropped2.bounds) == list(new_bounds)
+
+ # -- Test crop works as expected even if transform has been modified, e.g. through downsampling -- #
+ # Test that with downsampling, cropping to same bounds result in same raster
+ r = gu.Raster(raster_path, downsample=5)
+ r_test = r.crop(r.bounds)
+ assert r_test.raster_equal(r)
+
+ # - Test that cropping yields the same results whether data is loaded or not -
+ # With integer cropping (left)
+ rand_int = rng.integers(1, min(r.shape) - 1)
+ crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
+ r = gu.Raster(raster_path, downsample=5, load_data=False)
+ assert not r.is_loaded
+ r_crop_unloaded = r.crop(crop_geom2)
+ r.load()
+ r_crop_loaded = r.crop(crop_geom2)
+ # TODO: the following condition should be met once issue #447 is solved
+ # assert r_crop_unloaded.raster_equal(r_crop_loaded)
+ assert r_crop_unloaded.shape == r_crop_loaded.shape
+ assert r_crop_unloaded.transform == r_crop_loaded.transform
+
+ # With a float number of pixels added to the right, mode 'match_pixel'
+ rand_float = rng.integers(1, min(r.shape) - 1) + 0.25
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] + rand_float * r.res[0], crop_geom[3]]
+ r = gu.Raster(raster_path, downsample=5, load_data=False)
+ assert not r.is_loaded
+ r_crop_unloaded = r.crop(crop_geom2, mode="match_pixel")
+ r.load()
+ r_crop_loaded = r.crop(crop_geom2, mode="match_pixel")
+ # TODO: the following condition should be met once issue #447 is solved
+ # assert r_crop_unloaded.raster_equal(r_crop_loaded)
+ assert r_crop_unloaded.shape == r_crop_loaded.shape
+ assert r_crop_unloaded.transform == r_crop_loaded.transform
+
+ # - Check related to pixel interpretation -
+
+ # Check warning for a different area_or_point for the match-reference geometry works
+ r.set_area_or_point("Area", shift_area_or_point=False)
+ r2 = r.copy()
+ r2.set_area_or_point("Point", shift_area_or_point=False)
+
+ with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'):
+ r.crop(r2)
+
+ # Check that cropping preserves the interpretation
+ crop_geom = [crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
+ r_crop = r.crop(crop_geom)
+ assert r_crop.area_or_point == "Area"
+ r2_crop = r2.crop(crop_geom)
+ assert r2_crop.area_or_point == "Point"
+
+ @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path, landsat_rgb_path]) # type: ignore
+ def test_translate(self, example: str) -> None:
+ """Test translation works as intended"""
+
+ r = gu.Raster(example)
+
+ # Get original transform
+ orig_transform = r.transform
+ orig_bounds = r.bounds
+
+ # Shift raster by georeferenced units (default)
+ # Check the default behaviour is not inplace
+ r_notinplace = r.translate(xoff=1, yoff=1)
+ assert isinstance(r_notinplace, gu.Raster)
+
+ # Check inplace
+ r.translate(xoff=1, yoff=1, inplace=True)
+ # Both shifts should have yielded the same transform
+ assert r.transform == r_notinplace.transform
+
+ # Only bounds should change
+ assert orig_transform.c + 1 == r.transform.c
+ assert orig_transform.f + 1 == r.transform.f
+ for attr in ["a", "b", "d", "e"]:
+ assert getattr(orig_transform, attr) == getattr(r.transform, attr)
+
+ assert orig_bounds.left + 1 == r.bounds.left
+ assert orig_bounds.right + 1 == r.bounds.right
+ assert orig_bounds.bottom + 1 == r.bounds.bottom
+ assert orig_bounds.top + 1 == r.bounds.top
+
+ # Shift raster using pixel units
+ orig_transform = r.transform
+ orig_bounds = r.bounds
+ orig_res = r.res
+ r.translate(xoff=1, yoff=1, distance_unit="pixel", inplace=True)
+
+ # Only bounds should change
+ assert orig_transform.c + 1 * orig_res[0] == r.transform.c
+ assert orig_transform.f + 1 * orig_res[1] == r.transform.f
+ for attr in ["a", "b", "d", "e"]:
+ assert getattr(orig_transform, attr) == getattr(r.transform, attr)
+
+ assert orig_bounds.left + 1 * orig_res[0] == r.bounds.left
+ assert orig_bounds.right + 1 * orig_res[0] == r.bounds.right
+ assert orig_bounds.bottom + 1 * orig_res[1] == r.bounds.bottom
+ assert orig_bounds.top + 1 * orig_res[1] == r.bounds.top
+
+ # Check that an error is raised for a wrong distance_unit
+ with pytest.raises(ValueError, match="Argument 'distance_unit' should be either 'pixel' or 'georeferenced'."):
+ r.translate(xoff=1, yoff=1, distance_unit="wrong_value") # type: ignore
+
+ @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore
+ def test_reproject(self, example: str) -> None:
+ warnings.simplefilter("error")
+
+ # Reference raster to be used
+ r = gu.Raster(example)
+
+ # -- Check proper errors are raised if nodata are not set -- #
+ r_nodata = r.copy()
+ r_nodata.set_nodata(None)
+
+ # Make sure at least one pixel is masked for test 1
+ rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True)
+ r_nodata.data[rand_indices] = np.ma.masked
+ assert np.count_nonzero(r_nodata.data.mask) > 0
+
+ # make sure at least one pixel is set at default nodata for test
+ default_nodata = _default_nodata(r_nodata.dtype)
+ rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True)
+ r_nodata.data[rand_indices] = default_nodata
+ assert np.count_nonzero(r_nodata.data == default_nodata) > 0
+
+ # 1 - if no force_source_nodata is set and masked values exist, raises an error
+ with pytest.raises(
+ ValueError,
+ match=re.escape(
+ "No nodata set, set one for the raster with self.set_nodata() or use a "
+ "temporary one with `force_source_nodata`."
+ ),
+ ):
+ _ = r_nodata.reproject(res=r_nodata.res[0] / 2, nodata=0)
+
+ # 2 - if no nodata is set and default value conflicts with existing value, a warning is raised
+ with pytest.warns(
+ UserWarning,
+ match=re.escape(
+ f"For reprojection, nodata must be set. Default chosen value "
+ f"{_default_nodata(r_nodata.dtype)} exists in self.data. This may have unexpected "
+ f"consequences. Consider setting a different nodata with self.set_nodata()."
+ ),
+ ):
+ r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata)
+ assert r_test.nodata == default_nodata
+
+ # 3 - if default nodata does not conflict, should not raise a warning
+ r_nodata.data[r_nodata.data == default_nodata] = 3
+ r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata)
+ assert r_test.nodata == default_nodata
+
+ # -- Test setting each combination of georeferences bounds, res and size -- #
+
+ # specific for the landsat test case, default nodata 255 cannot be used (see above), so use 0
+ if r.nodata is None:
+ r.set_nodata(0)
+
+ # - Test size - this should modify the shape, and hence resolution, but not the bounds -
+ out_size = (r.shape[1] // 2, r.shape[0] // 2) # Outsize is (ncol, nrow)
+ r_test = r.reproject(grid_size=out_size)
+ assert r_test.shape == (out_size[1], out_size[0])
+ assert r_test.res != r.res
+ assert r_test.bounds == r.bounds
+
+ # - Test bounds -
+ # if bounds is a multiple of res, outptut res should be preserved
+ bounds = np.copy(r.bounds)
+ dst_bounds = rio.coords.BoundingBox(
+ left=bounds[0], bottom=bounds[1] + r.res[0], right=bounds[2] - 2 * r.res[1], top=bounds[3]
+ )
+ r_test = r.reproject(bounds=dst_bounds)
+ assert r_test.bounds == dst_bounds
+ assert r_test.res == r.res
+
+ # Create bounds with 1/2 and 1/3 pixel extra on the right/bottom.
+ bounds = np.copy(r.bounds)
+ dst_bounds = rio.coords.BoundingBox(
+ left=bounds[0], bottom=bounds[1] - r.res[0] / 3.0, right=bounds[2] + r.res[1] / 2.0, top=bounds[3]
+ )
+
+ # If bounds are not a multiple of res, the latter will be updated accordingly
+ r_test = r.reproject(bounds=dst_bounds)
+ assert r_test.bounds == dst_bounds
+ assert r_test.res != r.res
+
+ # - Test size and bounds -
+ r_test = r.reproject(grid_size=out_size, bounds=dst_bounds)
+ assert r_test.shape == (out_size[1], out_size[0])
+ assert r_test.bounds == dst_bounds
+
+ # - Test res -
+ # Using a single value, output res will be enforced, resolution will be different
+ res_single = r.res[0] * 2
+ r_test = r.reproject(res=res_single)
+ assert r_test.res == (res_single, res_single)
+ assert r_test.shape != r.shape
+
+ # Using a tuple
+ res_tuple = (r.res[0] * 0.5, r.res[1] * 4)
+ r_test = r.reproject(res=res_tuple)
+ assert r_test.res == res_tuple
+ assert r_test.shape != r.shape
+
+ # - Test res and bounds -
+ # Bounds will be enforced for upper-left pixel, but adjusted by up to one pixel for the lower right bound.
+ # for single res value
+ r_test = r.reproject(bounds=dst_bounds, res=res_single)
+ assert r_test.res == (res_single, res_single)
+ assert r_test.bounds.left == dst_bounds.left
+ assert r_test.bounds.top == dst_bounds.top
+ assert np.abs(r_test.bounds.right - dst_bounds.right) < res_single
+ assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_single
+
+ # For tuple
+ r_test = r.reproject(bounds=dst_bounds, res=res_tuple)
+ assert r_test.res == res_tuple
+ assert r_test.bounds.left == dst_bounds.left
+ assert r_test.bounds.top == dst_bounds.top
+ assert np.abs(r_test.bounds.right - dst_bounds.right) < res_tuple[0]
+ assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_tuple[1]
+
+ # - Test crs -
+ out_crs = rio.crs.CRS.from_epsg(4326)
+ r_test = r.reproject(crs=out_crs)
+ assert r_test.crs.to_epsg() == 4326
+
+ # -- Additional tests --
+ # First, make sure dst_bounds extend beyond current extent to create nodata
+ dst_bounds = rio.coords.BoundingBox(
+ left=bounds[0], bottom=bounds[1] - r.res[0], right=bounds[2] + 2 * r.res[1], top=bounds[3]
+ )
+ r_test = r.reproject(bounds=dst_bounds)
+ assert np.count_nonzero(r_test.data.mask) > 0
+
+ # If nodata falls outside the original image range, check range is preserved (with nearest interpolation)
+ r_float = r.astype("float32") # type: ignore
+ if (r_float.nodata < np.min(r_float)) or (r_float.nodata > np.max(r_float)):
+ r_test = r_float.reproject(bounds=dst_bounds, resampling="nearest")
+ assert r_test.nodata == r_float.nodata
+ assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0 # Some values should be set to nodata
+ assert np.min(r_test.data) == np.min(r_float.data) # But min and max should not be affected
+ assert np.max(r_test.data) == np.max(r_float.data)
+
+ # Check that nodata works as expected
+ r_test = r_float.reproject(bounds=dst_bounds, nodata=9999)
+ assert r_test.nodata == 9999
+ assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0
+
+ # Test that reproject works the same whether data is already loaded or not
+ assert r.is_loaded
+ r_test1 = r.reproject(crs=out_crs, nodata=0)
+ r_unload = gu.Raster(example, load_data=False)
+ assert not r_unload.is_loaded
+ r_test2 = r_unload.reproject(crs=out_crs, nodata=0)
+ assert r_test1.raster_equal(r_test2)
+
+ # Test that reproject does not fail with resolution as np.integer or np.float types, single value or tuple
+ astype_funcs = [int, np.int32, float, np.float64]
+ for astype_func in astype_funcs:
+ r.reproject(res=astype_func(20.5), nodata=0)
+ for i in range(len(astype_funcs)):
+ for j in range(len(astype_funcs)):
+ r.reproject(res=(astype_funcs[i](20.5), astype_funcs[j](10.5)), nodata=0)
+
+ # Test that reprojection works for several bands
+ for n in [2, 3, 4]:
+ img1 = gu.Raster.from_array(
+ np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(0, 500, 1, 1), crs=4326
+ )
+
+ img2 = gu.Raster.from_array(
+ np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(50, 500, 1, 1), crs=4326
+ )
+
+ out_img = img2.reproject(img1)
+ assert np.shape(out_img.data) == (n, 500, 500)
+ assert (out_img.count, *out_img.shape) == (n, 500, 500)
+
+ # Test that the rounding of resolution is correct for large decimal numbers
+ # (we take an example that used to fail, see issue #354 and #357)
+ data = np.ones((4759, 2453))
+ transform = rio.transform.Affine(
+ 24.12423878332849, 0.0, 238286.29553975424, 0.0, -24.12423878332849, 6995453.456051373
+ )
+ crs = rio.CRS.from_epsg(32633)
+ nodata = -9999.0
+ rst = gu.Raster.from_array(data=data, transform=transform, crs=crs, nodata=nodata)
+
+ rst_reproj = rst.reproject(bounds=rst.bounds, res=(20.0, 20.0))
+ # This used to be 19.999999999999999 due to floating point precision
+ assert rst_reproj.res == (20.0, 20.0)
+
+ # -- Test match reference functionalities --
+
+ # - Create 2 artificial rasters -
+ # for r2b, bounds are cropped to the upper left by an integer number of pixels (i.e. crop)
+ # for r2, resolution is also set to 2/3 the input res
+ min_size = min(r.shape)
+ rng = np.random.default_rng(42)
+ rand_int = rng.integers(min_size / 10, min(r.shape) - min_size / 10)
+ new_transform = rio.transform.from_origin(
+ r.bounds.left + rand_int * r.res[0], r.bounds.top - rand_int * abs(r.res[1]), r.res[0], r.res[1]
+ )
+
+ # data is cropped to the same extent
+ new_data = r.data[rand_int::, rand_int::]
+ r2b = gu.Raster.from_array(data=new_data, transform=new_transform, crs=r.crs, nodata=r.nodata)
+
+ # Create a raster with different resolution
+ dst_res = r.res[0] * 2 / 3
+ r2 = r2b.reproject(res=dst_res)
+ assert r2.res == (dst_res, dst_res)
+
+ # Assert the initial rasters are different
+ assert r.bounds != r2b.bounds
+ assert r.shape != r2b.shape
+ assert r.bounds != r2.bounds
+ assert r.shape != r2.shape
+ assert r.res != r2.res
+
+ # Test reprojecting with ref=r2b (i.e. crop) -> output should have same shape, bounds and data, i.e. be the
+ # same object
+ r3 = r.reproject(r2b)
+ assert r3.bounds == r2b.bounds
+ assert r3.shape == r2b.shape
+ assert r3.bounds == r2b.bounds
+ assert r3.transform == r2b.transform
+ assert np.array_equal(r3.data.data, r2b.data.data, equal_nan=True)
+ assert np.array_equal(r3.data.mask, r2b.data.mask)
+
+ if DO_PLOT:
+ fig1, ax1 = plt.subplots()
+ r.plot(ax=ax1, title="Raster 1")
+
+ fig2, ax2 = plt.subplots()
+ r2b.plot(ax=ax2, title="Raster 2")
+
+ fig3, ax3 = plt.subplots()
+ r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2")
+
+ plt.show()
+
+ # Test reprojecting with ref=r2 -> output should have same shape, bounds and transform
+ # Data should be slightly different due to difference in input resolution
+ r3 = r.reproject(r2)
+ assert r3.bounds == r2.bounds
+ assert r3.shape == r2.shape
+ assert r3.bounds == r2.bounds
+ assert r3.transform == r2.transform
+ assert not np.array_equal(r3.data.data, r2.data.data, equal_nan=True)
+
+ if DO_PLOT:
+ fig1, ax1 = plt.subplots()
+ r.plot(ax=ax1, title="Raster 1")
+
+ fig2, ax2 = plt.subplots()
+ r2.plot(ax=ax2, title="Raster 2")
+
+ fig3, ax3 = plt.subplots()
+ r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2")
+
+ plt.show()
+
+ # -- Check that if mask is modified afterwards, it is taken into account during reproject -- #
+ # Create a raster with (additional) random gaps
+ r_gaps = r.copy()
+ nsamples = 200
+ rand_indices = gu.raster.subsample_array(r_gaps.data, nsamples, return_indices=True)
+ r_gaps.data[rand_indices] = np.ma.masked
+ assert np.sum(r_gaps.data.mask) - np.sum(r.data.mask) == nsamples # sanity check
+
+ # reproject raster, and reproject mask. Check that both have same number of masked pixels
+ # TODO: should test other resampling algo
+ r_gaps_reproj = r_gaps.reproject(res=dst_res, resampling="nearest")
+ mask = gu.Raster.from_array(
+ r_gaps.data.mask.astype("uint8"), crs=r_gaps.crs, transform=r_gaps.transform, nodata=None
+ )
+ mask_reproj = mask.reproject(res=dst_res, nodata=255, resampling="nearest")
+ # Final masked pixels are those originally masked (=1) and the values masked during reproject, e.g. edges
+ tot_masked_true = np.count_nonzero(mask_reproj.data.mask) + np.count_nonzero(mask_reproj.data == 1)
+ assert np.count_nonzero(r_gaps_reproj.data.mask) == tot_masked_true
+
+ # If a nodata is set, make sure it is preserved
+ r_nodata = r.copy()
+
+ r_nodata.set_nodata(0)
+
+ r3 = r_nodata.reproject(r2)
+ assert r_nodata.nodata == r3.nodata
+
+ # -- Check inplace behaviour works -- #
+
+ # Check when transform is updated (via res)
+ r_tmp_res = r.copy()
+ r_res = r_tmp_res.reproject(res=r.res[0] / 2)
+ r_tmp_res.reproject(res=r.res[0] / 2, inplace=True)
+
+ assert r_res.raster_equal(r_tmp_res)
+
+ # Check when CRS is updated
+ r_tmp_crs = r.copy()
+ r_crs = r_tmp_crs.reproject(crs=out_crs)
+ r_tmp_crs.reproject(crs=out_crs, inplace=True)
+
+ assert r_crs.raster_equal(r_tmp_crs)
+
+ # -- Test additional errors raised for argument combinations -- #
+
+ # If both ref and crs are set
+ with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")):
+ _ = r.reproject(ref=r2, crs=r.crs)
+
+ # Size and res are mutually exclusive
+ with pytest.raises(ValueError, match=re.escape("size and res both specified. Specify only one.")):
+ _ = r.reproject(grid_size=(10, 10), res=50)
+
+ # If wrong type for `ref`
+ with pytest.raises(
+ TypeError, match=re.escape("Type of ref not understood, must be path to file (str), Raster.")
+ ):
+ _ = r.reproject(ref=3)
+
+ # If input reference is string and file and does not exist
+ with pytest.raises(ValueError, match=re.escape("Reference raster does not exist.")):
+ _ = r.reproject(ref="no_file.tif")
+
+ # -- Check warning for area_or_point works -- #
+ r.set_area_or_point("Area", shift_area_or_point=False)
+ r2 = r.copy()
+ r2.set_area_or_point("Point", shift_area_or_point=False)
+
+ with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'):
+ r.reproject(r2)
+
+ # Check that reprojecting preserves interpretation
+ r_reproj = r.reproject(res=r.res[0] * 2)
+ assert r_reproj.area_or_point == "Area"
+ r2_reproj = r2.reproject(res=r2.res[0] * 2)
+ assert r2_reproj.area_or_point == "Point"
+
+
+class TestMaskGeotransformations:
+ # Paths to example data
+ landsat_b4_path = examples.get_path("everest_landsat_b4")
+ landsat_rgb_path = examples.get_path("everest_landsat_rgb")
+ everest_outlines_path = examples.get_path("everest_rgi_outlines")
+ aster_dem_path = examples.get_path("exploradores_aster_dem")
+
+ # Mask without nodata
+ mask_landsat_b4 = gu.Raster(landsat_b4_path) > 125
+ # Mask with nodata
+ mask_aster_dem = gu.Raster(aster_dem_path) > 2000
+ # Mask from an outline
+ mask_everest = gu.Vector(everest_outlines_path).create_mask(gu.Raster(landsat_b4_path))
+
+ @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
+ def test_crop(self, mask: gu.Mask) -> None:
+ # Test with same bounds -> should be the same #
+
+ mask_orig = mask.copy()
+ crop_geom = mask.bounds
+ mask_cropped = mask.crop(crop_geom)
+ assert mask_cropped.raster_equal(mask)
+
+ # Check if instance is respected
+ assert isinstance(mask_cropped, gu.Mask)
+ # Check the dtype of the original mask was properly reconverted
+ assert mask.data.dtype == bool
+ # Check the original mask was not modified during cropping
+ assert mask_orig.raster_equal(mask)
+
+ # Check inplace behaviour works
+ mask_tmp = mask.copy()
+ mask_tmp.crop(crop_geom, inplace=True)
+ assert mask_tmp.raster_equal(mask_cropped)
+
+ # - Test cropping each side by a random integer of pixels - #
+ rng = np.random.default_rng(42)
+ rand_int = rng.integers(1, min(mask.shape) - 1)
+
+ # Left
+ crop_geom2 = [crop_geom[0] + rand_int * mask.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
+ mask_cropped = mask.crop(crop_geom2)
+ assert list(mask_cropped.bounds) == crop_geom2
+ assert np.array_equal(mask.data[:, rand_int:].data, mask_cropped.data.data, equal_nan=True)
+ assert np.array_equal(mask.data[:, rand_int:].mask, mask_cropped.data.mask)
+
+ # Right
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * mask.res[0], crop_geom[3]]
+ mask_cropped = mask.crop(crop_geom2)
+ assert list(mask_cropped.bounds) == crop_geom2
+ assert np.array_equal(mask.data[:, :-rand_int].data, mask_cropped.data.data, equal_nan=True)
+ assert np.array_equal(mask.data[:, :-rand_int].mask, mask_cropped.data.mask)
+
+ # Bottom
+ crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(mask.res[1]), crop_geom[2], crop_geom[3]]
+ mask_cropped = mask.crop(crop_geom2)
+ assert list(mask_cropped.bounds) == crop_geom2
+ assert np.array_equal(mask.data[:-rand_int, :].data, mask_cropped.data.data, equal_nan=True)
+ assert np.array_equal(mask.data[:-rand_int, :].mask, mask_cropped.data.mask)
+
+ # Top
+ crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(mask.res[1])]
+ mask_cropped = mask.crop(crop_geom2)
+ assert list(mask_cropped.bounds) == crop_geom2
+ assert np.array_equal(mask.data[rand_int:, :].data, mask_cropped.data, equal_nan=True)
+ assert np.array_equal(mask.data[rand_int:, :].mask, mask_cropped.data.mask)
+
+ # Test inplace
+ mask_orig = mask.copy()
+ mask_orig.crop(crop_geom2, inplace=True)
+ assert list(mask_orig.bounds) == crop_geom2
+ assert np.array_equal(mask.data[rand_int:, :].data, mask_orig.data, equal_nan=True)
+ assert np.array_equal(mask.data[rand_int:, :].mask, mask_orig.data.mask)
+
+ # Run with match_extent, check that inplace or not yields the same result
+
+ # TODO: Pretty sketchy with the current functioning of "match_extent",
+ # should we just remove it from Raster.crop() ?
+
+ # mask_cropped = mask.crop(crop_geom2, inplace=False, mode="match_extent")
+ # mask_orig.crop(crop_geom2, mode="match_extent")
+ # assert mask_cropped.raster_equal(mask_orig)
+
+ @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
+ def test_reproject(self, mask: gu.Mask) -> None:
+ # Test 1: with a classic resampling (bilinear)
+
+ # Reproject mask - resample to 100 x 100 grid
+ mask_orig = mask.copy()
+ mask_reproj = mask.reproject(grid_size=(100, 100), force_source_nodata=2)
+
+ # Check instance is respected
+ assert isinstance(mask_reproj, gu.Mask)
+ # Check the dtype of the original mask was properly reconverted
+ assert mask.data.dtype == bool
+ # Check the original mask was not modified during reprojection
+ assert mask_orig.raster_equal(mask)
+
+ # Check inplace behaviour works
+ mask_tmp = mask.copy()
+ mask_tmp.reproject(grid_size=(100, 100), force_source_nodata=2, inplace=True)
+ assert mask_tmp.raster_equal(mask_reproj)
+
+ # This should be equivalent to converting the array to uint8, reprojecting, converting back
+ mask_uint8 = mask.astype("uint8")
+ mask_uint8_reproj = mask_uint8.reproject(grid_size=(100, 100), force_source_nodata=2)
+ mask_uint8_reproj.data = mask_uint8_reproj.data.astype("bool")
+
+ assert mask_reproj.raster_equal(mask_uint8_reproj)
+
+ # Test 2: should raise a warning when the resampling differs from nearest
+
+ with pytest.warns(
+ UserWarning,
+ match="Reprojecting a mask with a resampling method other than 'nearest', "
+ "the boolean array will be converted to float during interpolation.",
+ ):
+ mask.reproject(res=50, resampling="bilinear", force_source_nodata=2)
diff --git a/tests/test_raster/test_multiraster.py b/tests/test_raster/test_multiraster.py
index f93d2741..86cd7803 100644
--- a/tests/test_raster/test_multiraster.py
+++ b/tests/test_raster/test_multiraster.py
@@ -1,6 +1,7 @@
"""
Test tools involving multiple rasters.
"""
+
from __future__ import annotations
import warnings
@@ -203,7 +204,7 @@ def test_stack_rasters(self, rasters) -> None: # type: ignore
assert rasters.img.width == pytest.approx(stacked_img.width, abs=1)
else:
assert rasters.img.shape == stacked_img.shape
- assert type(stacked_img) == gu.Raster # Check output object is always Raster, whatever input was given
+ assert isinstance(stacked_img, gu.Raster) # Check output object is always Raster, whatever input was given
assert np.count_nonzero(np.isnan(stacked_img.data)) == 0 # Check no NaNs introduced
merged_bounds = gu.projtools.merge_bounds(
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index ed16c0ae..404e4745 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1,6 +1,7 @@
"""
Test functions for raster
"""
+
from __future__ import annotations
import os
@@ -22,51 +23,11 @@
import geoutils as gu
from geoutils import examples
from geoutils._typing import MArrayNum, NDArrayNum
-from geoutils.misc import resampling_method_from_str
from geoutils.raster.raster import _default_nodata, _default_rio_attrs
DO_PLOT = False
-def run_gdal_proximity(
- input_raster: gu.Raster, target_values: list[float] | None, distunits: str = "GEO"
-) -> NDArrayNum:
- """Run GDAL's ComputeProximity and return the read numpy array."""
- # Rasterio strongly recommends against importing gdal along rio, so this is done here instead.
- from osgeo import gdal, gdalconst
-
- gdal.UseExceptions()
-
- # Initiate empty GDAL raster for proximity output
- drv = gdal.GetDriverByName("MEM")
- proxy_ds = drv.Create("", input_raster.shape[1], input_raster.shape[0], 1, gdal.GetDataTypeByName("Float32"))
- proxy_ds.GetRasterBand(1).SetNoDataValue(-9999)
-
- # Save input in temporary file to read with GDAL
- # (avoids the nightmare of setting nodata, transform, crs in GDAL format...)
- with tempfile.TemporaryDirectory() as temp_dir:
- temp_path = os.path.join(temp_dir, "input.tif")
- input_raster.save(temp_path)
- ds_raster_in = gdal.Open(temp_path, gdalconst.GA_ReadOnly)
-
- # Define GDAL options
- proximity_options = ["DISTUNITS=" + distunits]
- if target_values is not None:
- proximity_options.insert(0, "VALUES=" + ",".join([str(tgt) for tgt in target_values]))
-
- # Compute proximity
- gdal.ComputeProximity(ds_raster_in.GetRasterBand(1), proxy_ds.GetRasterBand(1), proximity_options)
- # Save array
- proxy_array = proxy_ds.GetRasterBand(1).ReadAsArray().astype("float32")
- proxy_array[proxy_array == -9999] = np.nan
-
- # Close GDAL datasets
- proxy_ds = None
- ds_raster_in = None
-
- return proxy_array
-
-
class TestRaster:
landsat_b4_path = examples.get_path("everest_landsat_b4")
landsat_b4_crop_path = examples.get_path("everest_landsat_b4_cropped")
@@ -1048,7 +1009,7 @@ def test_copy(self, example: str) -> None:
# When passing the new array as a NaN ndarray, only the valid data is equal, because masked data is NaN in one
# case, and -9999 in the other
- r_arr = gu.raster.get_array_and_mask(r)[0]
+ r_arr = gu.raster.array._get_array_and_mask(r)[0]
r2 = r.copy(new_array=r_arr)
assert np.ma.allequal(r.data, r2.data)
# If a nodata value exists, and we update the NaN pixels to be that nodata value, then the two Rasters should
@@ -1251,610 +1212,6 @@ def test_getitem_setitem(self, example: str) -> None:
with pytest.raises(ValueError, match=re.escape(message_raster.format(op_name_assign))):
rst[mask] = 1
- test_data = [[landsat_b4_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]]
-
- @pytest.mark.parametrize("data", test_data) # type: ignore
- def test_crop(self, data: list[str]) -> None:
- """Test for crop method, also called by square brackets through __getitem__"""
-
- raster_path, outlines_path = data
- r = gu.Raster(raster_path)
-
- # -- Test with crop_geom being a list/tuple -- ##
- crop_geom: list[float] = list(r.bounds)
-
- # Test unloaded inplace cropping conserves the shape
- r.crop(crop_geom=[crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]], inplace=True)
- assert len(r.data.shape) == 2
-
- r = gu.Raster(raster_path)
-
- # Test with same bounds -> should be the same #
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert r_cropped.raster_equal(r)
-
- # - Test cropping each side by a random integer of pixels - #
- rng = np.random.default_rng(42)
- rand_int = rng.integers(1, min(r.shape) - 1)
-
- # Left
- crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert list(r_cropped.bounds) == crop_geom2
- assert np.array_equal(r.data[:, rand_int:].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[:, rand_int:].mask, r_cropped.data.mask)
-
- # Right
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * r.res[0], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert list(r_cropped.bounds) == crop_geom2
- assert np.array_equal(r.data[:, :-rand_int].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[:, :-rand_int].mask, r_cropped.data.mask)
-
- # Bottom
- crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(r.res[1]), crop_geom[2], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert list(r_cropped.bounds) == crop_geom2
- assert np.array_equal(r.data[:-rand_int, :].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[:-rand_int, :].mask, r_cropped.data.mask)
-
- # Top
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(r.res[1])]
- r_cropped = r.crop(crop_geom2)
- assert list(r_cropped.bounds) == crop_geom2
- assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data, equal_nan=True)
- assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask)
-
- # Same but tuple
- crop_geom3: tuple[float, float, float, float] = (
- crop_geom[0],
- crop_geom[1],
- crop_geom[2],
- crop_geom[3] - rand_int * r.res[0],
- )
- r_cropped = r.crop(crop_geom3)
- assert list(r_cropped.bounds) == list(crop_geom3)
- assert np.array_equal(r.data[rand_int:, :].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[rand_int:, :].mask, r_cropped.data.mask)
-
- # -- Test with crop_geom being a Raster -- #
- r_cropped2 = r.crop(r_cropped)
- assert r_cropped2.raster_equal(r_cropped)
-
- # Check that bound reprojection is done automatically if the CRS differ
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*")
-
- r_cropped_reproj = r_cropped.reproject(crs=3857)
- r_cropped3 = r.crop(r_cropped_reproj)
-
- # Original CRS bounds can be deformed during transformation, but result should be equivalent to this
- r_cropped4 = r.crop(crop_geom=r_cropped_reproj.get_bounds_projected(out_crs=r.crs))
- assert r_cropped3.raster_equal(r_cropped4)
-
- # -- Test with inplace=True -- #
- r_copy = r.copy()
- r_copy.crop(r_cropped, inplace=True)
- assert r_copy.raster_equal(r_cropped)
-
- # - Test cropping each side with a non integer pixel, mode='match_pixel' - #
- rand_float = rng.integers(1, min(r.shape) - 1) + 0.25
-
- # left
- crop_geom2 = [crop_geom[0] + rand_float * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float)
- assert np.array_equal(r.data[:, int(rand_float) :].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[:, int(rand_float) :].mask, r_cropped.data.mask)
-
- # right
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_float * r.res[0], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert r.shape[1] - (r_cropped.bounds.right - r_cropped.bounds.left) / r.res[0] == int(rand_float)
- assert np.array_equal(r.data[:, : -int(rand_float)].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[:, : -int(rand_float)].mask, r_cropped.data.mask)
-
- # bottom
- crop_geom2 = [crop_geom[0], crop_geom[1] + rand_float * abs(r.res[1]), crop_geom[2], crop_geom[3]]
- r_cropped = r.crop(crop_geom2)
- assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float)
- assert np.array_equal(r.data[: -int(rand_float), :].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[: -int(rand_float), :].mask, r_cropped.data.mask)
-
- # top
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_float * abs(r.res[1])]
- r_cropped = r.crop(crop_geom2)
- assert r.shape[0] - (r_cropped.bounds.top - r_cropped.bounds.bottom) / r.res[1] == int(rand_float)
- assert np.array_equal(r.data[int(rand_float) :, :].data, r_cropped.data.data, equal_nan=True)
- assert np.array_equal(r.data[int(rand_float) :, :].mask, r_cropped.data.mask)
-
- # -- Test with mode='match_extent' -- #
- # Test all sides at once, with rand_float less than half the smallest extent
- # The cropped extent should exactly match the requested extent, res will be changed accordingly
- rand_float = rng.integers(1, min(r.shape) / 2 - 1) + 0.25
- crop_geom2 = [
- crop_geom[0] + rand_float * r.res[0],
- crop_geom[1] + rand_float * abs(r.res[1]),
- crop_geom[2] - rand_float * r.res[0],
- crop_geom[3] - rand_float * abs(r.res[1]),
- ]
-
- # Filter warning about nodata not set in reprojection (because match_extent triggers reproject)
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*")
- r_cropped = r.crop(crop_geom2, mode="match_extent")
-
- assert list(r_cropped.bounds) == crop_geom2
- # The change in resolution should be less than what would occur with +/- 1 pixel
- assert np.all(
- abs(np.array(r.res) - np.array(r_cropped.res)) < np.array(r.res) / np.array(r_cropped.shape)[::-1]
- )
-
- # Filter warning about nodata not set in reprojection (because match_extent triggers reproject)
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*")
- r_cropped2 = r.crop(r_cropped, mode="match_extent")
- assert r_cropped2.raster_equal(r_cropped)
-
- # -- Test with crop_geom being a Vector -- #
- outlines = gu.Vector(outlines_path)
-
- # First, we reproject manually the outline
- outlines_reproj = gu.Vector(outlines.ds.to_crs(r.crs))
- r_cropped = r.crop(outlines_reproj)
-
- # Calculate intersection of the two bounding boxes and make sure crop has same bounds
- win_outlines = rio.windows.from_bounds(*outlines_reproj.bounds, transform=r.transform)
- win_raster = rio.windows.from_bounds(*r.bounds, transform=r.transform)
- final_window = win_outlines.intersection(win_raster).round_lengths().round_offsets()
- new_bounds = rio.windows.bounds(final_window, transform=r.transform)
- assert list(r_cropped.bounds) == list(new_bounds)
-
- # Second, we check that bound reprojection is done automatically if the CRS differ
- r_cropped2 = r.crop(outlines)
- assert list(r_cropped2.bounds) == list(new_bounds)
-
- # -- Test crop works as expected even if transform has been modified, e.g. through downsampling -- #
- # Test that with downsampling, cropping to same bounds result in same raster
- r = gu.Raster(raster_path, downsample=5)
- r_test = r.crop(r.bounds)
- assert r_test.raster_equal(r)
-
- # - Test that cropping yields the same results whether data is loaded or not -
- # With integer cropping (left)
- rand_int = rng.integers(1, min(r.shape) - 1)
- crop_geom2 = [crop_geom[0] + rand_int * r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
- r = gu.Raster(raster_path, downsample=5, load_data=False)
- assert not r.is_loaded
- r_crop_unloaded = r.crop(crop_geom2)
- r.load()
- r_crop_loaded = r.crop(crop_geom2)
- # TODO: the following condition should be met once issue #447 is solved
- # assert r_crop_unloaded.raster_equal(r_crop_loaded)
- assert r_crop_unloaded.shape == r_crop_loaded.shape
- assert r_crop_unloaded.transform == r_crop_loaded.transform
-
- # With a float number of pixels added to the right, mode 'match_pixel'
- rand_float = rng.integers(1, min(r.shape) - 1) + 0.25
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] + rand_float * r.res[0], crop_geom[3]]
- r = gu.Raster(raster_path, downsample=5, load_data=False)
- assert not r.is_loaded
- r_crop_unloaded = r.crop(crop_geom2, mode="match_pixel")
- r.load()
- r_crop_loaded = r.crop(crop_geom2, mode="match_pixel")
- # TODO: the following condition should be met once issue #447 is solved
- # assert r_crop_unloaded.raster_equal(r_crop_loaded)
- assert r_crop_unloaded.shape == r_crop_loaded.shape
- assert r_crop_unloaded.transform == r_crop_loaded.transform
-
- # - Check related to pixel interpretation -
-
- # Check warning for a different area_or_point for the match-reference geometry works
- r.set_area_or_point("Area", shift_area_or_point=False)
- r2 = r.copy()
- r2.set_area_or_point("Point", shift_area_or_point=False)
-
- with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'):
- r.crop(r2)
-
- # Check that cropping preserves the interpretation
- crop_geom = [crop_geom[0] + r.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
- r_crop = r.crop(crop_geom)
- assert r_crop.area_or_point == "Area"
- r2_crop = r2.crop(crop_geom)
- assert r2_crop.area_or_point == "Point"
-
- @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path, landsat_rgb_path]) # type: ignore
- def test_translate(self, example: str) -> None:
- """Test translation works as intended"""
-
- r = gu.Raster(example)
-
- # Get original transform
- orig_transform = r.transform
- orig_bounds = r.bounds
-
- # Shift raster by georeferenced units (default)
- # Check the default behaviour is not inplace
- r_notinplace = r.translate(xoff=1, yoff=1)
- assert isinstance(r_notinplace, gu.Raster)
-
- # Check inplace
- r.translate(xoff=1, yoff=1, inplace=True)
- # Both shifts should have yielded the same transform
- assert r.transform == r_notinplace.transform
-
- # Only bounds should change
- assert orig_transform.c + 1 == r.transform.c
- assert orig_transform.f + 1 == r.transform.f
- for attr in ["a", "b", "d", "e"]:
- assert getattr(orig_transform, attr) == getattr(r.transform, attr)
-
- assert orig_bounds.left + 1 == r.bounds.left
- assert orig_bounds.right + 1 == r.bounds.right
- assert orig_bounds.bottom + 1 == r.bounds.bottom
- assert orig_bounds.top + 1 == r.bounds.top
-
- # Shift raster using pixel units
- orig_transform = r.transform
- orig_bounds = r.bounds
- orig_res = r.res
- r.translate(xoff=1, yoff=1, distance_unit="pixel", inplace=True)
-
- # Only bounds should change
- assert orig_transform.c + 1 * orig_res[0] == r.transform.c
- assert orig_transform.f + 1 * orig_res[1] == r.transform.f
- for attr in ["a", "b", "d", "e"]:
- assert getattr(orig_transform, attr) == getattr(r.transform, attr)
-
- assert orig_bounds.left + 1 * orig_res[0] == r.bounds.left
- assert orig_bounds.right + 1 * orig_res[0] == r.bounds.right
- assert orig_bounds.bottom + 1 * orig_res[1] == r.bounds.bottom
- assert orig_bounds.top + 1 * orig_res[1] == r.bounds.top
-
- # Check that an error is raised for a wrong distance_unit
- with pytest.raises(ValueError, match="Argument 'distance_unit' should be either 'pixel' or 'georeferenced'."):
- r.translate(xoff=1, yoff=1, distance_unit="wrong_value") # type: ignore
-
- @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore
- def test_reproject(self, example: str) -> None:
- warnings.simplefilter("error")
-
- # Reference raster to be used
- r = gu.Raster(example)
-
- # -- Check proper errors are raised if nodata are not set -- #
- r_nodata = r.copy()
- r_nodata.set_nodata(None)
-
- # Make sure at least one pixel is masked for test 1
- rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True)
- r_nodata.data[rand_indices] = np.ma.masked
- assert np.count_nonzero(r_nodata.data.mask) > 0
-
- # make sure at least one pixel is set at default nodata for test
- default_nodata = _default_nodata(r_nodata.dtype)
- rand_indices = gu.raster.subsample_array(r_nodata.data, 10, return_indices=True)
- r_nodata.data[rand_indices] = default_nodata
- assert np.count_nonzero(r_nodata.data == default_nodata) > 0
-
- # 1 - if no force_source_nodata is set and masked values exist, raises an error
- with pytest.raises(
- ValueError,
- match=re.escape(
- "No nodata set, set one for the raster with self.set_nodata() or use a "
- "temporary one with `force_source_nodata`."
- ),
- ):
- _ = r_nodata.reproject(res=r_nodata.res[0] / 2, nodata=0)
-
- # 2 - if no nodata is set and default value conflicts with existing value, a warning is raised
- with pytest.warns(
- UserWarning,
- match=re.escape(
- f"For reprojection, nodata must be set. Default chosen value "
- f"{_default_nodata(r_nodata.dtype)} exists in self.data. This may have unexpected "
- f"consequences. Consider setting a different nodata with self.set_nodata()."
- ),
- ):
- r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata)
- assert r_test.nodata == default_nodata
-
- # 3 - if default nodata does not conflict, should not raise a warning
- r_nodata.data[r_nodata.data == default_nodata] = 3
- r_test = r_nodata.reproject(res=r_nodata.res[0] / 2, force_source_nodata=default_nodata)
- assert r_test.nodata == default_nodata
-
- # -- Test setting each combination of georeferences bounds, res and size -- #
-
- # specific for the landsat test case, default nodata 255 cannot be used (see above), so use 0
- if r.nodata is None:
- r.set_nodata(0)
-
- # - Test size - this should modify the shape, and hence resolution, but not the bounds -
- out_size = (r.shape[1] // 2, r.shape[0] // 2) # Outsize is (ncol, nrow)
- r_test = r.reproject(grid_size=out_size)
- assert r_test.shape == (out_size[1], out_size[0])
- assert r_test.res != r.res
- assert r_test.bounds == r.bounds
-
- # - Test bounds -
- # if bounds is a multiple of res, outptut res should be preserved
- bounds = np.copy(r.bounds)
- dst_bounds = rio.coords.BoundingBox(
- left=bounds[0], bottom=bounds[1] + r.res[0], right=bounds[2] - 2 * r.res[1], top=bounds[3]
- )
- r_test = r.reproject(bounds=dst_bounds)
- assert r_test.bounds == dst_bounds
- assert r_test.res == r.res
-
- # Create bounds with 1/2 and 1/3 pixel extra on the right/bottom.
- bounds = np.copy(r.bounds)
- dst_bounds = rio.coords.BoundingBox(
- left=bounds[0], bottom=bounds[1] - r.res[0] / 3.0, right=bounds[2] + r.res[1] / 2.0, top=bounds[3]
- )
-
- # If bounds are not a multiple of res, the latter will be updated accordingly
- r_test = r.reproject(bounds=dst_bounds)
- assert r_test.bounds == dst_bounds
- assert r_test.res != r.res
-
- # - Test size and bounds -
- r_test = r.reproject(grid_size=out_size, bounds=dst_bounds)
- assert r_test.shape == (out_size[1], out_size[0])
- assert r_test.bounds == dst_bounds
-
- # - Test res -
- # Using a single value, output res will be enforced, resolution will be different
- res_single = r.res[0] * 2
- r_test = r.reproject(res=res_single)
- assert r_test.res == (res_single, res_single)
- assert r_test.shape != r.shape
-
- # Using a tuple
- res_tuple = (r.res[0] * 0.5, r.res[1] * 4)
- r_test = r.reproject(res=res_tuple)
- assert r_test.res == res_tuple
- assert r_test.shape != r.shape
-
- # - Test res and bounds -
- # Bounds will be enforced for upper-left pixel, but adjusted by up to one pixel for the lower right bound.
- # for single res value
- r_test = r.reproject(bounds=dst_bounds, res=res_single)
- assert r_test.res == (res_single, res_single)
- assert r_test.bounds.left == dst_bounds.left
- assert r_test.bounds.top == dst_bounds.top
- assert np.abs(r_test.bounds.right - dst_bounds.right) < res_single
- assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_single
-
- # For tuple
- r_test = r.reproject(bounds=dst_bounds, res=res_tuple)
- assert r_test.res == res_tuple
- assert r_test.bounds.left == dst_bounds.left
- assert r_test.bounds.top == dst_bounds.top
- assert np.abs(r_test.bounds.right - dst_bounds.right) < res_tuple[0]
- assert np.abs(r_test.bounds.bottom - dst_bounds.bottom) < res_tuple[1]
-
- # - Test crs -
- out_crs = rio.crs.CRS.from_epsg(4326)
- r_test = r.reproject(crs=out_crs)
- assert r_test.crs.to_epsg() == 4326
-
- # -- Additional tests --
- # First, make sure dst_bounds extend beyond current extent to create nodata
- dst_bounds = rio.coords.BoundingBox(
- left=bounds[0], bottom=bounds[1] - r.res[0], right=bounds[2] + 2 * r.res[1], top=bounds[3]
- )
- r_test = r.reproject(bounds=dst_bounds)
- assert np.count_nonzero(r_test.data.mask) > 0
-
- # If nodata falls outside the original image range, check range is preserved (with nearest interpolation)
- r_float = r.astype("float32") # type: ignore
- if (r_float.nodata < np.min(r_float)) or (r_float.nodata > np.max(r_float)):
- r_test = r_float.reproject(bounds=dst_bounds, resampling="nearest")
- assert r_test.nodata == r_float.nodata
- assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0 # Some values should be set to nodata
- assert np.min(r_test.data) == np.min(r_float.data) # But min and max should not be affected
- assert np.max(r_test.data) == np.max(r_float.data)
-
- # Check that nodata works as expected
- r_test = r_float.reproject(bounds=dst_bounds, nodata=9999)
- assert r_test.nodata == 9999
- assert np.count_nonzero(r_test.data.data == r_test.nodata) > 0
-
- # Test that reproject works the same whether data is already loaded or not
- assert r.is_loaded
- r_test1 = r.reproject(crs=out_crs, nodata=0)
- r_unload = gu.Raster(example, load_data=False)
- assert not r_unload.is_loaded
- r_test2 = r_unload.reproject(crs=out_crs, nodata=0)
- assert r_test1.raster_equal(r_test2)
-
- # Test that reproject does not fail with resolution as np.integer or np.float types, single value or tuple
- astype_funcs = [int, np.int32, float, np.float64]
- for astype_func in astype_funcs:
- r.reproject(res=astype_func(20.5), nodata=0)
- for i in range(len(astype_funcs)):
- for j in range(len(astype_funcs)):
- r.reproject(res=(astype_funcs[i](20.5), astype_funcs[j](10.5)), nodata=0)
-
- # Test that reprojection works for several bands
- for n in [2, 3, 4]:
- img1 = gu.Raster.from_array(
- np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(0, 500, 1, 1), crs=4326
- )
-
- img2 = gu.Raster.from_array(
- np.ones((n, 500, 500), dtype="uint8"), transform=rio.transform.from_origin(50, 500, 1, 1), crs=4326
- )
-
- out_img = img2.reproject(img1)
- assert np.shape(out_img.data) == (n, 500, 500)
- assert (out_img.count, *out_img.shape) == (n, 500, 500)
-
- # Test that the rounding of resolution is correct for large decimal numbers
- # (we take an example that used to fail, see issue #354 and #357)
- data = np.ones((4759, 2453))
- transform = rio.transform.Affine(
- 24.12423878332849, 0.0, 238286.29553975424, 0.0, -24.12423878332849, 6995453.456051373
- )
- crs = rio.CRS.from_epsg(32633)
- nodata = -9999.0
- rst = gu.Raster.from_array(data=data, transform=transform, crs=crs, nodata=nodata)
-
- rst_reproj = rst.reproject(bounds=rst.bounds, res=(20.0, 20.0))
- # This used to be 19.999999999999999 due to floating point precision
- assert rst_reproj.res == (20.0, 20.0)
-
- # -- Test match reference functionalities --
-
- # - Create 2 artificial rasters -
- # for r2b, bounds are cropped to the upper left by an integer number of pixels (i.e. crop)
- # for r2, resolution is also set to 2/3 the input res
- min_size = min(r.shape)
- rng = np.random.default_rng(42)
- rand_int = rng.integers(min_size / 10, min(r.shape) - min_size / 10)
- new_transform = rio.transform.from_origin(
- r.bounds.left + rand_int * r.res[0], r.bounds.top - rand_int * abs(r.res[1]), r.res[0], r.res[1]
- )
-
- # data is cropped to the same extent
- new_data = r.data[rand_int::, rand_int::]
- r2b = gu.Raster.from_array(data=new_data, transform=new_transform, crs=r.crs, nodata=r.nodata)
-
- # Create a raster with different resolution
- dst_res = r.res[0] * 2 / 3
- r2 = r2b.reproject(res=dst_res)
- assert r2.res == (dst_res, dst_res)
-
- # Assert the initial rasters are different
- assert r.bounds != r2b.bounds
- assert r.shape != r2b.shape
- assert r.bounds != r2.bounds
- assert r.shape != r2.shape
- assert r.res != r2.res
-
- # Test reprojecting with ref=r2b (i.e. crop) -> output should have same shape, bounds and data, i.e. be the
- # same object
- r3 = r.reproject(r2b)
- assert r3.bounds == r2b.bounds
- assert r3.shape == r2b.shape
- assert r3.bounds == r2b.bounds
- assert r3.transform == r2b.transform
- assert np.array_equal(r3.data.data, r2b.data.data, equal_nan=True)
- assert np.array_equal(r3.data.mask, r2b.data.mask)
-
- if DO_PLOT:
- fig1, ax1 = plt.subplots()
- r.plot(ax=ax1, title="Raster 1")
-
- fig2, ax2 = plt.subplots()
- r2b.plot(ax=ax2, title="Raster 2")
-
- fig3, ax3 = plt.subplots()
- r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2")
-
- plt.show()
-
- # Test reprojecting with ref=r2 -> output should have same shape, bounds and transform
- # Data should be slightly different due to difference in input resolution
- r3 = r.reproject(r2)
- assert r3.bounds == r2.bounds
- assert r3.shape == r2.shape
- assert r3.bounds == r2.bounds
- assert r3.transform == r2.transform
- assert not np.array_equal(r3.data.data, r2.data.data, equal_nan=True)
-
- if DO_PLOT:
- fig1, ax1 = plt.subplots()
- r.plot(ax=ax1, title="Raster 1")
-
- fig2, ax2 = plt.subplots()
- r2.plot(ax=ax2, title="Raster 2")
-
- fig3, ax3 = plt.subplots()
- r3.plot(ax=ax3, title="Raster 1 reprojected to Raster 2")
-
- plt.show()
-
- # -- Check that if mask is modified afterwards, it is taken into account during reproject -- #
- # Create a raster with (additional) random gaps
- r_gaps = r.copy()
- nsamples = 200
- rand_indices = gu.raster.subsample_array(r_gaps.data, nsamples, return_indices=True)
- r_gaps.data[rand_indices] = np.ma.masked
- assert np.sum(r_gaps.data.mask) - np.sum(r.data.mask) == nsamples # sanity check
-
- # reproject raster, and reproject mask. Check that both have same number of masked pixels
- # TODO: should test other resampling algo
- r_gaps_reproj = r_gaps.reproject(res=dst_res, resampling="nearest")
- mask = gu.Raster.from_array(
- r_gaps.data.mask.astype("uint8"), crs=r_gaps.crs, transform=r_gaps.transform, nodata=None
- )
- mask_reproj = mask.reproject(res=dst_res, nodata=255, resampling="nearest")
- # Final masked pixels are those originally masked (=1) and the values masked during reproject, e.g. edges
- tot_masked_true = np.count_nonzero(mask_reproj.data.mask) + np.count_nonzero(mask_reproj.data == 1)
- assert np.count_nonzero(r_gaps_reproj.data.mask) == tot_masked_true
-
- # If a nodata is set, make sure it is preserved
- r_nodata = r.copy()
-
- r_nodata.set_nodata(0)
-
- r3 = r_nodata.reproject(r2)
- assert r_nodata.nodata == r3.nodata
-
- # -- Check inplace behaviour works -- #
-
- # Check when transform is updated (via res)
- r_tmp_res = r.copy()
- r_res = r_tmp_res.reproject(res=r.res[0] / 2)
- r_tmp_res.reproject(res=r.res[0] / 2, inplace=True)
-
- assert r_res.raster_equal(r_tmp_res)
-
- # Check when CRS is updated
- r_tmp_crs = r.copy()
- r_crs = r_tmp_crs.reproject(crs=out_crs)
- r_tmp_crs.reproject(crs=out_crs, inplace=True)
-
- assert r_crs.raster_equal(r_tmp_crs)
-
- # -- Test additional errors raised for argument combinations -- #
-
- # If both ref and crs are set
- with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")):
- _ = r.reproject(ref=r2, crs=r.crs)
-
- # Size and res are mutually exclusive
- with pytest.raises(ValueError, match=re.escape("size and res both specified. Specify only one.")):
- _ = r.reproject(grid_size=(10, 10), res=50)
-
- # If wrong type for `ref`
- with pytest.raises(
- TypeError, match=re.escape("Type of ref not understood, must be path to file (str), Raster.")
- ):
- _ = r.reproject(ref=3)
-
- # If input reference is string and file and does not exist
- with pytest.raises(ValueError, match=re.escape("Reference raster does not exist.")):
- _ = r.reproject(ref="no_file.tif")
-
- # -- Check warning for area_or_point works -- #
- r.set_area_or_point("Area", shift_area_or_point=False)
- r2 = r.copy()
- r2.set_area_or_point("Point", shift_area_or_point=False)
-
- with pytest.warns(UserWarning, match='One raster has a pixel interpretation "Area" and the other "Point".*'):
- r.reproject(r2)
-
- # Check that reprojecting preserves interpretation
- r_reproj = r.reproject(res=r.res[0] * 2)
- assert r_reproj.area_or_point == "Area"
- r2_reproj = r2.reproject(res=r2.res[0] * 2)
- assert r2_reproj.area_or_point == "Point"
-
@pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore
def test_intersection(self, example: list[str]) -> None:
"""Check the behaviour of the intersection function"""
@@ -2302,7 +1659,7 @@ def test_astype(self, example: str) -> None:
# The multi-band example will not have a colorbar, so not used in tests
@pytest.mark.parametrize("example", [landsat_b4_path, landsat_b4_crop_path, aster_dem_path]) # type: ignore
@pytest.mark.parametrize("figsize", np.arange(2, 20, 2)) # type: ignore
- def test_show_cbar(self, example, figsize) -> None:
+ def test_plot_cbar(self, example, figsize) -> None:
"""
Test cbar matches plot height.
"""
@@ -2330,7 +1687,7 @@ def test_show_cbar(self, example, figsize) -> None:
# Assert height is the same
assert h == pytest.approx(h_cbar)
- def test_show(self) -> None:
+ def test_plot(self) -> None:
# Read single band raster and RGB raster
img = gu.Raster(self.landsat_b4_path)
img_RGB = gu.Raster(self.landsat_rgb_path)
@@ -2587,408 +1944,6 @@ def test_split_bands(self) -> None:
red_c.data.data.squeeze().astype("float32"), img.data.data[0, :, :].astype("float32"), equal_nan=True
)
- def test_resampling_str(self) -> None:
- """Test that resampling methods can be given as strings instead of rio enums."""
- warnings.simplefilter("error")
- assert resampling_method_from_str("nearest") == rio.enums.Resampling.nearest # noqa
- assert resampling_method_from_str("cubic_spline") == rio.enums.Resampling.cubic_spline # noqa
-
- # Check that odd strings return the appropriate error.
- try:
- resampling_method_from_str("CUBIC_SPLINE") # noqa
- except ValueError as exception:
- if "not a valid rasterio.enums.Resampling method" not in str(exception):
- raise exception
-
- img1 = gu.Raster(self.landsat_b4_path)
- img2 = gu.Raster(self.landsat_b4_crop_path)
- # Set img2 pixel interpretation as "Point" to match "img1" and avoid any warnings
- img2.set_area_or_point("Point", shift_area_or_point=False)
- img1.set_nodata(0)
- img2.set_nodata(0)
-
- # Resample the rasters using a new resampling method and see that the string and enum gives the same result.
- img3a = img1.reproject(img2, resampling="q1")
- img3b = img1.reproject(img2, resampling=rio.enums.Resampling.q1)
- assert img3a.raster_equal(img3b)
-
- @pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path]) # type: ignore
- def test_polygonize(self, example: str) -> None:
- """Test that polygonize doesn't raise errors."""
-
- img = gu.Raster(example)
-
- # -- Test 1: basic functioning of polygonize --
-
- # Get unique value for image and the corresponding area
- value = np.unique(img)[0]
- pixel_area = np.count_nonzero(img.data == value) * img.res[0] * img.res[1]
-
- # Polygonize the raster for this value, and compute the total area
- polygonized = img.polygonize(target_values=value)
- polygon_area = polygonized.ds.area.sum()
-
- # Check that these two areas are approximately equal
- assert polygon_area == pytest.approx(pixel_area)
- assert isinstance(polygonized, gu.Vector)
- assert polygonized.crs == img.crs
-
- # Check default name of data column, and that defining a custom name works the same
- assert "id" in polygonized.ds.columns
- polygonized2 = img.polygonize(target_values=value, data_column_name="myname")
- assert "myname" in polygonized2.ds.columns
- assert np.array_equal(polygonized2.ds["myname"].values, polygonized.ds["id"].values)
-
- # -- Test 2: data types --
-
- # Check that polygonize works as expected for any input dtype (e.g. float64 being not supported by GeoPandas)
- for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64"]:
- img_dtype = img.copy()
- with warnings.catch_warnings():
- warnings.filterwarnings(
- "ignore", category=UserWarning, message="dtype conversion will result in a " "loss of information.*"
- )
- warnings.filterwarnings(
- "ignore",
- category=UserWarning,
- message="Unmasked values equal to the nodata value found in data array.*",
- )
- img_dtype = img_dtype.astype(dtype)
- value = np.unique(img_dtype)[0]
- img_dtype.polygonize(target_values=value)
-
- # And for a boolean object, such as a mask
- mask = img > value
- mask.polygonize(target_values=1)
-
- # Test all options, with both an artificial Raster (that has all target values) and a real Raster
- @pytest.mark.parametrize("distunits", ["GEO", "PIXEL"]) # type: ignore
- # 0 and 1,2,3 are especially useful for the artificial Raster, and 112 for the real Raster
- @pytest.mark.parametrize("target_values", [[1, 2, 3], [0], [112], None]) # type: ignore
- @pytest.mark.parametrize(
- "raster",
- [
- gu.Raster(landsat_b4_path),
- gu.Raster.from_array(
- np.arange(25, dtype="int32").reshape(5, 5), transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326
- ),
- ],
- ) # type: ignore
- def test_proximity_against_gdal(self, distunits: str, target_values: list[float] | None, raster: gu.Raster) -> None:
- """Test that proximity matches the results of GDAL for any parameter."""
-
- # TODO: When adding new rasters for tests, specify warning only for Landsat
- warnings.filterwarnings("ignore", message="Setting default nodata -99999 to mask non-finite values *")
-
- # We generate proximity with GDAL and GeoUtils
- gdal_proximity = run_gdal_proximity(raster, target_values=target_values, distunits=distunits)
- # We translate distunits GDAL option into its GeoUtils equivalent
- if distunits == "GEO":
- distance_unit = "georeferenced"
- else:
- distance_unit = "pixel"
- geoutils_proximity = (
- raster.proximity(distance_unit=distance_unit, target_values=target_values)
- .data.data.squeeze()
- .astype("float32")
- )
-
- # The results should be the same in all cases
- try:
- # In some cases, the proximity differs slightly (generally <1%) for complex settings
- # (Landsat Raster with target of 112)
- # It looks like GDAL might not have the right value,
- # so this particular case is treated differently in tests
- if target_values is not None and target_values[0] == 112 and raster.filename is not None:
- # Get index and number of not almost equal point (tolerance of 10-4)
- ind_not_almost_equal = np.abs(gdal_proximity - geoutils_proximity) > 1e-04
- nb_not_almost_equal = np.count_nonzero(ind_not_almost_equal)
- # Check that this is a minority of points (less than 0.5%)
- assert nb_not_almost_equal < 0.005 * raster.width * raster.height
-
- # Replace these exceptions by zero in both
- gdal_proximity[ind_not_almost_equal] = 0.0
- geoutils_proximity[ind_not_almost_equal] = 0.0
- # Check that all the rest is almost equal
- assert np.allclose(gdal_proximity, geoutils_proximity, atol=1e-04, equal_nan=True)
-
- # Otherwise, results are exactly equal
- else:
- assert np.array_equal(gdal_proximity, geoutils_proximity, equal_nan=True)
-
- # For debugging
- except Exception as exception:
- import matplotlib.pyplot as plt
-
- # Plotting the xdem and GDAL attributes for comparison (plotting "diff" can also help debug)
- plt.subplot(121)
- plt.imshow(gdal_proximity)
- # plt.imshow(np.abs(gdal_proximity - geoutils_proximity)>0.1)
- plt.colorbar()
- plt.subplot(122)
- plt.imshow(geoutils_proximity)
- # plt.imshow(raster.data.data == 112)
- plt.colorbar()
- plt.show()
-
- # ind_not_equal = np.abs(gdal_proximity - geoutils_proximity)>0.1
- # print(gdal_proximity[ind_not_equal])
- # print(geoutils_proximity[ind_not_equal])
-
- raise exception
-
- def test_proximity_parameters(self) -> None:
- """
- Test that new (different to GDAL's) proximity parameters run.
- No need to test the results specifically, as those rely entirely on the previous test with GDAL,
- and tests in rasterize and shapely.
- #TODO: Maybe add one test with an artificial vector to check it works as intended
- """
-
- # -- Test 1: with self's Raster alone --
- raster1 = gu.Raster(self.landsat_b4_path)
- prox1 = raster1.proximity()
-
- # The raster should have the same extent, resolution and CRS
- assert raster1.georeferenced_grid_equal(prox1)
-
- # It should change with target values specified
- prox2 = raster1.proximity(target_values=[255])
- assert not np.array_equal(prox1.data, prox2.data)
-
- # -- Test 2: with a vector provided --
- vector = gu.Vector(self.everest_outlines_path)
-
- # With default options (boundary geometry)
- raster1.proximity(vector=vector)
-
- # With the base geometry
- raster1.proximity(vector=vector, geometry_type="geometry")
-
- # With another geometry option
- raster1.proximity(vector=vector, geometry_type="centroid")
-
- # With only inside proximity
- raster1.proximity(vector=vector, in_or_out="in")
-
- def test_to_pointcloud(self) -> None:
- """Test to_pointcloud method."""
-
- # 1/ Single band synthetic data
-
- # Create a small raster to test point sampling on
- img_arr = np.arange(25, dtype="int32").reshape(5, 5)
- img0 = gu.Raster.from_array(img_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326)
-
- # Sample the whole raster (fraction==1)
- points = img0.to_pointcloud()
- points_arr = img0.to_pointcloud(as_array=True)
-
- # Check output types
- assert isinstance(points, gu.Vector)
- assert isinstance(points_arr, np.ndarray)
-
- # Check that both outputs (array or vector) are fully consistent, order matters here
- assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0])
- assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1])
- assert np.array_equal(points.ds["b1"].values, points_arr[:, 2])
-
- # Validate that 25 points were sampled (equating to img1.height * img1.width) with x, y, and band0 values.
- assert points_arr.shape == (25, 3)
- assert points.ds.shape == (25, 2) # One less column here due to geometry storing X and Y
- # Check that X, Y and Z arrays are equal to raster array input independently of value order
- x_coords, y_coords = img0.ij2xy(i=np.arange(0, 5), j=np.arange(0, 5))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5)))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5)))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_arr.ravel()))
-
- # Check that subsampling works properly
- points_arr = img0.to_pointcloud(subsample=0.2, as_array=True)
- assert points_arr.shape == (5, 3)
-
- # All values should be between 0 and 25
- assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25)
-
- # 2/ Multi-band synthetic data
- img_arr = np.arange(25, dtype="int32").reshape(5, 5)
- img_3d_arr = np.stack((img_arr, 25 + img_arr, 50 + img_arr), axis=0)
- img3d = gu.Raster.from_array(img_3d_arr, transform=rio.transform.from_origin(0, 5, 1, 1), crs=4326)
-
- # Sample the whole raster (fraction==1)
- points = img3d.to_pointcloud(auxiliary_data_bands=[2, 3])
- points_arr = img3d.to_pointcloud(as_array=True, auxiliary_data_bands=[2, 3])
-
- # Check equality between both output types
- assert np.array_equal(points.ds.geometry.x.values, points_arr[:, 0])
- assert np.array_equal(points.ds.geometry.y.values, points_arr[:, 1])
- assert np.array_equal(points.ds["b1"].values, points_arr[:, 2])
- assert np.array_equal(points.ds["b2"].values, points_arr[:, 3])
- assert np.array_equal(points.ds["b3"].values, points_arr[:, 4])
-
- # Check it is the right data
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 0])), np.sort(np.tile(x_coords, 5)))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 1])), np.sort(np.tile(y_coords, 5)))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 2])), np.sort(img_3d_arr[0, :, :].ravel()))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 3])), np.sort(img_3d_arr[1, :, :].ravel()))
- assert np.array_equal(np.sort(np.asarray(points_arr[:, 4])), np.sort(img_3d_arr[2, :, :].ravel()))
-
- # With a subsample
- points_arr = img3d.to_pointcloud(as_array=True, subsample=10, auxiliary_data_bands=[2, 3])
- assert points_arr.shape == (10, 5)
-
- # Check the values are still good
- assert all(0 <= points_arr[:, 2]) and all(points_arr[:, 2] < 25)
- assert all(25 <= points_arr[:, 3]) and all(points_arr[:, 3] < 50)
- assert all(50 <= points_arr[:, 4]) and all(points_arr[:, 4] < 75)
-
- # 3/ Single-band real raster with nodata values
- img1 = gu.Raster(self.aster_dem_path)
-
- # Get a large sample to ensure they should be some NaNs normally
- points_arr = img1.to_pointcloud(subsample=10000, as_array=True, random_state=42)
- points = img1.to_pointcloud(subsample=10000, random_state=42)
-
- # This should not load the image
- assert not img1.is_loaded
-
- # The subsampled values should be valid and the right shape
- assert points_arr.shape == (10000, 3)
- assert points.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y
- assert all(np.isfinite(points_arr[:, 2]))
-
- # The output should respect the default band naming and the input CRS
- assert np.array_equal(points.ds.columns, ["b1", "geometry"])
- assert points.crs == img1.crs
-
- # Try setting the band name
- points = img1.to_pointcloud(data_column_name="lol", subsample=10)
- assert np.array_equal(points.ds.columns, ["lol", "geometry"])
-
- # Keeping the nodata values
- points_invalid = img1.to_pointcloud(subsample=10000, random_state=42, skip_nodata=False)
-
- # The subsampled values should not all be valid and the right shape
- assert points_invalid.ds.shape == (10000, 2) # One less column here due to geometry storing X and Y
- assert any(~np.isfinite(points_invalid["b1"].values))
-
- # 4/ Multi-band real raster
- img2 = gu.Raster(self.landsat_rgb_path)
-
- # By default only loads a single band without loading
- points_arr = img2.to_pointcloud(subsample=10, as_array=True)
- points = img2.to_pointcloud(subsample=10)
-
- assert points_arr.shape == (10, 3)
- assert points.ds.shape == (10, 2) # One less column here due to geometry storing X and Y
- assert not img2.is_loaded
-
- # Storing auxiliary bands
- points_arr = img2.to_pointcloud(subsample=10, as_array=True, auxiliary_data_bands=[2, 3])
- points = img2.to_pointcloud(subsample=10, auxiliary_data_bands=[2, 3])
- assert points_arr.shape == (10, 5)
- assert points.ds.shape == (10, 4) # One less column here due to geometry storing X and Y
- assert not img2.is_loaded
- assert np.array_equal(points.ds.columns, ["b1", "b2", "b3", "geometry"])
-
- # Try setting the column name of a specific band while storing all
- points = img2.to_pointcloud(subsample=10, data_column_name="yes", data_band=2, auxiliary_data_bands=[1, 3])
- assert np.array_equal(points.ds.columns, ["yes", "b1", "b3", "geometry"])
-
- # 5/ Error raising
- with pytest.raises(ValueError, match="Data column name must be a string.*"):
- img1.to_pointcloud(data_column_name=1) # type: ignore
- with pytest.raises(
- ValueError,
- match=re.escape("Data band number must be an integer between 1 and the total number of bands (3)."),
- ):
- img2.to_pointcloud(data_band=4)
- with pytest.raises(
- ValueError, match="Passing auxiliary column names requires passing auxiliary data band numbers as well."
- ):
- img2.to_pointcloud(auxiliary_column_names=["a"])
- with pytest.raises(
- ValueError, match="Auxiliary data band number must be an iterable containing only integers."
- ):
- img2.to_pointcloud(auxiliary_data_bands=[1, 2.5]) # type: ignore
- img2.to_pointcloud(auxiliary_data_bands="lol") # type: ignore
- with pytest.raises(
- ValueError,
- match=re.escape("Auxiliary data band numbers must be between 1 and the total number of bands (3)."),
- ):
- img2.to_pointcloud(auxiliary_data_bands=[0])
- img2.to_pointcloud(auxiliary_data_bands=[4])
- with pytest.raises(
- ValueError, match=re.escape("Main data band 1 should not be listed in auxiliary data bands [1, 2].")
- ):
- img2.to_pointcloud(auxiliary_data_bands=[1, 2])
- with pytest.raises(ValueError, match="Auxiliary column names must be an iterable containing only strings."):
- img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", 1])
- with pytest.raises(
- ValueError, match="Length of auxiliary column name and data band numbers should be the same*"
- ):
- img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", "lol2", "lol3"])
-
- def test_from_pointcloud(self) -> None:
- """Test from_pointcloud method."""
-
- # 1/ Create a small raster to test point sampling on
- shape = (5, 5)
- nodata = 100
- img_arr = np.arange(np.prod(shape), dtype="int32").reshape(shape)
- transform = rio.transform.from_origin(0, 5, 1, 1)
- img1 = gu.Raster.from_array(img_arr, transform=transform, crs=4326, nodata=nodata)
-
- # Check both inputs work (grid coords or transform+shape) on a subsample
- pc1 = img1.to_pointcloud(subsample=10)
- img1_sub = gu.Raster.from_pointcloud_regular(pc1, transform=transform, shape=shape)
-
- grid_coords1 = img1.coords(grid=False)
- img1_sub2 = gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1)
-
- assert img1_sub.raster_equal(img1_sub2)
-
- # Check that number of valid values are equal to point cloud size
- assert np.count_nonzero(~img1_sub.data.mask) == 10
-
- # With no subsampling, should get the exact same raster back
- pc1_full = img1.to_pointcloud()
- img1_full = gu.Raster.from_pointcloud_regular(pc1_full, transform=transform, shape=shape, nodata=nodata)
- assert img1.raster_equal(img1_full, warn_failure_reason=True)
-
- # 2/ Single-band real raster with nodata values
- img2 = gu.Raster(self.aster_dem_path)
- nodata = img2.nodata
- transform = img2.transform
- shape = img2.shape
-
- # Check both inputs work (grid coords or transform+shape) on a subsample
- pc2 = img2.to_pointcloud(subsample=10000, random_state=42)
- img2_sub = gu.Raster.from_pointcloud_regular(pc2, transform=transform, shape=shape, nodata=nodata)
-
- grid_coords2 = img2.coords(grid=False)
- img2_sub2 = gu.Raster.from_pointcloud_regular(pc2, grid_coords=grid_coords2, nodata=nodata)
-
- assert img2_sub.raster_equal(img2_sub2, warn_failure_reason=True)
-
- # Check that number of valid values are equal to point cloud size
- assert np.count_nonzero(~img2_sub.data.mask) == 10000
-
- # With no subsampling, should get the exact same raster back
- pc2_full = img2.to_pointcloud()
- img2_full = gu.Raster.from_pointcloud_regular(pc2_full, transform=transform, shape=shape, nodata=nodata)
- assert img2.raster_equal(img2_full, warn_failure_reason=True, strict_masked=False)
-
- # 3/ Error raising
- with pytest.raises(TypeError, match="Input grid coordinates must be 1D arrays.*"):
- gu.Raster.from_pointcloud_regular(pc1, grid_coords=(1, "lol")) # type: ignore
- with pytest.raises(ValueError, match="Grid coordinates must be regular*"):
- grid_coords1[0][0] += 1
- gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) # type: ignore
- with pytest.raises(
- ValueError, match="Either grid coordinates or both geotransform and shape must be provided."
- ):
- gu.Raster.from_pointcloud_regular(pc1)
-
class TestMask:
# Paths to example data
@@ -3147,149 +2102,6 @@ def test_implicit_logical_casting_real(self, example: str) -> None:
assert np.array_equal(mask.data.data, rst.data.data >= 1)
assert np.array_equal(mask.data.mask, rst.data.mask)
- @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
- def test_reproject(self, mask: gu.Mask) -> None:
- # Test 1: with a classic resampling (bilinear)
-
- # Reproject mask - resample to 100 x 100 grid
- mask_orig = mask.copy()
- mask_reproj = mask.reproject(grid_size=(100, 100), force_source_nodata=2)
-
- # Check instance is respected
- assert isinstance(mask_reproj, gu.Mask)
- # Check the dtype of the original mask was properly reconverted
- assert mask.data.dtype == bool
- # Check the original mask was not modified during reprojection
- assert mask_orig.raster_equal(mask)
-
- # Check inplace behaviour works
- mask_tmp = mask.copy()
- mask_tmp.reproject(grid_size=(100, 100), force_source_nodata=2, inplace=True)
- assert mask_tmp.raster_equal(mask_reproj)
-
- # This should be equivalent to converting the array to uint8, reprojecting, converting back
- mask_uint8 = mask.astype("uint8")
- mask_uint8_reproj = mask_uint8.reproject(grid_size=(100, 100), force_source_nodata=2)
- mask_uint8_reproj.data = mask_uint8_reproj.data.astype("bool")
-
- assert mask_reproj.raster_equal(mask_uint8_reproj)
-
- # Test 2: should raise a warning when the resampling differs from nearest
-
- with pytest.warns(
- UserWarning,
- match="Reprojecting a mask with a resampling method other than 'nearest', "
- "the boolean array will be converted to float during interpolation.",
- ):
- mask.reproject(res=50, resampling="bilinear", force_source_nodata=2)
-
- @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
- def test_crop(self, mask: gu.Mask) -> None:
- # Test with same bounds -> should be the same #
-
- mask_orig = mask.copy()
- crop_geom = mask.bounds
- mask_cropped = mask.crop(crop_geom)
- assert mask_cropped.raster_equal(mask)
-
- # Check if instance is respected
- assert isinstance(mask_cropped, gu.Mask)
- # Check the dtype of the original mask was properly reconverted
- assert mask.data.dtype == bool
- # Check the original mask was not modified during cropping
- assert mask_orig.raster_equal(mask)
-
- # Check inplace behaviour works
- mask_tmp = mask.copy()
- mask_tmp.crop(crop_geom, inplace=True)
- assert mask_tmp.raster_equal(mask_cropped)
-
- # - Test cropping each side by a random integer of pixels - #
- rng = np.random.default_rng(42)
- rand_int = rng.integers(1, min(mask.shape) - 1)
-
- # Left
- crop_geom2 = [crop_geom[0] + rand_int * mask.res[0], crop_geom[1], crop_geom[2], crop_geom[3]]
- mask_cropped = mask.crop(crop_geom2)
- assert list(mask_cropped.bounds) == crop_geom2
- assert np.array_equal(mask.data[:, rand_int:].data, mask_cropped.data.data, equal_nan=True)
- assert np.array_equal(mask.data[:, rand_int:].mask, mask_cropped.data.mask)
-
- # Right
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2] - rand_int * mask.res[0], crop_geom[3]]
- mask_cropped = mask.crop(crop_geom2)
- assert list(mask_cropped.bounds) == crop_geom2
- assert np.array_equal(mask.data[:, :-rand_int].data, mask_cropped.data.data, equal_nan=True)
- assert np.array_equal(mask.data[:, :-rand_int].mask, mask_cropped.data.mask)
-
- # Bottom
- crop_geom2 = [crop_geom[0], crop_geom[1] + rand_int * abs(mask.res[1]), crop_geom[2], crop_geom[3]]
- mask_cropped = mask.crop(crop_geom2)
- assert list(mask_cropped.bounds) == crop_geom2
- assert np.array_equal(mask.data[:-rand_int, :].data, mask_cropped.data.data, equal_nan=True)
- assert np.array_equal(mask.data[:-rand_int, :].mask, mask_cropped.data.mask)
-
- # Top
- crop_geom2 = [crop_geom[0], crop_geom[1], crop_geom[2], crop_geom[3] - rand_int * abs(mask.res[1])]
- mask_cropped = mask.crop(crop_geom2)
- assert list(mask_cropped.bounds) == crop_geom2
- assert np.array_equal(mask.data[rand_int:, :].data, mask_cropped.data, equal_nan=True)
- assert np.array_equal(mask.data[rand_int:, :].mask, mask_cropped.data.mask)
-
- # Test inplace
- mask_orig = mask.copy()
- mask_orig.crop(crop_geom2, inplace=True)
- assert list(mask_orig.bounds) == crop_geom2
- assert np.array_equal(mask.data[rand_int:, :].data, mask_orig.data, equal_nan=True)
- assert np.array_equal(mask.data[rand_int:, :].mask, mask_orig.data.mask)
-
- # Run with match_extent, check that inplace or not yields the same result
-
- # TODO: Pretty sketchy with the current functioning of "match_extent",
- # should we just remove it from Raster.crop() ?
-
- # mask_cropped = mask.crop(crop_geom2, inplace=False, mode="match_extent")
- # mask_orig.crop(crop_geom2, mode="match_extent")
- # assert mask_cropped.raster_equal(mask_orig)
-
- @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
- def test_polygonize(self, mask: gu.Mask) -> None:
-
- mask_orig = mask.copy()
- # Run default
- vect = mask.polygonize()
- # Check the dtype of the original mask was properly reconverted
- assert mask.data.dtype == bool
- # Check the original mask was not modified during polygonizing
- assert mask_orig.raster_equal(mask)
-
- # Check the output is cast into a vector
- assert isinstance(vect, gu.Vector)
-
- # Run with zero as target
- vect = mask.polygonize(target_values=0)
- assert isinstance(vect, gu.Vector)
-
- # Check a warning is raised when using a non-boolean value
- with pytest.warns(UserWarning, match="In-value converted to 1 for polygonizing boolean mask."):
- mask.polygonize(target_values=2)
-
- @pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
- def test_proximity(self, mask: gu.Mask) -> None:
-
- mask_orig = mask.copy()
- # Run default
- rast = mask.proximity()
- # Check the dtype of the original mask was properly reconverted
- assert mask.data.dtype == bool
- # Check the original mask was not modified during reprojection
- assert mask_orig.raster_equal(mask)
-
- # Check that output is cast back into a raster
- assert isinstance(rast, gu.Raster)
- # A mask is a raster, so also need to check this
- assert not isinstance(rast, gu.Mask)
-
@pytest.mark.parametrize("mask", [mask_landsat_b4, mask_aster_dem, mask_everest]) # type: ignore
def test_save(self, mask: gu.Mask) -> None:
"""Test saving for masks"""
diff --git a/tests/test_raster/test_satimg.py b/tests/test_raster/test_satimg.py
index b6aa3ff6..c05a46ee 100644
--- a/tests/test_raster/test_satimg.py
+++ b/tests/test_raster/test_satimg.py
@@ -1,6 +1,7 @@
"""
Test functions for SatelliteImage class
"""
+
import datetime
import datetime as dt
import sys
diff --git a/tests/test_vector.py b/tests/test_vector.py
deleted file mode 100644
index 08d848d3..00000000
--- a/tests/test_vector.py
+++ /dev/null
@@ -1,927 +0,0 @@
-from __future__ import annotations
-
-import inspect
-import os.path
-import pathlib
-import re
-import tempfile
-import warnings
-
-import geopandas as gpd
-import geopandas.base
-import matplotlib.pyplot as plt
-import numpy as np
-import pyproj
-import pytest
-from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
-from pandas.testing import assert_series_equal
-from scipy.ndimage import binary_erosion
-from shapely.geometry.base import BaseGeometry
-from shapely.geometry.linestring import LineString
-from shapely.geometry.multilinestring import MultiLineString
-from shapely.geometry.multipolygon import MultiPolygon
-from shapely.geometry.polygon import Polygon
-
-import geoutils as gu
-
-GLACIER_OUTLINES_URL = "http://public.data.npolar.no/cryoclim/CryoClim_GAO_SJ_1990.zip"
-
-
-class TestVector:
- landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped")
- everest_outlines_path = gu.examples.get_path("everest_rgi_outlines")
- aster_dem_path = gu.examples.get_path("exploradores_aster_dem")
- aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines")
- glacier_outlines = gu.Vector(GLACIER_OUTLINES_URL)
-
- def test_init(self) -> None:
- """Test class initiation works as intended"""
-
- # First, with a URL filename
- v = gu.Vector(GLACIER_OUTLINES_URL)
- assert isinstance(v, gu.Vector)
-
- # Second, with a string filename
- v0 = gu.Vector(self.aster_outlines_path)
- assert isinstance(v0, gu.Vector)
-
- # Third, with a pathlib path
- path = pathlib.Path(self.aster_outlines_path)
- v1 = gu.Vector(path)
- assert isinstance(v1, gu.Vector)
-
- # Fourth, with a geopandas dataframe
- v2 = gu.Vector(gpd.read_file(self.aster_outlines_path))
- assert isinstance(v2, gu.Vector)
-
- # Fifth, passing a Vector itself (points back to Vector passed)
- v3 = gu.Vector(v2)
- assert isinstance(v3, gu.Vector)
-
- # Check errors are raised when filename has wrong type
- with pytest.raises(TypeError, match="Filename argument should be a string, Path or geopandas.GeoDataFrame."):
- gu.Vector(1) # type: ignore
-
- def test_copy(self) -> None:
- vector2 = self.glacier_outlines.copy()
-
- assert vector2 is not self.glacier_outlines
-
- vector2.ds = vector2.ds.query("NAME == 'Ayerbreen'")
-
- assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0]
-
- def test_info(self) -> None:
-
- v = gu.Vector(GLACIER_OUTLINES_URL)
-
- # Check default runs without error (prints to screen)
- output = v.info()
- assert output is None
-
- # Otherwise returns info
- output2 = v.info(verbose=False)
- assert isinstance(output2, str)
- list_prints = ["Filename", "Coordinate system", "Extent", "Number of features", "Attributes"]
- assert all(p in output2 for p in list_prints)
-
- def test_query(self) -> None:
- vector2 = self.glacier_outlines.query("NAME == 'Ayerbreen'")
-
- assert vector2 is not self.glacier_outlines
-
- assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0]
-
- def test_save(self) -> None:
- """Test the save wrapper for GeoDataFrame.to_file()."""
-
- vector = gu.Vector(self.aster_outlines_path)
-
- # Create a temporary file in a temporary directory
- temp_dir = tempfile.TemporaryDirectory()
- temp_file = os.path.join(temp_dir.name, "test.gpkg")
-
- # Save and check the file exists
- vector.save(temp_file)
- assert os.path.exists(temp_file)
-
- # Open and check the object is the same
- vector_save = gu.Vector(temp_file)
- vector_save.vector_equal(vector)
-
- def test_bounds(self) -> None:
- bounds = self.glacier_outlines.bounds
-
- assert bounds.left < bounds.right
- assert bounds.bottom < bounds.top
-
- assert bounds.left == self.glacier_outlines.ds.total_bounds[0]
- assert bounds.bottom == self.glacier_outlines.ds.total_bounds[1]
- assert bounds.right == self.glacier_outlines.ds.total_bounds[2]
- assert bounds.top == self.glacier_outlines.ds.total_bounds[3]
-
- def test_footprint(self) -> None:
-
- footprint = self.glacier_outlines.footprint
-
- assert isinstance(footprint, gu.Vector)
- assert footprint.vector_equal(self.glacier_outlines.get_footprint_projected(self.glacier_outlines.crs))
-
- def test_reproject(self) -> None:
- """Test that the reproject function works as intended"""
-
- v0 = gu.Vector(self.aster_outlines_path)
- r0 = gu.Raster(self.aster_dem_path)
- v1 = gu.Vector(self.everest_outlines_path)
-
- # First, test with a EPSG integer
- v1 = v0.reproject(crs=32617)
- assert isinstance(v1, gu.Vector)
- assert v1.crs.to_epsg() == 32617
-
- # Check the inplace behaviour matches the not-inplace one
- v2 = v0.copy()
- v2.reproject(crs=32617, inplace=True)
- v2.vector_equal(v1)
-
- # Check that the reprojection is the same as with geopandas
- gpd1 = v0.ds.to_crs(epsg=32617)
- assert_geodataframe_equal(gpd1, v1.ds)
-
- # Second, with a Raster object
- v2 = v0.reproject(r0)
- assert v2.crs == r0.crs
-
- # Third, with a Vector object that has a different CRS
- assert v0.crs != v1.crs
- v3 = v0.reproject(v1)
- assert v3.crs == v1.crs
-
- # Fourth, check that errors are raised when appropriate
- # When no destination CRS is defined, or both dst_crs and dst_ref are passed
- with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")):
- v0.reproject()
- v0.reproject(ref=r0, crs=32617)
- # If the path provided does not exist
- with pytest.raises(ValueError, match=re.escape("Reference raster or vector path does not exist.")):
- v0.reproject(ref="tmp.lol")
- # If it exists but cannot be opened by rasterio or fiona
- with pytest.raises(ValueError, match=re.escape("Could not open raster or vector with rasterio or pyogrio.")):
- v0.reproject(ref="geoutils/examples.py")
- # If input of wrong type
- with pytest.raises(TypeError, match=re.escape("Type of ref must be string path to file, Raster or Vector.")):
- v0.reproject(ref=10) # type: ignore
-
- def test_rasterize_proj(self) -> None:
- # Capture the warning on resolution not matching exactly bounds
- with pytest.warns(UserWarning):
- burned = self.glacier_outlines.rasterize(xres=3000)
-
- assert burned.shape[0] == 146
- assert burned.shape[1] == 115
-
- def test_rasterize_unproj(self) -> None:
- """Test rasterizing an EPSG:3426 dataset into a projection."""
-
- vct = gu.Vector(self.everest_outlines_path)
- rst = gu.Raster(self.landsat_b4_crop_path)
-
- # Use Web Mercator at 30 m.
- # Capture the warning on resolution not matching exactly bounds
- with pytest.warns(UserWarning):
- burned = vct.rasterize(xres=30, crs=3857)
-
- assert burned.shape[0] == 1251
- assert burned.shape[1] == 1522
-
- # Typically, rasterize returns a raster
- burned_in2_out1 = vct.rasterize(raster=rst, in_value=2, out_value=1)
- assert isinstance(burned_in2_out1, gu.Raster)
-
- # For an in_value of 1 and out_value of 0 (default), it returns a mask
- burned_mask = vct.rasterize(raster=rst, in_value=1)
- assert isinstance(burned_mask, gu.Mask)
-
- # Check that rasterizing with in_value=1 is the same as creating a mask
- assert burned_mask.raster_equal(vct.create_mask(raster=rst))
-
- # The two rasterization should match
- assert np.all(burned_in2_out1[burned_mask] == 2)
- assert np.all(burned_in2_out1[~burned_mask] == 1)
-
- # Check that errors are raised
- with pytest.raises(ValueError, match="Only one of raster or crs can be provided."):
- vct.rasterize(raster=rst, crs=3857)
-
- test_data = [[landsat_b4_crop_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]]
-
- @pytest.mark.parametrize("data", test_data) # type: ignore
- def test_crop(self, data: list[str]) -> None:
- # Load data
- raster_path, outlines_path = data
- rst = gu.Raster(raster_path)
- outlines = gu.Vector(outlines_path)
-
- # Need to reproject to r.crs. Otherwise, crop will work but will be approximate
- # Because outlines might be warped in a different crs
- outlines.ds = outlines.ds.to_crs(rst.crs)
-
- # Crop
- outlines_new = outlines.copy()
- outlines_new.crop(crop_geom=rst, inplace=True)
-
- # Check default behaviour - crop and return copy
- outlines_copy = outlines.crop(crop_geom=rst)
-
- # Crop by passing bounds
- outlines_new_bounds = outlines.copy()
- outlines_new_bounds.crop(crop_geom=list(rst.bounds), inplace=True)
- assert_geodataframe_equal(outlines_new.ds, outlines_new_bounds.ds)
- # Check the return-by-copy as well
- assert_geodataframe_equal(outlines_copy.ds, outlines_new_bounds.ds)
-
- # Verify that geometries intersect with raster bound
- rst_poly = gu.projtools.bounds2poly(rst.bounds)
- intersects_new = []
- for poly in outlines_new.ds.geometry:
- intersects_new.append(poly.intersects(rst_poly))
-
- assert np.all(intersects_new)
-
- # Check that some of the original outlines did not intersect and were removed
- intersects_old = []
- for poly in outlines.ds.geometry:
- intersects_old.append(poly.intersects(rst_poly))
-
- assert np.sum(intersects_old) == np.sum(intersects_new)
-
- # Check that some features were indeed removed
- assert np.sum(~np.array(intersects_old)) > 0
-
- # Check that error is raised when cropGeom argument is invalid
- with pytest.raises(TypeError, match="Crop geometry must be a Raster, Vector, or list of coordinates."):
- outlines.crop(1, inplace=True) # type: ignore
-
- def test_translate(self) -> None:
-
- vector = gu.Vector(self.everest_outlines_path)
-
- # Check default behaviour is not inplace
- vector_shifted = vector.translate(xoff=2.5, yoff=5.7)
- assert isinstance(vector_shifted, gu.Vector)
- assert_geoseries_equal(vector_shifted.geometry, vector.geometry.translate(xoff=2.5, yoff=5.7))
-
- # Check inplace behaviour works correctly
- vector2 = vector.copy()
- output = vector2.translate(xoff=2.5, yoff=5.7, inplace=True)
- assert output is None
- assert_geoseries_equal(vector2.geometry, vector_shifted.geometry)
-
- def test_proximity(self) -> None:
- """
- The core functionality is already tested against GDAL in test_raster: just verify the vector-specific behaviour.
- #TODO: add an artificial test as well (mirroring TODO in test_raster)
- """
-
- vector = gu.Vector(self.everest_outlines_path)
-
- # -- Test 1: with a Raster provided --
- raster1 = gu.Raster(self.landsat_b4_crop_path)
- prox1 = vector.proximity(raster=raster1)
-
- # The proximity should have the same extent, resolution and CRS
- assert raster1.georeferenced_grid_equal(prox1)
-
- # With the base geometry
- vector.proximity(raster=raster1, geometry_type="geometry")
-
- # With another geometry option
- vector.proximity(raster=raster1, geometry_type="centroid")
-
- # With only inside proximity
- vector.proximity(raster=raster1, in_or_out="in")
-
- # -- Test 2: with no Raster provided, just grid size --
-
- # Default grid size
- vector.proximity()
-
- # With specific grid size
- vector.proximity(size=(100, 100))
-
-
-class TestSynthetic:
- # Create a synthetic vector file with a square of size 1, started at position (10, 10)
- poly1 = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)])
- gdf = gpd.GeoDataFrame({"geometry": [poly1]}, crs="EPSG:4326")
- vector = gu.Vector(gdf)
-
- # Same with a square started at position (5, 5)
- poly2 = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)])
- gdf = gpd.GeoDataFrame({"geometry": [poly2]}, crs="EPSG:4326")
- vector2 = gu.Vector(gdf)
-
- # Create a multipolygon with both
- multipoly = MultiPolygon([poly1, poly2])
- gdf = gpd.GeoDataFrame({"geometry": [multipoly]}, crs="EPSG:4326")
- vector_multipoly = gu.Vector(gdf)
-
- # Create a synthetic vector file with a square of size 5, started at position (8, 8)
- poly3 = Polygon([(8, 8), (13, 8), (13, 13), (8, 13)])
- gdf = gpd.GeoDataFrame({"geometry": [poly3]}, crs="EPSG:4326")
- vector_5 = gu.Vector(gdf)
-
- # Create a synthetic LineString geometry
- lines = LineString([(10, 10), (11, 10), (11, 11)])
- gdf = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326")
- vector_lines = gu.Vector(gdf)
-
- # Create a synthetic MultiLineString geometry
- multilines = MultiLineString([[(10, 10), (11, 10), (11, 11)], [(5, 5), (6, 5), (6, 6)]])
- gdf = gpd.GeoDataFrame({"geometry": [multilines]}, crs="EPSG:4326")
- vector_multilines = gu.Vector(gdf)
-
- def test_create_mask(self) -> None:
- """
- Test Vector.create_mask.
- """
- # First with given res and bounds -> Should be a 21 x 21 array with 0 everywhere except center pixel
- vector = self.vector.copy()
- out_mask = vector.create_mask(xres=1, bounds=(0, 0, 21, 21), as_array=True)
- ref_mask = np.zeros((21, 21), dtype="bool")
- ref_mask[10, 10] = True
- assert out_mask.shape == (21, 21)
- assert np.all(ref_mask == out_mask)
-
- # Check that vector has not been modified by accident
- assert vector.bounds == self.vector.bounds
- assert len(vector.ds) == len(self.vector.ds)
- assert vector.crs == self.vector.crs
-
- # Then with a gu.Raster as reference, single band
- rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326")
- out_mask = vector.create_mask(rst, as_array=True)
- assert out_mask.shape == (21, 21)
-
- # With gu.Raster, 2 bands -> fails...
- # rst = gu.Raster.from_array(np.zeros((2, 21, 21)), transform=(1., 0., 0., 0., -1., 21.), crs='EPSG:4326')
- # out_mask = vector.create_mask(rst)
-
- # Test that buffer = 0 works
- out_mask_buff = vector.create_mask(rst, buffer=0, as_array=True)
- assert np.all(ref_mask == out_mask_buff)
-
- # Test that buffer > 0 works
- rst = gu.Raster.from_array(np.zeros((21, 21)), transform=(1.0, 0.0, 0.0, 0.0, -1.0, 21.0), crs="EPSG:4326")
- out_mask = vector.create_mask(rst, as_array=True)
- for buffer in np.arange(1, 8):
- out_mask_buff = vector.create_mask(rst, buffer=buffer, as_array=True)
- diff = out_mask_buff & ~out_mask
- assert np.count_nonzero(diff) > 0
- # Difference between masks should always be thinner than buffer + 1
- eroded_diff = binary_erosion(diff.squeeze(), np.ones((buffer + 1, buffer + 1)))
- assert np.count_nonzero(eroded_diff) == 0
-
- # Test that buffer < 0 works
- vector_5 = self.vector_5
- out_mask = vector_5.create_mask(rst, as_array=True)
- for buffer in np.arange(-1, -3, -1):
- out_mask_buff = vector_5.create_mask(rst, buffer=buffer, as_array=True)
- diff = ~out_mask_buff & out_mask
- assert np.count_nonzero(diff) > 0
- # Difference between masks should always be thinner than buffer + 1
- eroded_diff = binary_erosion(diff.squeeze(), np.ones((abs(buffer) + 1, abs(buffer) + 1)))
- assert np.count_nonzero(eroded_diff) == 0
-
- # Check that no warning is raised when creating a mask with a xres not multiple of vector bounds
- mask = vector.create_mask(xres=1.01)
-
- # Check that by default, create_mask returns a Mask
- assert isinstance(mask, gu.Mask)
-
- # Check that an error is raised if xres is not passed
- with pytest.raises(ValueError, match="At least raster or xres must be set."):
- vector.create_mask()
-
- # Check that an error is raised if buffer is the wrong type
- with pytest.raises(TypeError, match="Buffer must be a number, currently set to str."):
- vector.create_mask(rst, buffer="lol") # type: ignore
-
- # If the raster has the wrong type
- with pytest.raises(TypeError, match="Raster must be a geoutils.Raster or None."):
- vector.create_mask("lol") # type: ignore
-
- # Check that a warning is raised if the bounds were passed specifically by the user
- with pytest.warns(UserWarning):
- vector.create_mask(xres=1.01, bounds=(0, 0, 21, 21))
-
- def test_extract_vertices(self) -> None:
- """
- Test that extract_vertices works with simple geometries.
- """
- # Polygons
- vertices = gu.vector.extract_vertices(self.vector.ds)
- assert len(vertices) == 1
- assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)]]
-
- # MultiPolygons
- vertices = gu.vector.extract_vertices(self.vector_multipoly.ds)
- assert len(vertices) == 2
- assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)]
- assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0), (5.0, 6.0), (5.0, 5.0)]
-
- # LineString
- vertices = gu.vector.extract_vertices(self.vector_lines.ds)
- assert len(vertices) == 1
- assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)]]
-
- # MultiLineString
- vertices = gu.vector.extract_vertices(self.vector_multilines.ds)
- assert len(vertices) == 2
- assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)]
- assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0)]
-
- def test_generate_voronoi(self) -> None:
- """
- Check that vector.generate_voronoi_polygons works on a simple Polygon.
- Does not work with simple shapes as squares or triangles as the diagram is infinite.
- For now, test on a set of two squares.
- """
- # Check with a multipolygon
- voronoi = gu.vector.generate_voronoi_polygons(self.vector_multipoly.ds)
- assert len(voronoi) == 2
- vertices = gu.vector.extract_vertices(voronoi)
- assert vertices == [
- [(5.5, 10.5), (10.5, 10.5), (10.5, 5.5), (5.5, 10.5)],
- [(5.5, 10.5), (10.5, 5.5), (5.5, 5.5), (5.5, 10.5)],
- ]
-
- # Check that it fails with proper error for too simple geometries
- expected_message = "Invalid geometry, cannot generate finite Voronoi polygons"
- with pytest.raises(ValueError, match=expected_message):
- voronoi = gu.vector.generate_voronoi_polygons(self.vector.ds)
-
- def test_buffer_metric(self) -> None:
- """Check that metric buffering works"""
-
- # Case with two squares: test that the buffered area is without deformations
- # https://epsg.io/32631
- utm31_x_center = 500000
- utm31_y_center = 4649776
- poly1_utm31 = Polygon(
- [
- (utm31_x_center, utm31_y_center),
- (utm31_x_center + 1, utm31_y_center),
- (utm31_x_center + 1, utm31_y_center + 1),
- (utm31_x_center, utm31_y_center + 1),
- ]
- )
-
- poly2_utm31 = Polygon(
- [
- (utm31_x_center + 10, utm31_y_center + 10),
- (utm31_x_center + 11, utm31_y_center + 10),
- (utm31_x_center + 11, utm31_y_center + 11),
- (utm31_x_center + 10, utm31_y_center + 11),
- ]
- )
-
- # We initiate the squares of size 1x1 in a UTM projection
- two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[poly1_utm31, poly2_utm31], crs="EPSG:32631"))
-
- # Their area should now be 1 for each polygon
- assert two_squares.ds.area.values[0] == 1
- assert two_squares.ds.area.values[1] == 1
-
- # We buffer them
- two_squares_utm_buffered = two_squares.buffer_metric(buffer_size=1.0)
-
- # Their area should now be 1 (square) + 4 (buffer along the sides) + 4*(pi*1**2 /4)
- # (buffer of corners = quarter-disks)
- expected_area = 1 + 4 + np.pi
- assert two_squares_utm_buffered.ds.area.values[0] == pytest.approx(expected_area, abs=0.01)
- assert two_squares_utm_buffered.ds.area.values[1] == pytest.approx(expected_area, abs=0.01)
-
- # And the new GeoDataFrame should exactly match that of one buffer from the original one
- direct_gpd_buffer = gu.Vector(
- gpd.GeoDataFrame(geometry=two_squares.ds.buffer(distance=1.0).geometry, crs=two_squares.crs)
- )
- assert_geodataframe_equal(direct_gpd_buffer.ds, two_squares_utm_buffered.ds)
-
- # Now, if we reproject the original vector in a non-metric system
- two_squares_geographic = gu.Vector(two_squares.ds.to_crs(epsg=4326))
- # We buffer directly the Vector object in the non-metric system
- two_squares_geographic_buffered = two_squares_geographic.buffer_metric(buffer_size=1.0)
- # Then, we reproject that vector in the UTM zone
- two_squares_geographic_buffered_reproj = gu.Vector(
- two_squares_geographic_buffered.ds.to_crs(crs=two_squares.crs)
- )
-
- # Their area should now be the same as before for each polygon
- assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01)
- assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01)
-
- # And this time, it is the reprojected GeoDataFrame that should almost match (within a tolerance of 10e-06)
- assert all(direct_gpd_buffer.ds.geom_equals_exact(two_squares_geographic_buffered_reproj.ds, tolerance=10e-6))
-
- def test_buffer_without_overlap(self, monkeypatch) -> None: # type: ignore
- """
- Check that non-overlapping buffer feature works. Does not work on simple geometries, so test on MultiPolygon.
- Yet, very simple geometries yield unexpected results, as is the case for the second test case here.
- """
- # Case 1, test with two squares, in separate Polygons
- two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[self.poly1, self.poly2], crs="EPSG:4326"))
-
- # Check with buffers that should not overlap
- # ------------------------------------------
- buffer_size = 2
- # We force metric = False, so buffer should raise a GeoPandas warning
- with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"):
- buffer = two_squares.buffer_without_overlap(buffer_size, metric=False)
-
- # Output should be of same size as input and same geometry type
- assert len(buffer.ds) == len(two_squares.ds)
- assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type)
-
- # Extract individual geometries
- polys = []
- for geom in buffer.ds.geometry:
- if geom.geom_type in ["MultiPolygon"]:
- polys.extend(list(geom))
- else:
- polys.append(geom)
-
- # Check they do not overlap
- for i in range(len(polys)):
- for j in range(i + 1, len(polys)):
- assert not polys[i].intersects(polys[j])
-
- # buffer should yield the same result as create_mask with buffer, minus the original mask
- mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
- mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size)
- mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
- assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer)
-
- # Case 2 - Check with buffers that overlap -> this case is actually not the expected result !
- # -------------------------------
- buffer_size = 5
- # We force metric = False, so buffer should raise a GeoPandas warning
- with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"):
- buffer = two_squares.buffer_without_overlap(buffer_size, metric=False)
-
- # Output should be of same size as input and same geometry type
- assert len(buffer.ds) == len(two_squares.ds)
- assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type)
-
- # Extract individual geometries
- polys = []
- for geom in buffer.ds.geometry:
- if geom.geom_type in ["MultiPolygon"]:
- polys.extend(list(geom))
- else:
- polys.append(geom)
-
- # Check they do not overlap
- for i in range(len(polys)):
- for j in range(i + 1, len(polys)):
- assert polys[i].intersection(polys[j]).area == 0
-
- # buffer should yield the same result as create_mask with buffer, minus the original mask
- mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
- mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size)
- mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
- assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer)
-
- # Check that plotting runs without errors and close it
- monkeypatch.setattr(plt, "show", lambda: None)
- two_squares.buffer_without_overlap(buffer_size, plot=True)
-
-
-class NeedToImplementWarning(FutureWarning):
- """Warning to remember to implement new GeoPandas methods"""
-
-
-class TestGeoPandasMethods:
- # Use two synthetic vectors
- poly = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)])
- gdf1 = gpd.GeoDataFrame({"geometry": [poly]}, crs="EPSG:4326")
- synthvec1 = gu.Vector(gdf1)
-
- # Create a synthetic LineString geometry
- lines = LineString([(10, 10), (10.5, 10.5), (11, 11)])
- gdf2 = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326")
- synthvec2 = gu.Vector(gdf2)
-
- # Use two real-life vectors
- realvec1 = gu.Vector(gu.examples.get_path("exploradores_rgi_outlines"))
- realvec2 = gu.Vector(gu.examples.get_path("everest_rgi_outlines"))
-
- # Properties and methods derived from Shapely or GeoPandas
- # List of properties and methods with non-geometric output that are implemented in GeoUtils
- main_properties = ["crs", "geometry", "total_bounds"]
- nongeo_properties = [
- "area",
- "length",
- "interiors",
- "geom_type",
- "is_empty",
- "is_ring",
- "is_simple",
- "is_valid",
- "has_z",
- ]
- nongeo_methods = [
- "contains",
- "geom_equals",
- "geom_almost_equals",
- "geom_equals_exact",
- "crosses",
- "disjoint",
- "intersects",
- "overlaps",
- "touches",
- "within",
- "covers",
- "covered_by",
- "distance",
- ]
-
- # List of properties and methods with geometric output that are implemented in GeoUtils
- geo_properties = ["boundary", "unary_union", "centroid", "convex_hull", "envelope", "exterior"]
- geo_methods = [
- "representative_point",
- "normalize",
- "make_valid",
- "difference",
- "symmetric_difference",
- "union",
- "intersection",
- "clip_by_rect",
- "buffer",
- "simplify",
- "affine_transform",
- "translate",
- "rotate",
- "scale",
- "skew",
- "dissolve",
- "explode",
- "sjoin",
- "sjoin_nearest",
- "overlay",
- "to_crs",
- "set_crs",
- "rename_geometry",
- "set_geometry",
- "clip",
- ]
- # List of class methods
- io_methods = [
- "from_file",
- "from_postgis",
- "from_dict",
- "from_features",
- "to_feather",
- "to_parquet",
- "to_file",
- "to_postgis",
- "to_json",
- "to_wkb",
- "to_wkt",
- "to_csv",
- ]
-
- # List of other properties and methods
- other = ["has_sindex", "sindex", "estimate_utm_crs", "cx", "iterfeatures"]
- all_declared = (
- main_properties + nongeo_methods + nongeo_properties + geo_methods + geo_properties + other + io_methods
- )
-
- # Exceptions for GeoPandasBase functions not implemented (or deprecrated) in GeoSeries/GeoDataFrame
- exceptions_unimplemented = [
- "plot",
- "explore",
- "cascaded_union",
- "bounds",
- "relate",
- "project",
- "interpolate",
- "equals",
- "type",
- "convert_dtypes",
- "merge",
- "apply",
- "astype",
- "minimum_bounding_circle",
- "minimum_bounding_radius",
- "get_coordinates",
- "hilbert_distance",
- "sample_points",
- "copy",
- ]
- # Exceptions for IO/conversion that can be done directly from .ds
- all_exceptions = exceptions_unimplemented
-
- # Get all GeoPandasBase public methods with some exceptions
- geobase_methods = gpd.base.GeoPandasBase.__dict__.copy()
-
- # Get all GeoDataFrame public methods with some exceptions
- gdf_methods = gpd.GeoDataFrame.__dict__.copy()
-
- def test_overridden_funcs_exist(self) -> None:
- """Check that all methods listed above exist in Vector."""
-
- # Check that all methods declared in the class above exist in Vector
- vector_methods = gu.Vector.__dict__
-
- list_missing = [method for method in self.all_declared if method not in vector_methods.keys()]
-
- assert len(list_missing) == 0, print(f"Test method listed that is not in GeoUtils: {list_missing}")
-
- def test_geopandas_coverage(self) -> None:
- """Check that all existing methods of GeoPandas are overridden, with a couple exceptions."""
-
- # Merge the two
- all_methods = self.geobase_methods.copy()
- all_methods.update(self.gdf_methods)
-
- # Remove exceptions we don't want to reuse from GeoPandas (mirrored in Vector)
- name_all_methods = list(all_methods.keys())
- public_methods = [method for method in name_all_methods if method[0] != "_"]
-
- covered_methods = [method for method in public_methods if method not in self.all_exceptions]
-
- # Check that all methods declared in the class above are covered in Vector
- list_missing = [method for method in covered_methods if method not in self.all_declared]
-
- if len(list_missing) != 0:
- warnings.warn(
- f"New GeoPandas methods are not implemented in GeoUtils: {list_missing}", NeedToImplementWarning
- )
-
- @pytest.mark.parametrize("method", nongeo_methods + geo_methods) # type: ignore
- def test_overridden_funcs_args(self, method: str) -> None:
- """Check that all methods overridden have the same arguments as in GeoPandas."""
-
- # Get GeoPandas class where the methods live
- if method in self.geobase_methods.keys():
- upstream_class = gpd.base.GeoPandasBase
- elif method in self.gdf_methods.keys():
- upstream_class = gpd.GeoDataFrame
- else:
- raise ValueError("Method did not belong to GeoDataFrame or GeoPandasBase class.")
-
- # Get a full argument inspection object for each class
- argspec_upstream = inspect.getfullargspec(getattr(upstream_class, method))
- argspec_geoutils = inspect.getfullargspec(getattr(gu.Vector, method))
-
- # Check that all positional arguments are the same
- if argspec_upstream.args != argspec_geoutils.args:
- warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
-
- # Check that the *args and **kwargs argument are declared consistently
- if argspec_upstream.varargs != argspec_geoutils.varargs:
- warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
-
- if argspec_upstream.varkw != argspec_geoutils.varkw:
- warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
-
- # Check that default argument values are the same
- if argspec_upstream.defaults != argspec_geoutils.defaults:
- warnings.warn("Default argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
-
- @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore
- @pytest.mark.parametrize("method", nongeo_properties) # type: ignore
- def test_nongeo_properties(self, vector: gu.Vector, method: str) -> None:
- """Check non-geometric properties are consistent with GeoPandas."""
-
- # Remove warnings about operations in a non-projected system, and future changes
- warnings.simplefilter("ignore", category=UserWarning)
- warnings.simplefilter("ignore", category=FutureWarning)
-
- # Get method for each class
- output_geoutils = getattr(vector, method)
- output_geopandas = getattr(vector.ds, method)
-
- # Assert equality
- assert_series_equal(output_geoutils, output_geopandas)
-
- @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore
- @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore
- @pytest.mark.parametrize("method", nongeo_methods) # type: ignore
- def test_nongeo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None:
- """
- Check non-geometric methods are consistent with GeoPandas.
- All these methods require two inputs ("other", "df", or "right" argument), except one.
- """
-
- # Remove warnings about operations in a non-projected system, and future changes
- warnings.simplefilter("ignore", category=UserWarning)
- warnings.simplefilter("ignore", category=FutureWarning)
-
- # Get method for each class
- if method != "geom_equals_exact":
- output_geoutils = getattr(vector1, method)(vector2)
- output_geopandas = getattr(vector1.ds, method)(vector2.ds)
- else:
- output_geoutils = getattr(vector1, method)(vector2, tolerance=0.1)
- output_geopandas = getattr(vector1.ds, method)(vector2.ds, tolerance=0.1)
-
- # Assert equality
- assert_series_equal(output_geoutils, output_geopandas)
-
- @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore
- @pytest.mark.parametrize("method", geo_properties) # type: ignore
- def test_geo_properties(self, vector: gu.Vector, method: str) -> None:
- """Check geometric properties are consistent with GeoPandas."""
-
- # Remove warnings about operations in a non-projected system, and future changes
- warnings.simplefilter("ignore", category=UserWarning)
- warnings.simplefilter("ignore", category=FutureWarning)
-
- # Get method for each class
- output_geoutils = getattr(vector, method)
- output_geopandas = getattr(vector.ds, method)
-
- # Assert output types
- assert isinstance(output_geoutils, gu.Vector)
- assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame, BaseGeometry))
-
- # Separate cases depending on GeoPandas' output
- if isinstance(output_geopandas, gpd.GeoSeries):
- # Assert geoseries equality
- assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas)
- elif isinstance(output_geopandas, BaseGeometry):
- assert_geodataframe_equal(
- output_geoutils.ds, gpd.GeoDataFrame({"geometry": [output_geopandas]}, crs=vector.crs)
- )
- else:
- assert_geodataframe_equal(output_geoutils.ds, output_geopandas)
-
- specific_method_args = {
- "buffer": {"distance": 1},
- "clip_by_rect": {"xmin": 10.5, "ymin": 10.5, "xmax": 11, "ymax": 11},
- "affine_transform": {"matrix": [1, 1, 1, 1, 1, 1]},
- "translate": {"xoff": 1, "yoff": 1, "zoff": 0},
- "rotate": {"angle": 90},
- "scale": {"xfact": 1.1, "yfact": 1.1, "zfact": 1.1, "origin": "center"},
- "skew": {"xs": 1.1, "ys": 1.1},
- "interpolate": {"distance": 1},
- "simplify": {"tolerance": 0.1},
- "to_crs": {"crs": pyproj.CRS.from_epsg(32610)},
- "set_crs": {"crs": pyproj.CRS.from_epsg(32610), "allow_override": True},
- "rename_geometry": {"col": "lol"},
- "set_geometry": {"col": synthvec1.geometry},
- "clip": {"mask": poly},
- }
-
- @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore
- @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore
- @pytest.mark.parametrize("method", geo_methods) # type: ignore
- def test_geo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None:
- """Check geometric methods are consistent with GeoPandas."""
-
- # Remove warnings about operations in a non-projected system, and future changes
- warnings.simplefilter("ignore", category=UserWarning)
- warnings.simplefilter("ignore", category=FutureWarning)
-
- # Methods that require two inputs
- if method in [
- "difference",
- "symmetric_difference",
- "union",
- "intersection",
- "sjoin",
- "sjoin_nearest",
- "overlay",
- ]:
- output_geoutils = getattr(vector1, method)(vector2)
- output_geopandas = getattr(vector1.ds, method)(vector2.ds)
- # Methods that require zero input
- elif method in ["representative_point", "normalize", "make_valid", "dissolve", "explode"]:
- output_geoutils = getattr(vector1, method)()
- output_geopandas = getattr(vector1.ds, method)()
- elif method in self.specific_method_args.keys():
- output_geoutils = getattr(vector1, method)(**self.specific_method_args[method])
- output_geopandas = getattr(vector1.ds, method)(**self.specific_method_args[method])
- else:
- raise ValueError(f"The method '{method}' is not covered by this test.")
-
- # Assert output types
- assert isinstance(output_geoutils, gu.Vector)
- assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame))
-
- # Separate cases depending on GeoPandas' output, and nature of the function
- # Simplify is a special case that can make geometries invalid, so adjust test
- if method == "simplify":
- # TODO: Unskip this random test failure (one index not matching) when this is fixed in GeoPandas/Shapely
- pass
- # assert_geoseries_equal(
- # output_geopandas.make_valid(), output_geoutils.ds.geometry.make_valid(), check_less_precise=True
- # )
- # For geoseries output, check equality of it
- elif isinstance(output_geopandas, gpd.GeoSeries):
- assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas)
- # For geodataframe output, check equality
- else:
- assert_geodataframe_equal(output_geoutils.ds, output_geopandas)
diff --git a/tests/test_vector/test_geometric.py b/tests/test_vector/test_geometric.py
new file mode 100644
index 00000000..20d38463
--- /dev/null
+++ b/tests/test_vector/test_geometric.py
@@ -0,0 +1,227 @@
+"""Tests for geometry operations on vectors."""
+
+from __future__ import annotations
+
+import geopandas as gpd
+import matplotlib.pyplot as plt
+import numpy as np
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+from shapely import LineString, MultiLineString, MultiPolygon, Polygon
+
+import geoutils as gu
+from geoutils.vector.geometric import _extract_vertices, _generate_voronoi_polygons
+
+
+class TestGeometric:
+
+ # Create a synthetic vector file with a square of size 1, started at position (10, 10)
+ poly1 = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)])
+ gdf = gpd.GeoDataFrame({"geometry": [poly1]}, crs="EPSG:4326")
+ vector = gu.Vector(gdf)
+
+ # Same with a square started at position (5, 5)
+ poly2 = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)])
+ gdf = gpd.GeoDataFrame({"geometry": [poly2]}, crs="EPSG:4326")
+ vector2 = gu.Vector(gdf)
+
+ # Create a multipolygon with both
+ multipoly = MultiPolygon([poly1, poly2])
+ gdf = gpd.GeoDataFrame({"geometry": [multipoly]}, crs="EPSG:4326")
+ vector_multipoly = gu.Vector(gdf)
+
+ # Create a synthetic vector file with a square of size 5, started at position (8, 8)
+ poly3 = Polygon([(8, 8), (13, 8), (13, 13), (8, 13)])
+ gdf = gpd.GeoDataFrame({"geometry": [poly3]}, crs="EPSG:4326")
+ vector_5 = gu.Vector(gdf)
+
+ # Create a synthetic LineString geometry
+ lines = LineString([(10, 10), (11, 10), (11, 11)])
+ gdf = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326")
+ vector_lines = gu.Vector(gdf)
+
+ # Create a synthetic MultiLineString geometry
+ multilines = MultiLineString([[(10, 10), (11, 10), (11, 11)], [(5, 5), (6, 5), (6, 6)]])
+ gdf = gpd.GeoDataFrame({"geometry": [multilines]}, crs="EPSG:4326")
+ vector_multilines = gu.Vector(gdf)
+
+ def test_extract_vertices(self) -> None:
+ """
+ Test that extract_vertices works with simple geometries.
+ """
+ # Polygons
+ vertices = _extract_vertices(self.vector.ds)
+ assert len(vertices) == 1
+ assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)]]
+
+ # MultiPolygons
+ vertices = _extract_vertices(self.vector_multipoly.ds)
+ assert len(vertices) == 2
+ assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0), (10.0, 11.0), (10.0, 10.0)]
+ assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0), (5.0, 6.0), (5.0, 5.0)]
+
+ # LineString
+ vertices = _extract_vertices(self.vector_lines.ds)
+ assert len(vertices) == 1
+ assert vertices == [[(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)]]
+
+ # MultiLineString
+ vertices = _extract_vertices(self.vector_multilines.ds)
+ assert len(vertices) == 2
+ assert vertices[0] == [(10.0, 10.0), (11.0, 10.0), (11.0, 11.0)]
+ assert vertices[1] == [(5.0, 5.0), (6.0, 5.0), (6.0, 6.0)]
+
+ def test_generate_voronoi(self) -> None:
+ """
+ Check that vector.generate_voronoi_polygons works on a simple Polygon.
+ Does not work with simple shapes as squares or triangles as the diagram is infinite.
+ For now, test on a set of two squares.
+ """
+ # Check with a multipolygon
+ voronoi = _generate_voronoi_polygons(self.vector_multipoly.ds)
+ assert len(voronoi) == 2
+ vertices = _extract_vertices(voronoi)
+ assert vertices == [
+ [(5.5, 10.5), (10.5, 10.5), (10.5, 5.5), (5.5, 10.5)],
+ [(5.5, 10.5), (10.5, 5.5), (5.5, 5.5), (5.5, 10.5)],
+ ]
+
+ # Check that it fails with proper error for too simple geometries
+ expected_message = "Invalid geometry, cannot generate finite Voronoi polygons"
+ with pytest.raises(ValueError, match=expected_message):
+ voronoi = _generate_voronoi_polygons(self.vector.ds)
+
+ def test_buffer_metric(self) -> None:
+ """Check that metric buffering works"""
+
+ # Case with two squares: test that the buffered area is without deformations
+ # https://epsg.io/32631
+ utm31_x_center = 500000
+ utm31_y_center = 4649776
+ poly1_utm31 = Polygon(
+ [
+ (utm31_x_center, utm31_y_center),
+ (utm31_x_center + 1, utm31_y_center),
+ (utm31_x_center + 1, utm31_y_center + 1),
+ (utm31_x_center, utm31_y_center + 1),
+ ]
+ )
+
+ poly2_utm31 = Polygon(
+ [
+ (utm31_x_center + 10, utm31_y_center + 10),
+ (utm31_x_center + 11, utm31_y_center + 10),
+ (utm31_x_center + 11, utm31_y_center + 11),
+ (utm31_x_center + 10, utm31_y_center + 11),
+ ]
+ )
+
+ # We initiate the squares of size 1x1 in a UTM projection
+ two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[poly1_utm31, poly2_utm31], crs="EPSG:32631"))
+
+ # Their area should now be 1 for each polygon
+ assert two_squares.ds.area.values[0] == 1
+ assert two_squares.ds.area.values[1] == 1
+
+ # We buffer them
+ two_squares_utm_buffered = two_squares.buffer_metric(buffer_size=1.0)
+
+ # Their area should now be 1 (square) + 4 (buffer along the sides) + 4*(pi*1**2 /4)
+ # (buffer of corners = quarter-disks)
+ expected_area = 1 + 4 + np.pi
+ assert two_squares_utm_buffered.ds.area.values[0] == pytest.approx(expected_area, abs=0.01)
+ assert two_squares_utm_buffered.ds.area.values[1] == pytest.approx(expected_area, abs=0.01)
+
+ # And the new GeoDataFrame should exactly match that of one buffer from the original one
+ direct_gpd_buffer = gu.Vector(
+ gpd.GeoDataFrame(geometry=two_squares.ds.buffer(distance=1.0).geometry, crs=two_squares.crs)
+ )
+ assert_geodataframe_equal(direct_gpd_buffer.ds, two_squares_utm_buffered.ds)
+
+ # Now, if we reproject the original vector in a non-metric system
+ two_squares_geographic = gu.Vector(two_squares.ds.to_crs(epsg=4326))
+ # We buffer directly the Vector object in the non-metric system
+ two_squares_geographic_buffered = two_squares_geographic.buffer_metric(buffer_size=1.0)
+ # Then, we reproject that vector in the UTM zone
+ two_squares_geographic_buffered_reproj = gu.Vector(
+ two_squares_geographic_buffered.ds.to_crs(crs=two_squares.crs)
+ )
+
+ # Their area should now be the same as before for each polygon
+ assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01)
+ assert two_squares_geographic_buffered_reproj.ds.area.values[0] == pytest.approx(expected_area, abs=0.01)
+
+ # And this time, it is the reprojected GeoDataFrame that should almost match (within a tolerance of 10e-06)
+ assert all(direct_gpd_buffer.ds.geom_equals_exact(two_squares_geographic_buffered_reproj.ds, tolerance=10e-6))
+
+ def test_buffer_without_overlap(self, monkeypatch) -> None: # type: ignore
+ """
+ Check that non-overlapping buffer feature works. Does not work on simple geometries, so test on MultiPolygon.
+ Yet, very simple geometries yield unexpected results, as is the case for the second test case here.
+ """
+ # Case 1, test with two squares, in separate Polygons
+ two_squares = gu.Vector(gpd.GeoDataFrame(geometry=[self.poly1, self.poly2], crs="EPSG:4326"))
+
+ # Check with buffers that should not overlap
+ # ------------------------------------------
+ buffer_size = 2
+ # We force metric = False, so buffer should raise a GeoPandas warning
+ with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"):
+ buffer = two_squares.buffer_without_overlap(buffer_size, metric=False)
+
+ # Output should be of same size as input and same geometry type
+ assert len(buffer.ds) == len(two_squares.ds)
+ assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type)
+
+ # Extract individual geometries
+ polys = []
+ for geom in buffer.ds.geometry:
+ if geom.geom_type in ["MultiPolygon"]:
+ polys.extend(list(geom))
+ else:
+ polys.append(geom)
+
+ # Check they do not overlap
+ for i in range(len(polys)):
+ for j in range(i + 1, len(polys)):
+ assert not polys[i].intersects(polys[j])
+
+ # buffer should yield the same result as create_mask with buffer, minus the original mask
+ mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
+ mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size)
+ mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
+ assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer)
+
+ # Case 2 - Check with buffers that overlap -> this case is actually not the expected result !
+ # -------------------------------
+ buffer_size = 5
+ # We force metric = False, so buffer should raise a GeoPandas warning
+ with pytest.warns(UserWarning, match="Geometry is in a geographic CRS.*"):
+ buffer = two_squares.buffer_without_overlap(buffer_size, metric=False)
+
+ # Output should be of same size as input and same geometry type
+ assert len(buffer.ds) == len(two_squares.ds)
+ assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type)
+
+ # Extract individual geometries
+ polys = []
+ for geom in buffer.ds.geometry:
+ if geom.geom_type in ["MultiPolygon"]:
+ polys.extend(list(geom))
+ else:
+ polys.append(geom)
+
+ # Check they do not overlap
+ for i in range(len(polys)):
+ for j in range(i + 1, len(polys)):
+ assert polys[i].intersection(polys[j]).area == 0
+
+ # buffer should yield the same result as create_mask with buffer, minus the original mask
+ mask_nonoverlap = buffer.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
+ mask_buffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21), buffer=buffer_size)
+ mask_nobuffer = two_squares.create_mask(xres=0.1, bounds=(0, 0, 21, 21))
+ assert np.all(mask_nobuffer | mask_nonoverlap == mask_buffer)
+
+ # Check that plotting runs without errors and close it
+ monkeypatch.setattr(plt, "show", lambda: None)
+ two_squares.buffer_without_overlap(buffer_size, plot=True)
diff --git a/tests/test_vector/test_geotransformations_vector.py b/tests/test_vector/test_geotransformations_vector.py
new file mode 100644
index 00000000..e1db15df
--- /dev/null
+++ b/tests/test_vector/test_geotransformations_vector.py
@@ -0,0 +1,128 @@
+"""Tests for geotransformations of vectors."""
+
+from __future__ import annotations
+
+import re
+
+import numpy as np
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+
+import geoutils as gu
+
+
+class TestGeotransformations:
+
+ landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped")
+ everest_outlines_path = gu.examples.get_path("everest_rgi_outlines")
+ aster_dem_path = gu.examples.get_path("exploradores_aster_dem")
+ aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines")
+
+ def test_reproject(self) -> None:
+ """Test that the reproject function works as intended"""
+
+ v0 = gu.Vector(self.aster_outlines_path)
+ r0 = gu.Raster(self.aster_dem_path)
+ v1 = gu.Vector(self.everest_outlines_path)
+
+ # First, test with a EPSG integer
+ v1 = v0.reproject(crs=32617)
+ assert isinstance(v1, gu.Vector)
+ assert v1.crs.to_epsg() == 32617
+
+ # Check the inplace behaviour matches the not-inplace one
+ v2 = v0.copy()
+ v2.reproject(crs=32617, inplace=True)
+ v2.vector_equal(v1)
+
+ # Check that the reprojection is the same as with geopandas
+ gpd1 = v0.ds.to_crs(epsg=32617)
+ assert_geodataframe_equal(gpd1, v1.ds)
+
+ # Second, with a Raster object
+ v2 = v0.reproject(r0)
+ assert v2.crs == r0.crs
+
+ # Third, with a Vector object that has a different CRS
+ assert v0.crs != v1.crs
+ v3 = v0.reproject(v1)
+ assert v3.crs == v1.crs
+
+ # Fourth, check that errors are raised when appropriate
+ # When no destination CRS is defined, or both dst_crs and dst_ref are passed
+ with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")):
+ v0.reproject()
+ v0.reproject(ref=r0, crs=32617)
+ # If the path provided does not exist
+ with pytest.raises(ValueError, match=re.escape("Reference raster or vector path does not exist.")):
+ v0.reproject(ref="tmp.lol")
+ # If it exists but cannot be opened by rasterio or fiona
+ with pytest.raises(ValueError, match=re.escape("Could not open raster or vector with rasterio or pyogrio.")):
+ v0.reproject(ref="geoutils/examples.py")
+ # If input of wrong type
+ with pytest.raises(TypeError, match=re.escape("Type of ref must be string path to file, Raster or Vector.")):
+ v0.reproject(ref=10) # type: ignore
+
+ test_data = [[landsat_b4_crop_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]]
+
+ @pytest.mark.parametrize("data", test_data) # type: ignore
+ def test_crop(self, data: list[str]) -> None:
+ # Load data
+ raster_path, outlines_path = data
+ rst = gu.Raster(raster_path)
+ outlines = gu.Vector(outlines_path)
+
+ # Need to reproject to r.crs. Otherwise, crop will work but will be approximate
+ # Because outlines might be warped in a different crs
+ outlines.ds = outlines.ds.to_crs(rst.crs)
+
+ # Crop
+ outlines_new = outlines.copy()
+ outlines_new.crop(crop_geom=rst, inplace=True)
+
+ # Check default behaviour - crop and return copy
+ outlines_copy = outlines.crop(crop_geom=rst)
+
+ # Crop by passing bounds
+ outlines_new_bounds = outlines.copy()
+ outlines_new_bounds.crop(crop_geom=list(rst.bounds), inplace=True)
+ assert_geodataframe_equal(outlines_new.ds, outlines_new_bounds.ds)
+ # Check the return-by-copy as well
+ assert_geodataframe_equal(outlines_copy.ds, outlines_new_bounds.ds)
+
+ # Verify that geometries intersect with raster bound
+ rst_poly = gu.projtools.bounds2poly(rst.bounds)
+ intersects_new = []
+ for poly in outlines_new.ds.geometry:
+ intersects_new.append(poly.intersects(rst_poly))
+
+ assert np.all(intersects_new)
+
+ # Check that some of the original outlines did not intersect and were removed
+ intersects_old = []
+ for poly in outlines.ds.geometry:
+ intersects_old.append(poly.intersects(rst_poly))
+
+ assert np.sum(intersects_old) == np.sum(intersects_new)
+
+ # Check that some features were indeed removed
+ assert np.sum(~np.array(intersects_old)) > 0
+
+ # Check that error is raised when cropGeom argument is invalid
+ with pytest.raises(TypeError, match="Crop geometry must be a Raster, Vector, or list of coordinates."):
+ outlines.crop(1, inplace=True) # type: ignore
+
+ def test_translate(self) -> None:
+
+ vector = gu.Vector(self.everest_outlines_path)
+
+ # Check default behaviour is not inplace
+ vector_shifted = vector.translate(xoff=2.5, yoff=5.7)
+ assert isinstance(vector_shifted, gu.Vector)
+ assert_geoseries_equal(vector_shifted.geometry, vector.geometry.translate(xoff=2.5, yoff=5.7))
+
+ # Check inplace behaviour works correctly
+ vector2 = vector.copy()
+ output = vector2.translate(xoff=2.5, yoff=5.7, inplace=True)
+ assert output is None
+ assert_geoseries_equal(vector2.geometry, vector_shifted.geometry)
diff --git a/tests/test_vector/test_vector.py b/tests/test_vector/test_vector.py
new file mode 100644
index 00000000..bc524325
--- /dev/null
+++ b/tests/test_vector/test_vector.py
@@ -0,0 +1,454 @@
+"""Test functions specific to the Vector class."""
+
+from __future__ import annotations
+
+import inspect
+import os.path
+import pathlib
+import tempfile
+import warnings
+
+import geopandas as gpd
+import geopandas.base
+import pyproj
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from pandas.testing import assert_series_equal
+from shapely.geometry.base import BaseGeometry
+from shapely.geometry.linestring import LineString
+from shapely.geometry.polygon import Polygon
+
+import geoutils as gu
+
+GLACIER_OUTLINES_URL = "http://public.data.npolar.no/cryoclim/CryoClim_GAO_SJ_1990.zip"
+
+
+class TestVector:
+ landsat_b4_crop_path = gu.examples.get_path("everest_landsat_b4_cropped")
+ everest_outlines_path = gu.examples.get_path("everest_rgi_outlines")
+ aster_dem_path = gu.examples.get_path("exploradores_aster_dem")
+ aster_outlines_path = gu.examples.get_path("exploradores_rgi_outlines")
+ glacier_outlines = gu.Vector(GLACIER_OUTLINES_URL)
+
+ def test_init(self) -> None:
+ """Test class initiation works as intended"""
+
+ # First, with a URL filename
+ v = gu.Vector(GLACIER_OUTLINES_URL)
+ assert isinstance(v, gu.Vector)
+
+ # Second, with a string filename
+ v0 = gu.Vector(self.aster_outlines_path)
+ assert isinstance(v0, gu.Vector)
+
+ # Third, with a pathlib path
+ path = pathlib.Path(self.aster_outlines_path)
+ v1 = gu.Vector(path)
+ assert isinstance(v1, gu.Vector)
+
+ # Fourth, with a geopandas dataframe
+ v2 = gu.Vector(gpd.read_file(self.aster_outlines_path))
+ assert isinstance(v2, gu.Vector)
+
+ # Fifth, passing a Vector itself (points back to Vector passed)
+ v3 = gu.Vector(v2)
+ assert isinstance(v3, gu.Vector)
+
+ # Check errors are raised when filename has wrong type
+ with pytest.raises(TypeError, match="Filename argument should be a string, Path or geopandas.GeoDataFrame."):
+ gu.Vector(1) # type: ignore
+
+ def test_copy(self) -> None:
+ vector2 = self.glacier_outlines.copy()
+
+ assert vector2 is not self.glacier_outlines
+
+ vector2.ds = vector2.ds.query("NAME == 'Ayerbreen'")
+
+ assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0]
+
+ def test_info(self) -> None:
+
+ v = gu.Vector(GLACIER_OUTLINES_URL)
+
+ # Check default runs without error (prints to screen)
+ output = v.info()
+ assert output is None
+
+ # Otherwise returns info
+ output2 = v.info(verbose=False)
+ assert isinstance(output2, str)
+ list_prints = ["Filename", "Coordinate system", "Extent", "Number of features", "Attributes"]
+ assert all(p in output2 for p in list_prints)
+
+ def test_query(self) -> None:
+ vector2 = self.glacier_outlines.query("NAME == 'Ayerbreen'")
+
+ assert vector2 is not self.glacier_outlines
+
+ assert vector2.ds.shape[0] < self.glacier_outlines.ds.shape[0]
+
+ def test_save(self) -> None:
+ """Test the save wrapper for GeoDataFrame.to_file()."""
+
+ vector = gu.Vector(self.aster_outlines_path)
+
+ # Create a temporary file in a temporary directory
+ temp_dir = tempfile.TemporaryDirectory()
+ temp_file = os.path.join(temp_dir.name, "test.gpkg")
+
+ # Save and check the file exists
+ vector.save(temp_file)
+ assert os.path.exists(temp_file)
+
+ # Open and check the object is the same
+ vector_save = gu.Vector(temp_file)
+ vector_save.vector_equal(vector)
+
+ def test_bounds(self) -> None:
+ bounds = self.glacier_outlines.bounds
+
+ assert bounds.left < bounds.right
+ assert bounds.bottom < bounds.top
+
+ assert bounds.left == self.glacier_outlines.ds.total_bounds[0]
+ assert bounds.bottom == self.glacier_outlines.ds.total_bounds[1]
+ assert bounds.right == self.glacier_outlines.ds.total_bounds[2]
+ assert bounds.top == self.glacier_outlines.ds.total_bounds[3]
+
+ def test_footprint(self) -> None:
+
+ footprint = self.glacier_outlines.footprint
+
+ assert isinstance(footprint, gu.Vector)
+ assert footprint.vector_equal(self.glacier_outlines.get_footprint_projected(self.glacier_outlines.crs))
+
+
+class NeedToImplementWarning(FutureWarning):
+ """Warning to remember to implement new GeoPandas methods"""
+
+
+class TestGeoPandasMethods:
+ # Use two synthetic vectors
+ poly = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)])
+ gdf1 = gpd.GeoDataFrame({"geometry": [poly]}, crs="EPSG:4326")
+ synthvec1 = gu.Vector(gdf1)
+
+ # Create a synthetic LineString geometry
+ lines = LineString([(10, 10), (10.5, 10.5), (11, 11)])
+ gdf2 = gpd.GeoDataFrame({"geometry": [lines]}, crs="EPSG:4326")
+ synthvec2 = gu.Vector(gdf2)
+
+ # Use two real-life vectors
+ realvec1 = gu.Vector(gu.examples.get_path("exploradores_rgi_outlines"))
+ realvec2 = gu.Vector(gu.examples.get_path("everest_rgi_outlines"))
+
+ # Properties and methods derived from Shapely or GeoPandas
+ # List of properties and methods with non-geometric output that are implemented in GeoUtils
+ main_properties = ["crs", "geometry", "total_bounds"]
+ nongeo_properties = [
+ "area",
+ "length",
+ "interiors",
+ "geom_type",
+ "is_empty",
+ "is_ring",
+ "is_simple",
+ "is_valid",
+ "has_z",
+ ]
+ nongeo_methods = [
+ "contains",
+ "geom_equals",
+ "geom_almost_equals",
+ "geom_equals_exact",
+ "crosses",
+ "disjoint",
+ "intersects",
+ "overlaps",
+ "touches",
+ "within",
+ "covers",
+ "covered_by",
+ "distance",
+ ]
+
+ # List of properties and methods with geometric output that are implemented in GeoUtils
+ geo_properties = ["boundary", "unary_union", "centroid", "convex_hull", "envelope", "exterior"]
+ geo_methods = [
+ "representative_point",
+ "normalize",
+ "make_valid",
+ "difference",
+ "symmetric_difference",
+ "union",
+ "intersection",
+ "clip_by_rect",
+ "buffer",
+ "simplify",
+ "affine_transform",
+ "translate",
+ "rotate",
+ "scale",
+ "skew",
+ "dissolve",
+ "explode",
+ "sjoin",
+ "sjoin_nearest",
+ "overlay",
+ "to_crs",
+ "set_crs",
+ "rename_geometry",
+ "set_geometry",
+ "clip",
+ ]
+ # List of class methods
+ io_methods = [
+ "from_file",
+ "from_postgis",
+ "from_dict",
+ "from_features",
+ "to_feather",
+ "to_parquet",
+ "to_file",
+ "to_postgis",
+ "to_json",
+ "to_wkb",
+ "to_wkt",
+ "to_csv",
+ ]
+
+ # List of other properties and methods
+ other = ["has_sindex", "sindex", "estimate_utm_crs", "cx", "iterfeatures"]
+ all_declared = (
+ main_properties + nongeo_methods + nongeo_properties + geo_methods + geo_properties + other + io_methods
+ )
+
+ # Exceptions for GeoPandasBase functions not implemented (or deprecrated) in GeoSeries/GeoDataFrame
+ exceptions_unimplemented = [
+ "plot",
+ "explore",
+ "cascaded_union",
+ "bounds",
+ "relate",
+ "project",
+ "interpolate",
+ "equals",
+ "type",
+ "convert_dtypes",
+ "merge",
+ "apply",
+ "astype",
+ "minimum_bounding_circle",
+ "minimum_bounding_radius",
+ "get_coordinates",
+ "hilbert_distance",
+ "sample_points",
+ "copy",
+ ]
+ # Exceptions for IO/conversion that can be done directly from .ds
+ all_exceptions = exceptions_unimplemented
+
+ # Get all GeoPandasBase public methods with some exceptions
+ geobase_methods = gpd.base.GeoPandasBase.__dict__.copy()
+
+ # Get all GeoDataFrame public methods with some exceptions
+ gdf_methods = gpd.GeoDataFrame.__dict__.copy()
+
+ def test_overridden_funcs_exist(self) -> None:
+ """Check that all methods listed above exist in Vector."""
+
+ # Check that all methods declared in the class above exist in Vector
+ vector_methods = gu.Vector.__dict__
+
+ list_missing = [method for method in self.all_declared if method not in vector_methods.keys()]
+
+ assert len(list_missing) == 0, print(f"Test method listed that is not in GeoUtils: {list_missing}")
+
+ def test_geopandas_coverage(self) -> None:
+ """Check that all existing methods of GeoPandas are overridden, with a couple exceptions."""
+
+ # Merge the two
+ all_methods = self.geobase_methods.copy()
+ all_methods.update(self.gdf_methods)
+
+ # Remove exceptions we don't want to reuse from GeoPandas (mirrored in Vector)
+ name_all_methods = list(all_methods.keys())
+ public_methods = [method for method in name_all_methods if method[0] != "_"]
+
+ covered_methods = [method for method in public_methods if method not in self.all_exceptions]
+
+ # Check that all methods declared in the class above are covered in Vector
+ list_missing = [method for method in covered_methods if method not in self.all_declared]
+
+ if len(list_missing) != 0:
+ warnings.warn(
+ f"New GeoPandas methods are not implemented in GeoUtils: {list_missing}", NeedToImplementWarning
+ )
+
+ @pytest.mark.parametrize("method", nongeo_methods + geo_methods) # type: ignore
+ def test_overridden_funcs_args(self, method: str) -> None:
+ """Check that all methods overridden have the same arguments as in GeoPandas."""
+
+ # Get GeoPandas class where the methods live
+ if method in self.geobase_methods.keys():
+ upstream_class = gpd.base.GeoPandasBase
+ elif method in self.gdf_methods.keys():
+ upstream_class = gpd.GeoDataFrame
+ else:
+ raise ValueError("Method did not belong to GeoDataFrame or GeoPandasBase class.")
+
+ # Get a full argument inspection object for each class
+ argspec_upstream = inspect.getfullargspec(getattr(upstream_class, method))
+ argspec_geoutils = inspect.getfullargspec(getattr(gu.Vector, method))
+
+ # Check that all positional arguments are the same
+ if argspec_upstream.args != argspec_geoutils.args:
+ warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
+
+ # Check that the *args and **kwargs argument are declared consistently
+ if argspec_upstream.varargs != argspec_geoutils.varargs:
+ warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
+
+ if argspec_upstream.varkw != argspec_geoutils.varkw:
+ warnings.warn("Argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
+
+ # Check that default argument values are the same
+ if argspec_upstream.defaults != argspec_geoutils.defaults:
+ warnings.warn("Default argument of GeoPandas method not consistent in GeoUtils.", NeedToImplementWarning)
+
+ @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore
+ @pytest.mark.parametrize("method", nongeo_properties) # type: ignore
+ def test_nongeo_properties(self, vector: gu.Vector, method: str) -> None:
+ """Check non-geometric properties are consistent with GeoPandas."""
+
+ # Remove warnings about operations in a non-projected system, and future changes
+ warnings.simplefilter("ignore", category=UserWarning)
+ warnings.simplefilter("ignore", category=FutureWarning)
+
+ # Get method for each class
+ output_geoutils = getattr(vector, method)
+ output_geopandas = getattr(vector.ds, method)
+
+ # Assert equality
+ assert_series_equal(output_geoutils, output_geopandas)
+
+ @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore
+ @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore
+ @pytest.mark.parametrize("method", nongeo_methods) # type: ignore
+ def test_nongeo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None:
+ """
+ Check non-geometric methods are consistent with GeoPandas.
+ All these methods require two inputs ("other", "df", or "right" argument), except one.
+ """
+
+ # Remove warnings about operations in a non-projected system, and future changes
+ warnings.simplefilter("ignore", category=UserWarning)
+ warnings.simplefilter("ignore", category=FutureWarning)
+
+ # Get method for each class
+ if method != "geom_equals_exact":
+ output_geoutils = getattr(vector1, method)(vector2)
+ output_geopandas = getattr(vector1.ds, method)(vector2.ds)
+ else:
+ output_geoutils = getattr(vector1, method)(vector2, tolerance=0.1)
+ output_geopandas = getattr(vector1.ds, method)(vector2.ds, tolerance=0.1)
+
+ # Assert equality
+ assert_series_equal(output_geoutils, output_geopandas)
+
+ @pytest.mark.parametrize("vector", [synthvec1, synthvec2, realvec1, realvec2]) # type: ignore
+ @pytest.mark.parametrize("method", geo_properties) # type: ignore
+ def test_geo_properties(self, vector: gu.Vector, method: str) -> None:
+ """Check geometric properties are consistent with GeoPandas."""
+
+ # Remove warnings about operations in a non-projected system, and future changes
+ warnings.simplefilter("ignore", category=UserWarning)
+ warnings.simplefilter("ignore", category=FutureWarning)
+
+ # Get method for each class
+ output_geoutils = getattr(vector, method)
+ output_geopandas = getattr(vector.ds, method)
+
+ # Assert output types
+ assert isinstance(output_geoutils, gu.Vector)
+ assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame, BaseGeometry))
+
+ # Separate cases depending on GeoPandas' output
+ if isinstance(output_geopandas, gpd.GeoSeries):
+ # Assert geoseries equality
+ assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas)
+ elif isinstance(output_geopandas, BaseGeometry):
+ assert_geodataframe_equal(
+ output_geoutils.ds, gpd.GeoDataFrame({"geometry": [output_geopandas]}, crs=vector.crs)
+ )
+ else:
+ assert_geodataframe_equal(output_geoutils.ds, output_geopandas)
+
+ specific_method_args = {
+ "buffer": {"distance": 1},
+ "clip_by_rect": {"xmin": 10.5, "ymin": 10.5, "xmax": 11, "ymax": 11},
+ "affine_transform": {"matrix": [1, 1, 1, 1, 1, 1]},
+ "translate": {"xoff": 1, "yoff": 1, "zoff": 0},
+ "rotate": {"angle": 90},
+ "scale": {"xfact": 1.1, "yfact": 1.1, "zfact": 1.1, "origin": "center"},
+ "skew": {"xs": 1.1, "ys": 1.1},
+ "interpolate": {"distance": 1},
+ "simplify": {"tolerance": 0.1},
+ "to_crs": {"crs": pyproj.CRS.from_epsg(32610)},
+ "set_crs": {"crs": pyproj.CRS.from_epsg(32610), "allow_override": True},
+ "rename_geometry": {"col": "lol"},
+ "set_geometry": {"col": synthvec1.geometry},
+ "clip": {"mask": poly},
+ }
+
+ @pytest.mark.parametrize("vector1", [synthvec1, realvec1]) # type: ignore
+ @pytest.mark.parametrize("vector2", [synthvec2, realvec2]) # type: ignore
+ @pytest.mark.parametrize("method", geo_methods) # type: ignore
+ def test_geo_methods(self, vector1: gu.Vector, vector2: gu.Vector, method: str) -> None:
+ """Check geometric methods are consistent with GeoPandas."""
+
+ # Remove warnings about operations in a non-projected system, and future changes
+ warnings.simplefilter("ignore", category=UserWarning)
+ warnings.simplefilter("ignore", category=FutureWarning)
+
+ # Methods that require two inputs
+ if method in [
+ "difference",
+ "symmetric_difference",
+ "union",
+ "intersection",
+ "sjoin",
+ "sjoin_nearest",
+ "overlay",
+ ]:
+ output_geoutils = getattr(vector1, method)(vector2)
+ output_geopandas = getattr(vector1.ds, method)(vector2.ds)
+ # Methods that require zero input
+ elif method in ["representative_point", "normalize", "make_valid", "dissolve", "explode"]:
+ output_geoutils = getattr(vector1, method)()
+ output_geopandas = getattr(vector1.ds, method)()
+ elif method in self.specific_method_args.keys():
+ output_geoutils = getattr(vector1, method)(**self.specific_method_args[method])
+ output_geopandas = getattr(vector1.ds, method)(**self.specific_method_args[method])
+ else:
+ raise ValueError(f"The method '{method}' is not covered by this test.")
+
+ # Assert output types
+ assert isinstance(output_geoutils, gu.Vector)
+ assert isinstance(output_geopandas, (gpd.GeoSeries, gpd.GeoDataFrame))
+
+ # Separate cases depending on GeoPandas' output, and nature of the function
+ # Simplify is a special case that can make geometries invalid, so adjust test
+ if method == "simplify":
+ # TODO: Unskip this random test failure (one index not matching) when this is fixed in GeoPandas/Shapely
+ pass
+ # assert_geoseries_equal(
+ # output_geopandas.make_valid(), output_geoutils.ds.geometry.make_valid(), check_less_precise=True
+ # )
+ # For geoseries output, check equality of it
+ elif isinstance(output_geopandas, gpd.GeoSeries):
+ assert_geoseries_equal(output_geoutils.ds.geometry, output_geopandas)
+ # For geodataframe output, check equality
+ else:
+ assert_geodataframe_equal(output_geoutils.ds, output_geopandas)