Skip to content

Commit

Permalink
Merge pull request #3 from iosefa/tests/add-basic-tests
Browse files Browse the repository at this point in the history
Tests/add basic tests
  • Loading branch information
iosefa authored Sep 20, 2024
2 parents d90d5ae + 7735696 commit 05a95db
Show file tree
Hide file tree
Showing 23 changed files with 1,504 additions and 215 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@ jobs:
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Run tests
run: |
pip install -r requirements.txt
pip install -r requirements-dev.txt
pytest
- name: Publish to PyPI
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/* -u __token__ -p $PYPI_TOKEN
twine upload dist/* -u __token__ -p $PYPI_TOKEN
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,11 @@ data/
# macOS specific
.DS_Store
*/.DS_Store
**/.DS_Store
**/.DS_Store

# Ignore QGIS generated files
*.gpkg-shm
*.gpkg-wal

# tests
test_data/output.tif
2 changes: 1 addition & 1 deletion .idea/PyForestScan.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,30 @@ To build locally and contribute to PyForestScan, you will need the following dep
- PDAL and Python PDAL bindings
- GDAL
- Python
- Python requirements (requirements.txt)
- Python requirements (requirements.txt and requirements-dev.txt)

## Testing

PyForestScan uses `pytest` for running tests. To run the tests, you can follow the steps below:

### Running Tests

1. Install the development dependencies:
```bash
pip install -r requirements-dev.txt
```

2. Run the tests using `pytest`:
```bash
pytest
```

This will run all the test cases under the `tests/` directory. The tests include functionality checks for filters, forest metrics, and other core components of the PyForestScan library.

You can also run specific tests by passing the test file or function name:
```bash
pytest tests/test_calculate.py
```

## Contributing

Expand Down
312 changes: 131 additions & 181 deletions notebooks/demo.ipynb

Large diffs are not rendered by default.

104 changes: 101 additions & 3 deletions pyforestscan/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,55 @@
from scipy.stats import entropy


def generate_dtm(ground_points, resolution=2.0):
    """
    Generates a Digital Terrain Model (DTM) raster from classified ground points.

    The DTM cell value is the *lowest* Z observed in each (x, y) grid cell;
    cells containing no ground points are filled with the nodata value -9999.

    :param ground_points: Point cloud arrays of classified ground points; each
        array is a structured array with 'X', 'Y', and 'Z' fields.
    :type ground_points: list
    :param resolution: Spatial resolution of the DTM in meters. Defaults to 2.0.
    :type resolution: float
    :return: A tuple containing the DTM as a 2D NumPy array and the spatial
        extent [x_min, x_max, y_min, y_max].
    :rtype: tuple (numpy.ndarray, list)
    :raises ValueError:
        If no ground points are found for DTM generation, or if the point
        cloud data is missing 'X', 'Y', or 'Z' fields.
    """
    #todo: add parameter to allow interpolation of NA values.
    try:
        # NumPy raises KeyError (recent versions) or ValueError (older ones)
        # for a missing structured-array field name, so catch both.
        x = np.array([pt['X'] for array in ground_points for pt in array])
        y = np.array([pt['Y'] for array in ground_points for pt in array])
        z = np.array([pt['Z'] for array in ground_points for pt in array])
    except (KeyError, ValueError):
        raise ValueError("Ground point cloud data missing 'X', 'Y', or 'Z' fields.")

    # Explicit empty check; otherwise x.min() below fails with a cryptic
    # "zero-size array" ValueError.
    if x.size == 0:
        raise ValueError("No ground points found for DTM generation.")

    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    # Grid edges; the extra `resolution` keeps the max coordinate inside the
    # final bin.
    x_bins = np.arange(x_min, x_max + resolution, resolution)
    y_bins = np.arange(y_min, y_max + resolution, resolution)

    # Map each point to a 0-based cell index.
    x_indices = np.digitize(x, x_bins) - 1
    y_indices = np.digitize(y, y_bins) - 1

    dtm = np.full((len(x_bins)-1, len(y_bins)-1), np.nan)

    # Keep the lowest elevation seen in each cell (terrain surface).
    for xi, yi, zi in zip(x_indices, y_indices, z):
        if 0 <= xi < dtm.shape[0] and 0 <= yi < dtm.shape[1]:
            if np.isnan(dtm[xi, yi]) or zi < dtm[xi, yi]:
                dtm[xi, yi] = zi

    # Empty cells become the conventional nodata value.
    dtm = np.nan_to_num(dtm, nan=-9999)

    extent = [x_min, x_max, y_min, y_max]

    return dtm, extent


def assign_voxels(arr, voxel_resolution):
"""
Assigns voxel grids to spatial data points based on the specified resolutions.
Expand All @@ -19,9 +68,15 @@ def assign_voxels(arr, voxel_resolution):
"""
x_resolution, y_resolution, z_resolution = voxel_resolution

x = arr['X']
y = arr['Y']
z = arr['HeightAboveGround']
try:
x = arr['X']
y = arr['Y']
z = arr['HeightAboveGround']
except ValueError:
raise ValueError("Point cloud data missing 'X', 'Y', or 'HeightAboveGround' fields.")

if x.size == 0 or y.size == 0 or z.size == 0:
raise ValueError("Point cloud data contains no points.")

x_min, x_max = x.min(), x.max()
y_min, y_max = y.min(), y.max()
Expand Down Expand Up @@ -84,6 +139,8 @@ def calculate_pai(pad, min_height=1, max_height=None):
"""
if max_height is None:
max_height = pad.shape[2]
if min_height >= max_height:
raise ValueError("Minimum height index must be less than maximum height index.")
pai = np.sum(pad[:, :, min_height:max_height], axis=2)

return pai
Expand Down Expand Up @@ -116,3 +173,44 @@ def calculate_fhd(voxel_returns):
fhd[sum_counts.squeeze() == 0] = np.nan

return fhd


def calculate_chm(arr, voxel_resolution):
    """
    Calculate Canopy Height Model (CHM) for a given voxel size.

    The height is the highest HeightAboveGround value in each (x, y) voxel;
    cells containing no points are left as NaN.

    :param arr: Input array-like object containing point cloud data with 'X',
        'Y', and 'HeightAboveGround' fields.
    :type arr: numpy.ndarray
    :param voxel_resolution:
        The resolution for x and y dimensions of the voxel grid.
    :type voxel_resolution: tuple of floats (x_resolution, y_resolution)
    :return:
        A tuple containing the CHM as a 2D numpy array and the spatial extent.
    :rtype: tuple of (numpy.ndarray, list)
    :raises ValueError:
        If the point cloud data is missing required fields or contains no points.
    """
    x_resolution, y_resolution = voxel_resolution[:2]
    try:
        # Validate fields up front, mirroring assign_voxels; NumPy raises
        # KeyError (recent versions) or ValueError (older ones) here.
        x = arr['X']
        y = arr['Y']
        z = arr['HeightAboveGround']
    except (KeyError, ValueError):
        raise ValueError("Point cloud data missing 'X', 'Y', or 'HeightAboveGround' fields.")

    if x.size == 0:
        raise ValueError("Point cloud data contains no points.")

    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    # Grid edges; the extra resolution keeps the max coordinate in-range.
    x_bins = np.arange(x_min, x_max + x_resolution, x_resolution)
    y_bins = np.arange(y_min, y_max + y_resolution, y_resolution)

    x_indices = np.digitize(x, x_bins) - 1
    y_indices = np.digitize(y, y_bins) - 1

    chm = np.full((len(x_bins)-1, len(y_bins)-1), np.nan)

    # Keep the highest point per cell (canopy top).
    for xi, yi, zi in zip(x_indices, y_indices, z):
        if 0 <= xi < chm.shape[0] and 0 <= yi < chm.shape[1]:
            if np.isnan(chm[xi, yi]) or zi > chm[xi, yi]:
                chm[xi, yi] = zi

    extent = [x_min, x_max, y_min, y_max]

    return chm, extent
115 changes: 113 additions & 2 deletions pyforestscan/filters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pyforestscan.handlers import _build_pdal_pipeline
from pyforestscan.pipeline import _filter_hag, _filter_ground
from pyforestscan.pipeline import _filter_hag, _filter_ground, _filter_statistical_outlier, _filter_smrf, \
_filter_radius, _select_ground


def filter_hag(arrays, lower_limit=0, upper_limit=None):
Expand Down Expand Up @@ -29,6 +30,116 @@ def filter_ground(arrays):
:return: Processed point cloud arrays after removing ground points.
:rtype: list
"""
# Build and execute the PDAL pipeline with the ground filter
pipeline = _build_pdal_pipeline(arrays, [_filter_ground()])
return pipeline.arrays


def filter_select_ground(arrays):
    """
    Applies a filter to select ground points (classification 2) from the input arrays.

    :param arrays: List of point cloud arrays to be processed.
    :type arrays: list
    :return: Processed point cloud arrays containing only ground points.
    :rtype: list
    """
    # Delegates to a PDAL pipeline built from the ground-selection stage.
    pipeline = _build_pdal_pipeline(arrays, [_select_ground()])
    return pipeline.arrays


def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0):
    """
    Removes statistical outliers from the point cloud to enhance data quality.

    :param arrays: List of point cloud arrays obtained from read_lidar.
    :type arrays: list
    :param mean_k: Number of nearest neighbors considered for outlier removal.
    :type mean_k: int
    :param multiplier: Standard deviation multiplier for the outlier threshold.
    :type multiplier: float
    :return: Cleaned point cloud arrays without outliers.
    :rtype: list
    :raises RuntimeError:
        If there's an error during the pipeline execution.
    :raises ValueError:
        If no data is returned after outlier removal.
    """
    # Single-stage PDAL pipeline: statistical outlier removal.
    pipeline_stages = [
        _filter_statistical_outlier(mean_k=mean_k, multiplier=multiplier)
    ]

    try:
        pipeline = _build_pdal_pipeline(arrays, pipeline_stages)
    except RuntimeError as e:
        # Re-raise with context so callers can tell which pipeline failed.
        raise RuntimeError(f"Outlier Removal Pipeline Failed: {e}")

    processed_arrays = pipeline.arrays

    # An empty result means every point was dropped — surface that explicitly.
    if not processed_arrays:
        raise ValueError("No data returned after outlier removal.")

    return processed_arrays


def classify_ground_points(
    arrays,
    ignore_class="Classification[7:7]",
    cell=1.0,
    cut=0.0,
    returns="last,only",
    scalar=1.25,
    slope=0.15,
    threshold=0.5,
    window=18.0
):
    """
    Classify ground points in the point cloud by applying the SMRF filter.

    :param arrays: Cleaned point cloud arrays after outlier removal.
    :type arrays: list
    :param ignore_class: Classification codes to ignore during filtering.
    :type ignore_class: str
    :param cell: Cell size in meters.
    :type cell: float
    :param cut: Cut net size (0 skips net cutting).
    :type cut: float
    :param returns: Return types to include in output
        ("first", "last", "intermediate", "only").
    :type returns: str
    :param scalar: Elevation scalar.
    :type scalar: float
    :param slope: Slope threshold for ground classification.
    :type slope: float
    :param threshold: Elevation threshold.
    :type threshold: float
    :param window: Max window size in meters.
    :type window: float
    :return: Point cloud arrays with classified ground points.
    :rtype: list
    :raises RuntimeError: If there's an error during the pipeline execution.
    :raises ValueError: If no data is returned after SMRF classification.
    """
    # Build the single SMRF stage up front, then hand it to the pipeline.
    smrf_stage = _filter_smrf(
        cell=cell,
        cut=cut,
        ignore=ignore_class,
        returns=returns,
        scalar=scalar,
        slope=slope,
        threshold=threshold,
        window=window
    )

    try:
        pipeline = _build_pdal_pipeline(arrays, [smrf_stage])
    except RuntimeError as e:
        # Add context so callers can identify which pipeline stage failed.
        raise RuntimeError(f"SMRF Classification Pipeline Failed: {e}")

    classified_arrays = pipeline.arrays
    if not classified_arrays:
        raise ValueError("No data returned after SMRF classification.")

    return classified_arrays


def downsample_poisson(arrays, thin_radius):
    """
    Downsamples the point cloud with a radius-based thinning filter.

    :param arrays: List of point cloud arrays to be downsampled.
    :type arrays: list
    :param thin_radius: Sampling radius in the point cloud's units;
        presumably the minimum spacing between retained points —
        confirm against the _filter_radius pipeline stage.
    :type thin_radius: float
    :return: Downsampled point cloud arrays.
    :rtype: list
    """
    pipeline = _build_pdal_pipeline(arrays, [_filter_radius(thin_radius)])
    return pipeline.arrays
23 changes: 18 additions & 5 deletions pyforestscan/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

from pyproj import CRS
from shapely.geometry import MultiPolygon

from pyforestscan.pipeline import _crop_polygon, _filter_radius, _hag_delaunay, _hag_raster
from pyproj.exceptions import CRSError


def simplify_crs(crs_list):
Expand All @@ -18,9 +18,19 @@ def simplify_crs(crs_list):
:type crs_list: list
:return: List of EPSG codes corresponding to the input CRS definitions.
:rtype: list
:raises ValueError: If any of the CRS definitions cannot be converted to an EPSG code.
:raises CRSError: If any of the CRS definitions cannot be converted to an EPSG code.
"""
return [CRS(crs).to_epsg() for crs in crs_list]
epsg_codes = []
for crs in crs_list:
try:
crs_obj = CRS(crs)
epsg = crs_obj.to_epsg()
if epsg is None:
raise CRSError(f"Cannot convert CRS '{crs}' to an EPSG code.")
epsg_codes.append(epsg)
except CRSError as e:
raise CRSError(f"Error converting CRS '{crs}': {e}") from None
return epsg_codes


def load_polygon_from_file(vector_file_path, index=0):
Expand Down Expand Up @@ -308,7 +318,7 @@ def write_las(arrays, output_file, srs=None, compress=True):
pipeline.execute()


def create_geotiff(layer, output_file, crs, spatial_extent):
def create_geotiff(layer, output_file, crs, spatial_extent, nodata=-9999):
"""
Creates a GeoTIFF file from the given data layer. Note, it performs a transpose on the layer.
Expand All @@ -320,6 +330,8 @@ def create_geotiff(layer, output_file, crs, spatial_extent):
:type crs: str
:param spatial_extent: The spatial extent of the data, defined as (x_min, x_max, y_min, y_max).
:type spatial_extent: tuple
:param nodata: The value to use for NoData areas in the GeoTIFF. Defaults to -9999.
:type nodata: float or int, optional
:return: None
:raises rasterio.errors.RasterioError: If there is an error in creating the GeoTIFF.
"""
Expand All @@ -343,6 +355,7 @@ def create_geotiff(layer, output_file, crs, spatial_extent):
count=1,
dtype=layer.dtype.name,
crs=crs,
transform=transform
transform=transform,
nodata=nodata
) as new_dataset:
new_dataset.write(layer, 1)
Loading

0 comments on commit 05a95db

Please sign in to comment.