Added subsampling functionality for coreg fitting.

GlacioHack · Apr 7, 2021 · 593ef00 · 593ef00
1 parent 625798b
commit 593ef00
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 1 deletion.
diff --git a/tests/test_coreg.py b/tests/test_coreg.py
@@ -9,6 +9,7 @@
 import copy
 import os
 import tempfile
+import time
 import warnings
 from typing import Any
 
@@ -304,3 +305,45 @@ def test_coreg_add(self):
         # Try to add two CoregPipelines
         bias5 = bias3 + bias3
         assert bias5.to_matrix()[2, 3] == bias * 4
+
+    def test_subsample(self):
+        warnings.simplefilter("error")
+
+        # Test subsampled bias correction
+        bias_sub = coreg.BiasCorr()
+
+        # Fit the bias using 50% of the unmasked data using a fraction
+        bias_sub.fit(**self.fit_params, subsample=0.5)
+        # Do the same but specify the pixel count instead.
+        # They are not perfectly equal (np.count_nonzero(self.mask) // 2 would be exact)
+        # But this would just repeat the subsample code, so that makes little sense to test.
+        bias_sub.fit(**self.fit_params, subsample=self.tba.data.size // 2)
+
+        # Do full bias corr to compare
+        bias_full = coreg.BiasCorr()
+        bias_full.fit(**self.fit_params)
+
+        # Check that the estimated biases are similar
+        assert abs(bias_sub._meta["bias"] - bias_full._meta["bias"]) < 0.1
+
+        # Test ICP with subsampling
+        icp_full = coreg.ICP(max_iterations=20)
+        icp_sub = coreg.ICP(max_iterations=20)
+
+        # Measure the start and stop time to get the duration
+        start_time = time.time()
+        icp_full.fit(**self.fit_params)
+        icp_full_duration = time.time() - start_time
+
+        # Do the same with 50% subsampling
+        start_time = time.time()
+        icp_sub.fit(**self.fit_params, subsample=0.5)
+        icp_sub_duration = time.time() - start_time
+
+        # Make sure that the subsampling increased performance
+        assert icp_full_duration > icp_sub_duration
+
+        # Calculate the difference in the full vs. subsampled ICP matrices
+        matrix_diff = np.abs(icp_full.to_matrix() - icp_sub.to_matrix())
+        # Check that the x/y/z differences do not exceed 30cm
+        assert np.count_nonzero(matrix_diff > 0.3) == 0
diff --git a/xdem/coreg.py b/xdem/coreg.py
@@ -1104,7 +1104,8 @@ def fit(self, reference_dem: Union[np.ndarray, np.ma.masked_array],
             dem_to_be_aligned: Union[np.ndarray, np.ma.masked_array],
             mask: Optional[np.ndarray] = None,
             transform: Optional[rio.transform.Affine] = None,
-            weights: Optional[np.ndarray] = None):
+            weights: Optional[np.ndarray] = None,
+            subsample: Union[float, int] = 1.0):
         """
         Estimate the coregistration transform on the given DEMs.
 
@@ -1113,6 +1114,7 @@ def fit(self, reference_dem: Union[np.ndarray, np.ma.masked_array],
         :param mask: Optional. 2D boolean array of areas to include in the analysis.
         :param transform: Optional. Transform of the reference_dem. Mandatory in some cases.
         :param weights: Optional. Per-pixel weights for the coregistration.
+        :param subsample: Subsample the input to increase performance. <1 is parsed as a fraction. >1 is a pixel count.
         """
         # Make sure that the mask has an expected format.
         if mask is not None:
@@ -1132,6 +1134,20 @@ def fit(self, reference_dem: Union[np.ndarray, np.ma.masked_array],
         # The full mask (inliers=True) is the inverse of the above masks and the provided mask.
         full_mask = (~ref_mask & ~tba_mask & (np.asarray(mask) if mask is not None else True)).squeeze()
 
+        # If subsample is not equal to one, subsampling should be performed.
+        if subsample != 1.0:
+            # If subsample is less than one, it is parsed as a fraction (e.g. 0.8 => retain 80% of the values)
+            if subsample < 1.0:
+                subsample = int(np.count_nonzero(full_mask) * (1 - subsample))
+
+            # Randomly pick N inliers in the full_mask where N=subsample
+            random_falses = np.random.choice(np.argwhere(full_mask.flatten()).squeeze(), int(subsample), replace=False)
+            # Convert the 1D indices to 2D indices
+            cols = (random_falses // full_mask.shape[0]).astype(int)
+            rows = random_falses % full_mask.shape[0]
+            # Set the N random inliers to be parsed as outliers instead.
+            full_mask[rows, cols] = False
+
         # The arrays to provide the functions will be ndarrays with NaNs for masked out areas.
         ref_dem = np.where(full_mask, np.asarray(reference_dem), np.nan).squeeze()
         tba_dem = np.where(full_mask, np.asarray(dem_to_be_aligned), np.nan).squeeze()