From 2c9f2b3fddfc227a9a4b85d9c8ff8f70c620fbbe Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 9 May 2024 22:17:42 -0800 Subject: [PATCH] Remove legacy NumPy random number generator (#511) --- dev-environment.yml | 4 +- .../comparison_plot_spatial_interpolation.py | 2 +- doc/source/code/spatialstats_standardizing.py | 4 +- .../spatialstats_stationarity_assumption.py | 8 +- environment.yml | 4 +- examples/advanced/plot_norm_regional_hypso.py | 5 +- requirements.txt | 2 +- tests/test_coreg/test_affine.py | 2 +- tests/test_coreg/test_base.py | 7 +- tests/test_coreg/test_biascorr.py | 8 +- tests/test_ddem.py | 6 +- tests/test_demcollection.py | 10 ++- tests/test_examples.py | 18 ++--- tests/test_filters.py | 10 ++- tests/test_fit.py | 12 +-- tests/test_spatialstats.py | 33 +++++---- tests/test_terrain.py | 9 ++- tests/test_volume.py | 3 +- xdem/coreg/base.py | 13 ++-- xdem/fit.py | 4 +- xdem/misc.py | 20 +++-- xdem/spatialstats.py | 73 ++++++------------- 22 files changed, 125 insertions(+), 132 deletions(-) diff --git a/dev-environment.yml b/dev-environment.yml index c8dddc43..369872e2 100644 --- a/dev-environment.yml +++ b/dev-environment.yml @@ -13,7 +13,7 @@ dependencies: - tqdm - scikit-image=0.* - scikit-gstat>=1.0 - - geoutils>=0.1.4,<0.2 + - geoutils>=0.1.5,<0.2 # Development-specific, to mirror manually in setup.cfg [options.extras_require]. - pip @@ -51,4 +51,4 @@ dependencies: - opencv-contrib-python-headless # To run CI against latest GeoUtils -# - git+https://github.com/rhugonnet/geoutils.git@fix_to_points +# - git+https://github.com/rhugonnet/geoutils.git diff --git a/doc/source/code/comparison_plot_spatial_interpolation.py b/doc/source/code/comparison_plot_spatial_interpolation.py index 47bf384e..c80e83e7 100644 --- a/doc/source/code/comparison_plot_spatial_interpolation.py +++ b/doc/source/code/comparison_plot_spatial_interpolation.py @@ -13,7 +13,7 @@ # The example DEMs are void-free, so let's make some random voids. ddem.data.mask = np.zeros_like(ddem.data, dtype=bool) # Reset the mask # Introduce 50000 nans randomly throughout the dDEM. 
-ddem.data.mask.ravel()[np.random.choice(ddem.data.size, 50000, replace=False)] = True +ddem.data.mask.ravel()[np.random.default_rng(42).choice(ddem.data.size, 50000, replace=False)] = True ddem.interpolate(method="linear") diff --git a/doc/source/code/spatialstats_standardizing.py b/doc/source/code/spatialstats_standardizing.py index aca744e9..8487f216 100644 --- a/doc/source/code/spatialstats_standardizing.py +++ b/doc/source/code/spatialstats_standardizing.py @@ -5,8 +5,8 @@ # Example x vector mu = 15 sig = 5 -np.random.seed(42) -y = np.random.normal(mu, sig, size=300) +rng = np.random.default_rng(42) +y = rng.normal(mu, sig, size=300) fig, ax1 = plt.subplots(figsize=(8, 3)) diff --git a/doc/source/code/spatialstats_stationarity_assumption.py b/doc/source/code/spatialstats_stationarity_assumption.py index 24279859..35d3d963 100644 --- a/doc/source/code/spatialstats_stationarity_assumption.py +++ b/doc/source/code/spatialstats_stationarity_assumption.py @@ -8,10 +8,10 @@ x = np.linspace(0, 1, 200) sig = 0.2 -np.random.seed(42) -y_rand1 = np.random.normal(0, sig, size=len(x)) -y_rand2 = np.random.normal(0, sig, size=len(x)) -y_rand3 = np.random.normal(0, sig, size=len(x)) +rng = np.random.default_rng(42) +y_rand1 = rng.normal(0, sig, size=len(x)) +y_rand2 = rng.normal(0, sig, size=len(x)) +y_rand3 = rng.normal(0, sig, size=len(x)) y_mean = np.array([0.5 * xval - 0.25 if xval > 0.5 else 0.5 * (1 - xval) - 0.25 for xval in x]) diff --git a/environment.yml b/environment.yml index 29c7780f..3096f22e 100644 --- a/environment.yml +++ b/environment.yml @@ -13,9 +13,9 @@ dependencies: - tqdm - scikit-image=0.* - scikit-gstat>=1.0 - - geoutils>=0.1.4,<0.2 + - geoutils>=0.1.5,<0.2 - pip # To run CI against latest GeoUtils # - pip: -# - git+https://github.com/rhugonnet/geoutils.git@fix_to_points +# - git+https://github.com/rhugonnet/geoutils.git diff --git a/examples/advanced/plot_norm_regional_hypso.py b/examples/advanced/plot_norm_regional_hypso.py index c6eacada..6a313c67 100644 --- a/examples/advanced/plot_norm_regional_hypso.py +++ b/examples/advanced/plot_norm_regional_hypso.py @@ -58,8 +58,9 @@ # %% # To test the method, we can generate a semi-random mask to assign nans to glacierized areas. # Let's remove 30% of the data. 
-np.random.seed(42)
-random_nans = (xdem.misc.generate_random_field(dem_1990.shape, corr_size=5) > 0.7) & (glacier_index_map > 0)
+random_nans = (xdem.misc.generate_random_field(dem_1990.shape, corr_size=5, random_state=42) > 0.7) & (
+    glacier_index_map > 0
+)
 
 random_nans.plot()
 
diff --git a/requirements.txt b/requirements.txt
index a9f1cc7e..6b4f8df5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,5 +11,5 @@ scipy>=1.0,<1.13
 tqdm
 scikit-image==0.*
 scikit-gstat>=1.0
-geoutils>=0.1.4,<0.2
+geoutils>=0.1.5,<0.2
 pip
diff --git a/tests/test_coreg/test_affine.py b/tests/test_coreg/test_affine.py
index ebf8e7a5..4dd61b45 100644
--- a/tests/test_coreg/test_affine.py
+++ b/tests/test_coreg/test_affine.py
@@ -143,7 +143,7 @@ def test_coreg_example(self, verbose: bool = False) -> None:
 
         # Check the output metadata is always the same
         shifts = (nuth_kaab._meta["offset_east_px"], nuth_kaab._meta["offset_north_px"], nuth_kaab._meta["vshift"])
-        assert shifts == pytest.approx((-0.463, -0.133, -1.9876264671765433))
+        assert shifts == pytest.approx((-0.463, -0.1339999, -1.9922009))
 
     def test_gradientdescending(self, subsample: int = 10000, inlier_mask: bool = True, verbose: bool = False) -> None:
         """
diff --git a/tests/test_coreg/test_base.py b/tests/test_coreg/test_base.py
index 264e26a5..6b3ad654 100644
--- a/tests/test_coreg/test_base.py
+++ b/tests/test_coreg/test_base.py
@@ -95,7 +95,8 @@ def test_error_method(self) -> None:
         assert vshiftcorr.error(dem1, dem2, transform=affine, crs=crs, error_type="median") == -2
 
         # Create random noise and see if the standard deviation is equal (it should)
-        dem3 = dem1.copy() + np.random.random(size=dem1.size).reshape(dem1.shape)
+        rng = np.random.default_rng(42)
+        dem3 = dem1.copy() + rng.random(size=dem1.size).reshape(dem1.shape)
         assert abs(vshiftcorr.error(dem1, dem3, transform=affine, crs=crs, error_type="std") - np.std(dem3)) < 1e-6
 
     @pytest.mark.parametrize("subsample", [10, 10000, 0.5, 1])  # type: ignore
@@ -104,8 +105,8 @@ def test_get_subsample_on_valid_mask(self, subsample: float | int) -> None:
 
         # Define a valid mask
         width = height = 50
-        np.random.seed(42)
-        valid_mask = np.random.randint(low=0, high=2, size=(width, height), dtype=bool)
+        rng = np.random.default_rng(42)
+        valid_mask = rng.integers(low=0, high=2, size=(width, height), dtype=bool)
 
         # Define a class with a subsample and random_state in the metadata
         coreg = Coreg(meta={"subsample": subsample, "random_state": 42})
diff --git a/tests/test_coreg/test_biascorr.py b/tests/test_coreg/test_biascorr.py
index f7a61168..26f019ce 100644
--- a/tests/test_coreg/test_biascorr.py
+++ b/tests/test_coreg/test_biascorr.py
@@ -42,7 +42,6 @@ class TestBiasCorr:
     )
 
     # Convert DEMs to points with a bit of subsampling for speed-up
-    # TODO: Simplify once this GeoUtils issue is resolved: https://github.com/GlacioHack/geoutils/issues/499
     tba_pts = tba.to_pointcloud(data_column_name="z", subsample=50000, random_state=42).ds
 
     ref_pts = ref.to_pointcloud(data_column_name="z", subsample=50000, random_state=42).ds
@@ -328,6 +327,8 @@ def test_biascorr__bin_and_fit_1d(self, fit_args, fit_func, fit_optimizer, bin_s
         # Curve fit can be unhappy in certain circumstances for numerical estimation of covariance
         # We don't care for this test
         warnings.filterwarnings("ignore", message="Covariance of the parameters could not be estimated*")
+        # Applying the transform can create data exactly equal to the nodata value
+        warnings.filterwarnings("ignore", category=UserWarning, message="Unmasked values equal to the nodata value*")
 
         # Create a bias correction object
         bcorr = biascorr.BiasCorr(
@@ -421,7 +422,6 @@ def test_directionalbias__synthetic(self, fit_args, angle, nb_freq) -> None:
         xx = gu.raster.get_xy_rotated(self.ref, along_track_angle=angle)[0]
 
         # Get random parameters (3 parameters needed per frequency)
-        np.random.seed(42)
         params = np.array([(5, 3000, np.pi), (1, 300, 0), (0.5, 100, np.pi / 2)]).flatten()
         nb_freq = 1
         params = params[0 : 3 * nb_freq]
@@ -510,8 +510,8 @@ def test_deramp__synthetic(self, fit_args, order: int) -> None:
         nb_params = int((order + 1) * (order + 1))
 
         # Get a random set of parameters
-        np.random.seed(42)
-        params = np.random.normal(size=nb_params)
+        rng = np.random.default_rng(42)
+        params = rng.normal(size=nb_params)
 
         # Create a synthetic bias and add to the DEM
         synthetic_bias = polynomial_2d((xx, yy), *params)
diff --git a/tests/test_ddem.py b/tests/test_ddem.py
index 7863e728..a5ae9f4b 100644
--- a/tests/test_ddem.py
+++ b/tests/test_ddem.py
@@ -52,7 +52,8 @@ def test_regional_hypso(self) -> None:
         """Test the regional hypsometric approach."""
         ddem = self.ddem.copy()
         ddem.data.mask = np.zeros_like(ddem.data, dtype=bool)
-        ddem.data.mask.ravel()[np.random.choice(ddem.data.size, 50000, replace=False)] = True
+        rng = np.random.default_rng(42)
+        ddem.data.mask.ravel()[rng.choice(ddem.data.size, 50000, replace=False)] = True
         assert np.count_nonzero(ddem.data.mask) > 0
 
         assert ddem.filled_data is None
@@ -71,7 +72,8 @@ def test_local_hypso(self) -> None:
         ddem = self.ddem.copy()
         scott_1990 = self.outlines_1990.query("NAME == 'Scott Turnerbreen'")
         ddem.data.mask = np.zeros_like(ddem.data, dtype=bool)
-        ddem.data.mask.ravel()[np.random.choice(ddem.data.size, 50000, replace=False)] = True
+        rng = np.random.default_rng(42)
+        ddem.data.mask.ravel()[rng.choice(ddem.data.size, 50000, replace=False)] = True
         assert np.count_nonzero(ddem.data.mask) > 0
 
         assert ddem.filled_data is None
diff --git a/tests/test_demcollection.py b/tests/test_demcollection.py
index b1bb3baf..544d195f 100644
--- a/tests/test_demcollection.py
+++ b/tests/test_demcollection.py
@@ -60,10 +60,11 @@ def test_init(self) -> None:
 
         # Simple check that the dV number is of a greater magnitude than the dH number.
         assert abs(cumulative_dv.iloc[-1]) > abs(cumulative_dh.iloc[-1])
 
+        rng = np.random.default_rng(42)
         # Generate 100 NaN values randomly in one of the dDEMs
         dems.ddems[0].data[
-            np.random.randint(0, dems.ddems[0].data.shape[0], 100),
-            np.random.randint(0, dems.ddems[0].data.shape[1], 100),
+            rng.integers(0, dems.ddems[0].data.shape[0], 100),
+            rng.integers(0, dems.ddems[0].data.shape[1], 100),
         ] = np.nan
         # Check that the cumulative_dh function warns for NaNs
         with warnings.catch_warnings():
@@ -108,9 +109,10 @@ def test_ddem_interpolation(self) -> None:
             raise exception
 
         # Generate 100 NaN values randomly in one of the dDEMs
+        rng = np.random.default_rng(42)
        dems.ddems[0].data[
-            np.random.randint(0, dems.ddems[0].data.shape[0], 100),
-            np.random.randint(0, dems.ddems[0].data.shape[1], 100),
+            rng.integers(0, dems.ddems[0].data.shape[0], 100),
+            rng.integers(0, dems.ddems[0].data.shape[1], 100),
         ] = np.nan
 
         # Make sure that filled_data is not available anymore, since the data now has nans
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 113d755a..d0aec93e 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -28,17 +28,17 @@ class TestExamples:
     @pytest.mark.parametrize(
         "rst_and_truevals",
         [
-            (ref_dem, np.array([868.6489, 623.42194, 180.57921, 267.30765, 601.67615], dtype=np.float32)),
-            (tba_dem, np.array([875.2358, 625.0544, 182.9936, 272.6586, 606.2897], dtype=np.float32)),
+            (ref_dem, np.array([465.11816, 207.3236, 208.30563, 748.7337, 797.28644], dtype=np.float32)),
+            (tba_dem, np.array([464.6715, 213.7554, 207.8788, 760.8192, 797.3268], dtype=np.float32)),
             (
                 ddem,
                 np.array(
                     [
-                        -0.012023926,
-                        -0.6956787,
-                        0.14024353,
-                        1.1026001,
-                        -5.9224243,
+                        1.3182373,
+                        -1.6629944,
+                        0.10473633,
+                        -10.096802,
+                        2.4724731,
                     ],
                     dtype=np.float32,
                 ),
@@ -50,8 +50,8 @@ def test_array_content(self, rst_and_truevals: tuple[Raster, NDArrayf]) -> None:
         rst = rst_and_truevals[0]
         truevals = rst_and_truevals[1]
 
-        np.random.seed(42)
-        values = np.random.choice(rst.data.data.flatten(), size=5, replace=False)
+        rng = np.random.default_rng(42)
+        values = rng.choice(rst.data.data.flatten(), size=5, replace=False)
 
         assert values == pytest.approx(truevals)
 
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 30c3c659..c6c98357 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -37,8 +37,9 @@ def test_gauss(self) -> None:
 
         # Test that it works with NaNs too
         nan_count = 1000
-        cols = np.random.randint(0, high=self.dem_1990.width - 1, size=nan_count, dtype=int)
-        rows = np.random.randint(0, high=self.dem_1990.height - 1, size=nan_count, dtype=int)
+        rng = np.random.default_rng(42)
+        cols = rng.integers(0, high=self.dem_1990.width - 1, size=nan_count, dtype=int)
+        rows = rng.integers(0, high=self.dem_1990.height - 1, size=nan_count, dtype=int)
 
         dem_with_nans = np.copy(self.dem_1990.data).squeeze()
         dem_with_nans[rows, cols] = np.nan
@@ -71,8 +72,9 @@ def test_dist_filter(self) -> None:
 
         # Add random outliers
         count = 1000
-        cols = np.random.randint(0, high=self.dem_1990.width - 1, size=count, dtype=int)
-        rows = np.random.randint(0, high=self.dem_1990.height - 1, size=count, dtype=int)
+        rng = np.random.default_rng(42)
+        cols = rng.integers(0, high=self.dem_1990.width - 1, size=count, dtype=int)
+        rows = rng.integers(0, high=self.dem_1990.height - 1, size=count, dtype=int)
         ddem.data[rows, cols] = 5000
 
         # Filter gross outliers
diff --git a/tests/test_fit.py b/tests/test_fit.py
index fbef0726..b14c9f86 100644
--- a/tests/test_fit.py
+++ b/tests/test_fit.py
@@ -53,7 +53,7 @@ def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None:
         # Ignore sklearn convergence warnings
         warnings.filterwarnings("ignore", category=UserWarning, message="lbfgs failed to converge")
 
-        np.random.seed(42)
+        rng = np.random.default_rng(42)
 
         # Define x vector
         x = np.linspace(1, 10, 1000)
@@ -61,7 +61,7 @@ def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None:
         true_coefs = [-100, 5, 3, 2]
         y = np.polyval(np.flip(true_coefs), x).astype(np.float32)
         # Add some noise on top
-        y += np.random.normal(loc=0, scale=3, size=1000)
+        y += rng.normal(loc=0, scale=3, size=1000)
         # Add some outliers
         y[50:75] = 0.0
         y[900:925] = 1000.0
@@ -103,8 +103,8 @@ def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None:
         coefs4, deg4 = xdem.fit.robust_norder_polynomial_fit(x, y, estimator_name="Theil-Sen", random_state=42)
         assert deg4 == 3
         # High degree coefficients should be well constrained
-        assert coefs4[2] == pytest.approx(true_coefs[2], abs=1)
-        assert coefs4[3] == pytest.approx(true_coefs[3], abs=1)
+        assert coefs4[2] == pytest.approx(true_coefs[2], abs=1.5)
+        assert coefs4[3] == pytest.approx(true_coefs[3], abs=1.5)
 
         # RANSAC also works
         coefs5, deg5 = xdem.fit.robust_norder_polynomial_fit(x, y, estimator_name="RANSAC", random_state=42)
@@ -150,7 +150,7 @@ def test_robust_nfreq_sumsin_fit(self) -> None:
     def test_robust_nfreq_simsin_fit_noise_and_outliers(self) -> None:
 
         # Check robustness to outliers
-        np.random.seed(42)
+        rng = np.random.default_rng(42)
         # Define X vector
         x = np.linspace(0, 10, 1000)
         # Define exact sum of sinusoid signal
@@ -158,7 +158,7 @@ def test_robust_nfreq_simsin_fit_noise_and_outliers(self) -> None:
         y = xdem.fit.sumsin_1d(x, *true_coefs)
 
         # Add some noise
-        y += np.random.normal(loc=0, scale=0.25, size=1000)
+        y += rng.normal(loc=0, scale=0.25, size=1000)
         # Add some outliers
         y[50:75] = -10
         y[900:925] = 10
diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py
index a6e3f170..45b4db8a 100644
--- a/tests/test_spatialstats.py
+++ b/tests/test_spatialstats.py
@@ -356,9 +356,9 @@ def test_get_perbin_nd_binning(self) -> None:
             df[var] = [xdem.spatialstats._pandas_str_to_interval(x) for x in df[var]]
 
         # Take 1000 random points in the array
-        np.random.seed(42)
-        xrand = np.random.randint(low=0, high=perbin_values.shape[0], size=1000)
-        yrand = np.random.randint(low=0, high=perbin_values.shape[1], size=1000)
+        rng = np.random.default_rng(42)
+        xrand = rng.integers(low=0, high=perbin_values.shape[0], size=1000)
+        yrand = rng.integers(low=0, high=perbin_values.shape[1], size=1000)
 
         for i in range(len(xrand)):
 
@@ -372,7 +372,7 @@ def test_get_perbin_nd_binning(self) -> None:
             if np.logical_or.reduce((np.isnan(h), np.isnan(slp), np.isnan(asp))):
                 continue
 
-            # Isolate the bin in the dataframe, should be only one
+            # Isolate the bin in the dataframe
             index_bin = np.logical_and.reduce(
                 (
                     [h in interv for interv in df["elevation"]],
@@ -380,6 +380,10 @@ def test_get_perbin_nd_binning(self) -> None:
                     [asp in interv for interv in df["aspect"]],
                 )
             )
+            # It might not exist in the binning intervals (if extreme values were not subsampled in test_nd_binning)
+            if np.count_nonzero(index_bin) == 0:
+                continue
+            # Otherwise there should be only one
             assert np.count_nonzero(index_bin) == 1
 
             # Get the statistic value and verify that this was the one returned by the function
@@ -513,7 +517,7 @@ def test_sample_multirange_variogram_default(self) -> None:
 
         df = xdem.spatialstats.sample_empirical_variogram(values=self.diff, subsample=10, random_state=42)
 
         # assert df["exp"][15] == pytest.approx(5.11900520324707, abs=1e-3)
         assert df["lags"][15] == pytest.approx(5120)
-        assert df["count"][15] == 5
+        assert df["count"][15] == 2
 
         # With a single run, no error can be estimated
         assert all(np.isnan(df.err_exp.values))
@@ -578,8 +582,6 @@ def test_sample_empirical_variogram_speed(self) -> None:
         extent = (np.min(coords[:, 0]), np.max(coords[:, 0]), np.min(coords[:, 1]), np.max(coords[:, 1]))
         # Shape
         shape = values.shape
-        # Random state
-        rnd = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(42)))
 
         keyword_arguments = {"subsample": subsample, "extent": extent, "shape": shape, "verbose": False}
 
         runs, samples, ratio_subsample = xdem.spatialstats._choose_cdist_equidistant_sampling_parameters(
@@ -597,7 +599,8 @@ def test_sample_empirical_variogram_speed(self) -> None:
             samples=samples,
             ratio_subsample=ratio_subsample,
             runs=runs,
-            rnd=rnd,
+            # Even with n_variograms=1, we now sample integer seeds for the random number generator
+            rnd=np.random.default_rng(42).choice(1, 1, replace=False),
         )
         V = skgstat.Variogram(
             rems,
@@ -759,8 +762,8 @@ def test_multirange_fit_performance(self) -> None:
 
         # Add some noise on top of it
         sig = 0.025
-        np.random.seed(42)
-        y_noise = np.random.normal(0, sig, size=len(x))
+        rng = np.random.default_rng(42)
+        y_noise = rng.normal(0, sig, size=len(x))
         y_simu = y + y_noise
         sigma = np.ones(len(x)) * sig
 
@@ -1156,16 +1159,16 @@ def test_number_effective_samples(self) -> None:
             rasterize_resolution=self.ref,
             random_state=42,
         )
-        # The value should be nearly the same within 5% (the discretization grid is different so affects a tiny bit the
+        # The value should be nearly the same within 10% (the discretization grid differs, which slightly affects the
         # result)
-        assert neff3 == pytest.approx(neff2, rel=0.05)
+        assert neff3 == pytest.approx(neff2, rel=0.1)
 
-        # Check that the number of effective samples matches that of the circular approximation within 20%
+        # Check that the number of effective samples matches that of the circular approximation within 25%
         area_brom = np.sum(outlines_brom.ds.area.values)
         neff4 = xdem.spatialstats.number_effective_samples(
             area=area_brom, params_variogram_model=params_variogram_model
         )
-        assert neff4 == pytest.approx(neff2, rel=0.2)
+        assert neff4 == pytest.approx(neff2, rel=0.25)
 
         # The circular approximation is always conservative, so should yield a smaller value
         assert neff4 < neff2
@@ -1311,7 +1314,7 @@ def test_patches_method_loop_quadrant(self) -> None:
         assert df_full.shape == (100, 5)
 
         # Check the sampling is always fixed for a random state
-        assert df_full["tile"].values[0] == "8_16"
+        assert df_full["tile"].values[0] == "47_17"
 
         # Check that all counts respect the default minimum percentage of 80% valid pixels
         assert all(df_full["count"].values > 0.8 * np.max(df_full["count"].values))
diff --git a/tests/test_terrain.py b/tests/test_terrain.py
index e2b343a9..9097ec4a 100644
--- a/tests/test_terrain.py
+++ b/tests/test_terrain.py
@@ -172,8 +172,9 @@ def test_attribute_functions_against_gdaldem(self, attribute: str) -> None:
             raise exception
 
         # Introduce some nans
+        rng = np.random.default_rng(42)
         dem.data.mask = np.zeros_like(dem.data, dtype=bool)
-        dem.data.mask.ravel()[np.random.choice(dem.data.size, 50000, replace=False)] = True
+        dem.data.mask.ravel()[rng.choice(dem.data.size, 50000, replace=False)] = True
 
         # Validate that this doesn't raise weird warnings after introducing nans.
functions[attribute](dem) @@ -257,8 +258,9 @@ def test_attribute_functions_against_richdem(self, attribute: str) -> None: raise exception # Introduce some nans + rng = np.random.default_rng(42) dem.data.mask = np.zeros_like(dem.data, dtype=bool) - dem.data.mask.ravel()[np.random.choice(dem.data.size, 50000, replace=False)] = True + dem.data.mask.ravel()[rng.choice(dem.data.size, 50000, replace=False)] = True # Validate that this doesn't raise weird warnings after introducing nans and that mask is preserved output = functions_richdem[attribute](dem) @@ -320,8 +322,9 @@ def test_curvatures(self, name: str) -> None: xdem.terrain.get_terrain_attribute(dem.data, attribute=name, resolution=(1.0, 2.0)) # Introduce some nans + rng = np.random.default_rng(42) dem.data.mask = np.zeros_like(dem.data, dtype=bool) - dem.data.mask.ravel()[np.random.choice(dem.data.size, 50000, replace=False)] = True + dem.data.mask.ravel()[rng.choice(dem.data.size, 50000, replace=False)] = True # Validate that this doesn't raise weird warnings after introducing nans. xdem.terrain.get_terrain_attribute(dem.data, attribute=name, resolution=dem.res) diff --git a/tests/test_volume.py b/tests/test_volume.py index 6aaf6bdc..da6ae918 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -230,7 +230,8 @@ def test_regional_hypsometric_interp(self) -> None: # Try the normalized regional hypsometric interpolation. # Synthesize random nans in 80% of the data. - ddem.data.mask.ravel()[np.random.choice(ddem.data.size, int(ddem.data.size * 0.80), replace=False)] = True + rng = np.random.default_rng(42) + ddem.data.mask.ravel()[rng.choice(ddem.data.size, int(ddem.data.size * 0.80), replace=False)] = True # Fill the dDEM using the de-normalized signal. filled_ddem = xdem.volume.norm_regional_hypsometric_interpolation( voided_ddem=ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map diff --git a/xdem/coreg/base.py b/xdem/coreg/base.py index af33ee5c..cc4c9e13 100644 --- a/xdem/coreg/base.py +++ b/xdem/coreg/base.py @@ -171,7 +171,8 @@ def _df_sampling_from_dem( # Avoid edge, and mask-out area in sampling width, length = dem.shape - i, j = np.random.randint(10, width - 10, npoints), np.random.randint(10, length - 10, npoints) + rng = np.random.default_rng() + i, j = rng.integers(10, width - 10, npoints), rng.integers(10, length - 10, npoints) mask = dem.data.mask # Get value @@ -1014,7 +1015,7 @@ class CoregDict(TypedDict, total=False): # Affine + BiasCorr classes subsample: int | float subsample_final: int - random_state: np.random.RandomState | np.random.Generator | int | None + random_state: int | np.random.Generator | None # BiasCorr classes generic metadata @@ -1147,7 +1148,7 @@ def fit( crs: rio.crs.CRS | None = None, z_name: str = "z", verbose: bool = False, - random_state: None | np.random.RandomState | np.random.Generator | int = None, + random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> CoregType: """ @@ -1349,7 +1350,7 @@ def residuals( transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, subsample: float | int = 1.0, - random_state: None | np.random.RandomState | np.random.Generator | int = None, + random_state: int | np.random.Generator | None = None, ) -> NDArrayf: """ Calculate the residual offsets (the difference) between two DEMs after applying the transformation. 
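
The signature changes in this file all converge on one normalization idea: np.random.default_rng accepts None (fresh OS entropy), an integer seed (a reproducible stream), or an existing Generator (returned unaltered), so a single call can absorb any value of the new "random_state: int | np.random.Generator | None" type. Below is a minimal sketch of that pattern with the legacy equivalents in comments; the helper name _resolve_rng is illustrative only, not a function in xDEM:

    import numpy as np

    def _resolve_rng(random_state: int | np.random.Generator | None = None) -> np.random.Generator:
        # None -> seeded from OS entropy; int -> reproducible stream;
        # Generator -> returned unaltered by default_rng.
        return np.random.default_rng(random_state)

    rng = _resolve_rng(42)
    vals = rng.normal(0, 1, size=5)                # was: np.random.normal(0, 1, size=5)
    idx = rng.integers(0, 10, size=5)              # was: np.random.randint(0, 10, size=5)
    pick = rng.choice(100, size=5, replace=False)  # was: np.random.choice(100, 5, replace=False)
    u = rng.random(size=5)                         # was: np.random.random(size=5)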
@@ -1772,7 +1773,7 @@ def fit(
         crs: rio.crs.CRS | None = None,
         z_name: str = "z",
         verbose: bool = False,
-        random_state: None | np.random.RandomState | np.random.Generator | int = None,
+        random_state: int | np.random.Generator | None = None,
         **kwargs: Any,
     ) -> CoregType:
 
@@ -2041,7 +2042,7 @@ def fit(
         crs: rio.crs.CRS | None = None,
         z_name: str = "z",
         verbose: bool = False,
-        random_state: None | np.random.RandomState | np.random.Generator | int = None,
+        random_state: int | np.random.Generator | None = None,
         **kwargs: Any,
     ) -> CoregType:
 
diff --git a/xdem/fit.py b/xdem/fit.py
index 01b083ac..58690c62 100644
--- a/xdem/fit.py
+++ b/xdem/fit.py
@@ -329,7 +329,7 @@ def robust_norder_polynomial_fit(
     subsample: float | int = 1,
     linear_pkg: str = "scipy",
     verbose: bool = False,
-    random_state: None | np.random.RandomState | np.random.Generator | int = None,
+    random_state: int | np.random.Generator | None = None,
     **kwargs: Any,
 ) -> tuple[NDArrayf, int]:
     """
@@ -446,7 +446,7 @@ def robust_nfreq_sumsin_fit(
     cost_func: Callable[[NDArrayf], float] = soft_loss,
     subsample: float | int = 1,
     hop_length: float | None = None,
-    random_state: None | np.random.RandomState | np.random.Generator | int = None,
+    random_state: int | np.random.Generator | None = None,
     verbose: bool = False,
     **kwargs: Any,
 ) -> tuple[NDArrayf, int]:
diff --git a/xdem/misc.py b/xdem/misc.py
index b36ad838..a7f38c0b 100644
--- a/xdem/misc.py
+++ b/xdem/misc.py
@@ -28,24 +28,28 @@
 from xdem._typing import NDArrayf
 
 
-def generate_random_field(shape: tuple[int, int], corr_size: int) -> NDArrayf:
+def generate_random_field(
+    shape: tuple[int, int], corr_size: int, random_state: int | np.random.Generator | None = None
+) -> NDArrayf:
     """
     Generate a semi-random gaussian field (to simulate a DEM or DEM error)
 
     :param shape: The output shape of the field.
     :param corr_size: The correlation size of the field.
+    :param random_state: Seed or random generator for reproducibility.
 
     :examples:
 
-    >>> np.random.seed(1)
-    >>> generate_random_field((4, 5), corr_size=2).round(2)
-    array([[0.47, 0.5 , 0.56, 0.63, 0.65],
-           [0.49, 0.51, 0.56, 0.62, 0.64],
-           [0.56, 0.56, 0.57, 0.59, 0.59],
-           [0.57, 0.57, 0.57, 0.58, 0.58]])
+    >>> generate_random_field((4, 5), corr_size=2, random_state=1).round(2)
+    array([[0.74, 0.74, 0.75, 0.75, 0.75],
+           [0.69, 0.69, 0.7 , 0.71, 0.71],
+           [0.51, 0.51, 0.54, 0.57, 0.58],
+           [0.45, 0.47, 0.5 , 0.53, 0.54]])
 
     :returns: A numpy array of semi-random values from 0 to 1
     """
+    rng = np.random.default_rng(random_state)
+
     if not _has_cv2:
         raise ValueError("Optional dependency needed. Install 'opencv'")
 
@@ -53,7 +57,7 @@ def generate_random_field(shape: tuple[int, int], corr_size: int) -> NDArrayf:
     cv2.GaussianBlur(
         np.repeat(
             np.repeat(
-                np.random.randint(0, 255, (shape[0] // corr_size, shape[1] // corr_size), dtype="uint8"),
+                rng.integers(0, 255, (shape[0] // corr_size, shape[1] // corr_size), dtype="uint8"),
                 corr_size,
                 axis=0,
             ),
diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py
index 200873d1..5cdbcc86 100644
--- a/xdem/spatialstats.py
+++ b/xdem/spatialstats.py
@@ -917,25 +917,6 @@ def _create_ring_mask(
 
     return mask_ring
 
 
-def _random_state_definition(
-    random_state: None | np.random.RandomState | int = None,
-) -> np.random.RandomState | np.random.Generator:
-    """
-    Define random state based on input
-    :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing)
-    :return:
-    """
-
-    if random_state is None:
-        rnd: np.random.RandomState | np.random.Generator = np.random.default_rng()
-    elif isinstance(random_state, np.random.RandomState):
-        rnd = random_state
-    else:
-        rnd = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(random_state)))
-
-    return rnd
-
-
 def _subsample_wrapper(
     values: NDArrayf,
     coords: NDArrayf,
@@ -944,7 +925,7 @@ def _subsample_wrapper(
     subsample_method: str = "pdist_ring",
     inside_radius: float = 0,
     outside_radius: float = None,
-    random_state: None | np.random.RandomState | int = None,
+    random_state: int | np.random.Generator | None = None,
 ) -> tuple[NDArrayf, NDArrayf]:
     """
     (Not used by default)
     """
     nx, ny = shape
 
-    rnd = _random_state_definition(random_state=random_state)
+    rng = np.random.default_rng(random_state)
 
     # Subsample spatially for disk/ring methods
     if subsample_method in ["pdist_disk", "pdist_ring"]:
         # Select random center coordinates
-        center_x = rnd.choice(nx, 1)[0]
-        center_y = rnd.choice(ny, 1)[0]
+        center_x = rng.choice(nx, 1)[0]
+        center_y = rng.choice(ny, 1)[0]
         if subsample_method == "pdist_ring":
             subindex = _create_ring_mask(
                 (nx, ny), center=(center_x, center_y), in_radius=inside_radius, out_radius=outside_radius
@@ -974,7 +955,7 @@ def _subsample_wrapper(
         values_sp = values
         coords_sp = coords
 
-    index = subsample_array(values_sp, subsample=subsample, return_indices=True, random_state=rnd)
+    index = subsample_array(values_sp, subsample=subsample, return_indices=True, random_state=random_state)
 
     values_sub = values_sp[index[0]]
     coords_sub = coords_sp[index[0], :]
@@ -1209,7 +1190,7 @@ def _get_cdist_empirical_variogram(
     # We set the samples to match the subsample argument if the method is random points
     kwargs["samples"] = kwargs.pop("subsample")
 
-    # Rename the "random_state" argument into "rnd", also used by skgstat Metric subclasses
+    # Rename the "random_state" argument into "rnd", the name used by skgstat Metric subclasses
     kwargs["rnd"] = kwargs.pop("random_state")
 
     # Define MetricSpace function to be used, fetch possible keywords arguments
@@ -1295,7 +1276,7 @@ def sample_empirical_variogram(
     n_variograms: int = 1,
     n_jobs: int = 1,
     verbose: bool = False,
-    random_state: None | np.random.RandomState | int = None,
+    random_state: int | np.random.Generator | None = None,
     # **kwargs: **EmpiricalVariogramKArgs,  # This will work in Python 3.12, fails in the meantime, we'll be able to
     # remove some type ignores from this function in the future
     **kwargs: int | list[float] | float | str | Any,
@@ -1462,20 +1443,12 @@ def sample_empirical_variogram(
     # provide exactly the same sampling and results
     if random_state is not None:
         # Define the random state if only a seed is provided
-        if isinstance(random_state, np.random.RandomState):
-            rnd = random_state
-        elif isinstance(random_state, np.random.Generator):
-            rnd = np.random.RandomState(random_state)
-        else:
-            rnd = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(random_state)))
+        rng = np.random.default_rng(random_state)
 
-        # Create a list of child random states
-        if n_variograms == 1:
-            # No issue if there is only one variogram run
-            list_random_state: list[None | np.random.RandomState] = [rnd]
-        else:
-            # Otherwise, pass a list of seeds
-            list_random_state = list(rnd.choice(n_variograms, n_variograms, replace=False))
+        # Create a list of child seeds, one per variogram run
+        list_random_state: list[None | int] = list(
+            rng.choice(n_variograms, n_variograms, replace=False)
+        )
     else:
         list_random_state = [None for i in range(n_variograms)]
 
@@ -1813,7 +1786,7 @@ def estimate_model_spatial_correlation(
     n_variograms: int = 1,
     n_jobs: int = 1,
     verbose: bool = False,
-    random_state: None | np.random.RandomState | int = None,
+    random_state: int | np.random.Generator | None = None,
     bounds: list[tuple[float, float]] = None,
     p0: list[float] = None,
     **kwargs: Any,
@@ -1891,7 +1864,7 @@ def infer_spatial_correlation_from_stable(
     verbose: bool = False,
     bounds: list[tuple[float, float]] = None,
     p0: list[float] = None,
-    random_state: None | np.random.RandomState | int = None,
+    random_state: int | np.random.Generator | None = None,
     **kwargs: Any,
 ) -> tuple[pd.DataFrame, pd.DataFrame, Callable[[NDArrayf], NDArrayf]]:
     """
@@ -2249,7 +2222,7 @@ def neff_hugonnet_approx(
     params_variogram_model: pd.DataFrame,
     subsample: int = 1000,
     vectorized: bool = True,
-    random_state: None | np.random.RandomState | int = None,
+    random_state: int | np.random.Generator | None = None,
 ) -> float:
     """
     Approximated number of effective samples derived from a double sum of covariance subsetted on one of the two sums,
@@ -2270,7 +2243,7 @@ def neff_hugonnet_approx(
     """
 
     # Define random state
-    rnd = _random_state_definition(random_state=random_state)
+    rng = np.random.default_rng(random_state)
 
     # Check input dataframe
     _check_validity_params_variogram(params_variogram_model)
@@ -2286,7 +2259,7 @@ def neff_hugonnet_approx(
     subsample = min(subsample, n)
 
     # Get random subset of points for one of the sums
-    rand_points = rnd.choice(n, size=subsample, replace=False)
+    rand_points = rng.choice(n, size=subsample, replace=False)
 
     # Now we compute the double covariance sum
     # Either using for-loop-version
@@ -2761,7 +2734,7 @@ def _patches_loop_quadrants(
     statistics_in_patch: Iterable[Callable[[NDArrayf], np.floating[Any]] | str] = (np.nanmean,),
     statistic_between_patches: Callable[[NDArrayf], np.floating[Any]] = nmad,
     verbose: bool = False,
-    random_state: None | int | np.random.RandomState = None,
+    random_state: int | np.random.Generator | None = None,
     return_in_patch_statistics: bool = False,
 ) -> tuple[float, float, float] | tuple[float, float, float, pd.DataFrame]:
     """
@@ -2793,7 +2766,7 @@ def _patches_loop_quadrants(
     statistics_name = [f if isinstance(f, str) else f.__name__ for f in list_statistics_in_patch]
 
     # Define random state
-    rnd = _random_state_definition(random_state=random_state)
+    rng = np.random.default_rng(random_state)
 
     # Divide raster in quadrants where we can sample
     nx, ny = np.shape(values)
@@ -2824,7 +2797,7 @@ def _patches_loop_quadrants(
 
         # Draw a random coordinate from the list of quadrants, select more than enough random points to avoid drawing
         # randomly and differencing lists several times
-        list_idx_quadrant = rnd.choice(len(list_quadrant), size=min(len(list_quadrant), 10 * remaining_nsamp))
+        list_idx_quadrant = rng.choice(len(list_quadrant), size=min(len(list_quadrant), 10 * remaining_nsamp))
 
         for idx_quadrant in list_idx_quadrant:
 
@@ -2912,7 +2885,7 @@ def patches_method(
     verbose: bool = False,
     *,
     return_in_patch_statistics: Literal[False] = False,
-    random_state: None | int | np.random.RandomState = None,
+    random_state: int | np.random.Generator | None = None,
 ) -> pd.DataFrame:
     ...
 
@@ -2934,7 +2907,7 @@ def patches_method(
     verbose: bool = False,
     *,
     return_in_patch_statistics: Literal[True],
-    random_state: None | int | np.random.RandomState = None,
+    random_state: int | np.random.Generator | None = None,
 ) -> tuple[pd.DataFrame, pd.DataFrame]:
     ...
 
@@ -2954,7 +2927,7 @@ def patches_method(
     n_patches: int = 1000,
     verbose: bool = False,
     return_in_patch_statistics: bool = False,
-    random_state: None | int | np.random.RandomState = None,
+    random_state: int | np.random.Generator | None = None,
 ) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
     """
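
For repeated runs such as n_variograms > 1, the patch derives one child seed per run from a parent generator, so results stay reproducible while each run samples differently. A short sketch of that seeding idea, next to NumPy's SeedSequence.spawn, the documented mechanism for independent child streams; variable names here are illustrative, not taken from the patch:

    import numpy as np

    n_runs = 10

    # Patch-style: draw distinct integer seeds from a seeded parent generator
    parent = np.random.default_rng(42)
    child_seeds = parent.choice(n_runs, n_runs, replace=False)
    children = [np.random.default_rng(int(seed)) for seed in child_seeds]

    # Alternative: spawn statistically independent child sequences
    seed_seq = np.random.SeedSequence(42)
    children_alt = [np.random.default_rng(s) for s in seed_seq.spawn(n_runs)]

Either way, passing None instead of a seed (the random_state default) skips the child-seed step entirely, as the else branch in sample_empirical_variogram does with its list of None values.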