diff --git a/src/invert4geom/optimization.py b/src/invert4geom/optimization.py index 2952cb9b..4a56e5b0 100644 --- a/src/invert4geom/optimization.py +++ b/src/invert4geom/optimization.py @@ -14,24 +14,17 @@ import warnings import harmonica as hm +import joblib import numpy as np import optuna import pandas as pd +import psutil import xarray as xr from nptyping import NDArray from tqdm.autonotebook import tqdm - -from invert4geom import cross_validation, inversion, log, plotting, regional, utils - -import joblib - - -import psutil - - from tqdm_joblib import tqdm_joblib - +from invert4geom import cross_validation, inversion, log, plotting, regional, utils def logging_callback( @@ -921,9 +914,14 @@ def __call__(self, trial: optuna.trial) -> float: msg = "progressbar must be a boolean" # type: ignore[unreachable] raise ValueError(msg) + log.info( + "Performing Zref/Density CV with constraints split into test/train" + ) # for each fold, run CV scores = [] for i, _ in enumerate(pbar): + log.info("beginning fold %s of %s", i + 1, len(pbar)) + with utils.log_level(logging.WARN): grav_df = regional.regional_separation( grav_df=grav_df, @@ -1027,7 +1025,13 @@ def optimize_inversion_zref_density_contrast( or both at the same time. Provide upper and low limits for each parameter, number of trials and let Optuna choose the best parameter values for each trial or use a grid search to test all values between the limits in intervals of n_trials. The results - are saved to a pickle file with the best inversion results and the study. + are saved to a pickle file with the best inversion results and the study. If you + want to use a regional separation technique which utilizes constraints, such as + constraint point minimization, separate the constraints into testing and training + sets, and supply the training set to `regional_grav_kwargs` and the testing set to + `constraints_df` to use for scoring.
If you want to automatically perform a K-Folds + CV for regional separation, use the function + `optimize_inversion_zref_density_contrast_kfolds`. Parameters ---------- @@ -1384,11 +1388,19 @@ def optimize_inversion_zref_density_contrast_kfolds( Perform a cross validation for the optimal zref and density contrast values same as function `optimize_inversion_zref_density_contrast`, but pass a dataframe of constraint points which contains folds of testing and training data (generated with - `cross_validation.split_test_train`) so for each iteration of the zref/density cross - validation, the regional separation is performed with 1 training fold, and the - scoring is performed with 1 testing fold. This is only useful if the regional - separation technique you supply via `regional_grav_kwargs` uses constraints points - for the estimations, such as constraint point minimization. + `cross_validation.split_test_train`). For each set of zref/density values, perform + a regional separation and inversion for each of the K folds in the constraints + dataframe. After all K folds are inverted, the mean of the K folds scores will be + the score for that set of parameters. Repeat this for all parameters. Within each + fold, the training constraints are used for the regional separation and the testing + constraints are used for scoring. This is only useful if the regional separation + technique you supply via `regional_grav_kwargs` uses constraint points + for the estimations, such as constraint point minimization. If using 20 sets of + density and zref values and 5 folds, this will run 100 inversions. It is more + efficient, but less accurate, to simply use a different regional estimation + technique, which doesn't require constraint points, to find the optimal zref and + density values. Then use these again in another inversion with the desired regional + separation technique.
Parameters ---------- @@ -1453,7 +1465,7 @@ def optimize_inversion_zref_density_contrast_kfolds( "grid_search", ] } - # run the inversion workflow + # run the inversion workflow with the new best parameters with utils.log_level(logging.WARN): final_inversion_results = inversion.run_inversion_workflow( create_starting_prisms=True, diff --git a/src/invert4geom/plotting.py b/src/invert4geom/plotting.py index c316046d..8443b172 100644 --- a/src/invert4geom/plotting.py +++ b/src/invert4geom/plotting.py @@ -2,27 +2,18 @@ import typing +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np import optuna import pandas as pd - import plotly - -from IPython.display import clear_output -import matplotlib as mpl -import matplotlib.pyplot as plt - - - -import seaborn as sns - - import pyvista - - import scipy as sp +import seaborn as sns import verde as vd import xarray as xr +from IPython.display import clear_output from polartoolkit import maps from polartoolkit import utils as polar_utils @@ -55,7 +46,6 @@ def plot_2_parameter_cv_scores( """ sns.set_theme() - if cmap is None: cmap = sns.color_palette("mako", as_cmap=True) @@ -231,7 +221,6 @@ def plot_cv_scores( sns.set_theme() - df0 = pd.DataFrame({"scores": scores, "parameters": parameters}) df = df0.sort_values(by="parameters") @@ -285,8 +274,6 @@ def plot_convergence( sns.set_theme() - - # get misfit data at end of each iteration cols = [s for s in results.columns.to_list() if "_final_misfit" in s] iters = len(cols) @@ -393,8 +380,6 @@ def plot_dynamic_convergence( sns.set_theme() - - clear_output(wait=True) l2_norms = l2_norms.copy() @@ -711,7 +696,6 @@ def plot_inversion_iteration_results( size for constraint points, by default 1 """ - misfit_grids, topo_grids, corrections_grids = grids params = parameters.copy() @@ -1067,7 +1051,6 @@ def show_prism_layers( use kwarg `colors` to alter these colors, by default is "density" """ - # Plot with pyvista plotter = pyvista.Plotter( lighting="three_lights",