Commit
Merge pull request #114 from ImperialCollegeLondon/sensitivity_analysis
Sensitivity analysis
Showing 13 changed files with 549 additions and 36 deletions.
@@ -0,0 +1,118 @@
"""Debug results by recalculating metrics. | ||
This script provides a way to load a model file from the default setup in | ||
experimenter.py and recalculate the metrics. This is useful for recreating | ||
how a metric is calculated to verify that it is being done correctly. In this | ||
example we reproduce code from `metric_utilities.py` to check how timeseries | ||
data are aligned and compared. | ||
""" | ||
from __future__ import annotations | ||
|
||
from pathlib import Path | ||
|
||
import geopandas as gpd | ||
import pandas as pd | ||
|
||
from swmmanywhere.graph_utilities import load_graph | ||
from swmmanywhere.metric_utilities import ( | ||
align_by_shape, | ||
best_outlet_match, | ||
dominant_outlet, | ||
extract_var, | ||
iterate_metrics, | ||
) | ||
from swmmanywhere.parameters import MetricEvaluation | ||
from swmmanywhere.swmmanywhere import load_config | ||
|
||
if __name__ == 'main': | ||
project = 'cranbrook' | ||
base = Path.home() / "Documents" / "data" / "swmmanywhere" | ||
config_path = base / project / f'{project}_hpc.yml' | ||
config = load_config(config_path, validation = False) | ||
config['base_dir'] = base / project | ||
real_dir = config['base_dir'] / 'real' | ||
|
||
model_number = 5523 | ||
|
||
model_dir = config['base_dir'] / 'bbox_1' / f'model_{model_number}' | ||
|
||
syn_results = pd.read_parquet(model_dir / 'results.parquet') | ||
real_results = pd.read_parquet(real_dir / 'real_results.parquet') | ||
|
||
syn_G = load_graph(model_dir / 'assign_id_graph.json') | ||
real_G = load_graph(real_dir / 'graph.json') | ||
|
||
syn_subcatchments = gpd.read_file(model_dir / 'subcatchments.geoparquet') | ||
real_subcatchments = gpd.read_file(real_dir / 'subcatchments.geojson') | ||
|
||
syn_metrics = iterate_metrics(syn_results, | ||
syn_subcatchments, | ||
syn_G, | ||
real_results, | ||
real_subcatchments, | ||
real_G, | ||
['grid_nse_flooding', | ||
'subcatchment_nse_flooding'], | ||
MetricEvaluation() | ||
) | ||
|
||
# Check outlet scale | ||
synthetic_results = syn_results.copy() | ||
real_results_ = real_results.copy() | ||
sg_syn, syn_outlet = best_outlet_match(syn_G, real_subcatchments) | ||
sg_real, real_outlet = dominant_outlet(real_G, real_results) | ||
|
||
# Check nnodes | ||
print(f'n syn nodes {len(sg_syn.nodes)}') | ||
print(f'n real nodes {len(sg_real.nodes)}') | ||
|
||
# Check contributing area | ||
#syn_subcatchments['impervious_area'].sum() / syn_subcatchments['area'].sum() | ||
#real_subcatchments['impervious_area'].sum() / real_subcatchments['area'].sum() | ||
variable = 'flooding' | ||
|
||
#e.g., subs | ||
results = align_by_shape(variable, | ||
synthetic_results = synthetic_results, | ||
real_results = real_results, | ||
shapes = real_subcatchments, | ||
synthetic_G = syn_G, | ||
real_G = real_G) | ||
|
||
# e.g., outlet | ||
if variable == 'flow': | ||
syn_ids = [d['id'] for u,v,d in syn_G.edges(data=True) | ||
if v == syn_outlet] | ||
real_ids = [d['id'] for u,v,d in real_G.edges(data=True) | ||
if v == real_outlet] | ||
else: | ||
syn_ids = list(sg_syn.nodes) | ||
real_ids = list(sg_real.nodes) | ||
synthetic_results['date'] = pd.to_datetime(synthetic_results['date']) | ||
real_results['date'] = pd.to_datetime(real_results['date']) | ||
|
||
# Help alignment | ||
synthetic_results["id"] = synthetic_results["id"].astype(str) | ||
real_results["id"] = real_results["id"].astype(str) | ||
syn_ids = [str(x) for x in syn_ids] | ||
real_ids = [str(x) for x in real_ids] | ||
# Extract data | ||
syn_data = extract_var(synthetic_results, variable) | ||
syn_data = syn_data.loc[syn_data["id"].isin(syn_ids)] | ||
syn_data = syn_data.groupby('date').value.sum() | ||
|
||
real_data = extract_var(real_results, variable) | ||
real_data = real_data.loc[real_data["id"].isin(real_ids)] | ||
real_data = real_data.groupby('date').value.sum() | ||
|
||
# Align data | ||
df = pd.merge(syn_data, | ||
real_data, | ||
left_index = True, | ||
right_index = True, | ||
suffixes=('_syn', '_real'), | ||
how='outer').sort_index() | ||
|
||
# Interpolate to time in real data | ||
df['value_syn'] = df.value_syn.interpolate().to_numpy() | ||
df = df.dropna(subset=['value_real']) |
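
The aligned frame `df` built above is the input that the NSE-style metrics operate on. As a rough cross-check, a plain NSE could be recomputed from it with something like the sketch below (a minimal illustration under that assumption, not the package's implementation; `nse` is a local helper made up here and `metric_utilities.py` remains the source of truth):

from __future__ import annotations

import numpy as np
import pandas as pd

def nse(obs: pd.Series, sim: pd.Series) -> float:
    """Nash-Sutcliffe efficiency of simulated against observed values."""
    obs_arr = obs.to_numpy()
    sim_arr = sim.to_numpy()
    return 1 - np.sum((sim_arr - obs_arr) ** 2) / np.sum((obs_arr - obs_arr.mean()) ** 2)

# e.g., run inside the script once `df` has been aligned:
# print(nse(df['value_real'], df['value_syn']))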
@@ -0,0 +1,101 @@
"""Perform sensitivity analysis on the results of the model runs.""" | ||
from __future__ import annotations | ||
|
||
from pathlib import Path | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from SALib.analyze import sobol | ||
from tqdm import tqdm | ||
|
||
from swmmanywhere.logging import logger | ||
from swmmanywhere.paper import experimenter | ||
from swmmanywhere.paper import plotting as swplt | ||
from swmmanywhere.preprocessing import check_bboxes | ||
from swmmanywhere.swmmanywhere import load_config | ||
|
||
# %% [markdown] | ||
# ## Initialise directories and load results | ||
# %% | ||
# Load the configuration file and extract relevant data | ||
if __name__ == 'main': | ||
project = 'cranbrook' | ||
base_dir = Path.home() / "Documents" / "data" / "swmmanywhere" | ||
config_path = base_dir / project / f'{project}_hpc.yml' | ||
config = load_config(config_path, validation = False) | ||
config['base_dir'] = base_dir / project | ||
objectives = config['metric_list'] | ||
parameters = config['parameters_to_sample'] | ||
|
||
# Load the results | ||
bbox = check_bboxes(config['bbox'], config['base_dir']) | ||
results_dir = config['base_dir'] / f'bbox_{bbox}' / 'results' | ||
fids = list(results_dir.glob('*_metrics.csv')) | ||
dfs = [pd.read_csv(fid) for fid in tqdm(fids, total = len(fids))] | ||
|
||
# Calculate how many processors were used | ||
nprocs = len(fids) | ||
|
||
# Concatenate the results | ||
df = pd.concat(dfs) | ||
|
||
# Log deltacon0 because it can be extremely large | ||
df['nc_deltacon0'] = np.log(df['nc_deltacon0']) | ||
df = df.sort_values(by = 'iter') | ||
|
||
# Make a directory to store plots in | ||
plot_fid = results_dir / 'plots' | ||
plot_fid.mkdir(exist_ok=True, parents=True) | ||
|
||
# %% [markdown] | ||
# ## Plot the objectives | ||
# %% | ||
# Highlight the behavioural indices | ||
# (i.e., KGE, NSE, PBIAS are in some preferred range) | ||
behavioral_indices = swplt.create_behavioral_indices(df) | ||
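    # NOTE (illustrative only): "behavioural" runs are those whose objectives sit
    # in an acceptable range, e.g. NSE above 0 or |PBIAS| below some threshold.
    # A hypothetical mask (these column names are placeholders, not the real ones)
    # would look something like:
    #     behavioural = (df['some_nse_metric'] > 0) & (df['some_pbias_metric'].abs() < 0.1)
    # The actual selection logic lives in swplt.create_behavioral_indices.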

    # Plot the objectives
    swplt.plot_objectives(df,
                          parameters,
                          objectives,
                          behavioral_indices,
                          plot_fid)

    # %% [markdown]
    # ## Perform Sensitivity Analysis
    # %%

    # Formulate the SALib problem
    problem = experimenter.formulate_salib_problem(parameters)
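    # For reference, a SALib problem is a plain dict; formulate_salib_problem
    # builds the real one, but its shape is along these lines (the values below
    # are placeholders, not actual swmmanywhere parameters):
    #     problem = {'num_vars': 2,
    #                'names': ['param_a', 'param_b'],
    #                'bounds': [[0.0, 1.0], [5, 50]],
    #                'groups': ['group_1', 'group_2']}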

    # Calculate any missing samples
    n_ideal = pd.DataFrame(
        experimenter.generate_samples(parameters_to_select=parameters,
                                      N=2**config['sample_magnitude'])
    ).iter.nunique()
    missing_iters = set(range(n_ideal)).difference(df.iter)
    if missing_iters:
        logger.warning(f"Missing {len(missing_iters)} iterations")

    # Perform the sensitivity analysis for groups
    problem['outputs'] = objectives
    rg = {objective: sobol.analyze(problem,
                                   df[objective]
                                   .iloc[0:(2**(config['sample_magnitude'] + 1) * 10)]
                                   .values,
                                   print_to_console=False)
          for objective in objectives}

    # Perform the sensitivity analysis for parameters
    problemi = problem.copy()
    del problemi['groups']
    ri = {objective: sobol.analyze(problemi,
                                   df[objective].values,
                                   print_to_console=False)
          for objective in objectives}

    # Bar plot of sensitivity indices
    for r_, groups in zip([rg, ri], ['groups', 'parameters']):
        swplt.plot_sensitivity_indices(r_,
                                       objectives,
                                       plot_fid / f'{groups}_indices.png')
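
The objects collected in `rg` and `ri` are SALib result dictionaries keyed by 'S1', 'ST', 'S2' and their confidence intervals. A minimal sketch of pulling one of them into a table for inspection follows (the helper `indices_to_frame` is made up for illustration; the paper's figures come from `swplt.plot_sensitivity_indices`):

from __future__ import annotations

import pandas as pd

def indices_to_frame(result: dict, names: list[str]) -> pd.DataFrame:
    """Collect first-order and total Sobol indices into one tidy frame."""
    return pd.DataFrame({'parameter': names,
                         'S1': result['S1'],
                         'ST': result['ST'],
                         'ST_conf': result['ST_conf']})

# e.g., for a single objective, using names from the per-parameter problem:
# table = indices_to_frame(ri[objectives[0]], problemi['names'])
# print(table.sort_values('ST', ascending=False))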