diff --git a/swmmanywhere/defs/basic_drainage_all_bits.inp b/swmmanywhere/defs/basic_drainage_all_bits.inp
index a03ec8cb..bfd5fd89 100644
--- a/swmmanywhere/defs/basic_drainage_all_bits.inp
+++ b/swmmanywhere/defs/basic_drainage_all_bits.inp
@@ -15,7 +15,7 @@ START_DATE 01/01/2000
 START_TIME 00:00:00
 REPORT_START_DATE 01/01/2000
 REPORT_START_TIME 00:00:00
-END_DATE 01/02/2000
+END_DATE 01/01/2000
 END_TIME 23:59:00
 SWEEP_START 1/1
 SWEEP_END 12/31
diff --git a/swmmanywhere/graph_utilities.py b/swmmanywhere/graph_utilities.py
index 133123ac..9a121d0f 100644
--- a/swmmanywhere/graph_utilities.py
+++ b/swmmanywhere/graph_utilities.py
@@ -535,6 +535,8 @@ def __call__(self, G: nx.Graph,
         # Derive
         subs_gdf = go.derive_subcatchments(G,temp_fid)
+        if os.getenv("SWMMANYWHERE_VERBOSE", "false").lower() == "true":
+            subs_gdf.to_file(addresses.subcatchments, driver='GeoJSON')
 
         # Calculate runoff coefficient (RC)
         if addresses.building.suffix in ('.geoparquet','.parquet'):
@@ -763,9 +765,10 @@ class identify_outlets(BaseGraphFunction,
                        required_node_attributes = ['x', 'y']):
     """identify_outlets class."""
 
-    def __call__(self, G: nx.Graph,
-                 outlet_derivation: parameters.OutletDerivation,
-                 **kwargs) -> nx.Graph:
+    def __call__(self,
+                 G: nx.Graph,
+                 outlet_derivation: parameters.OutletDerivation,
+                 **kwargs) -> nx.Graph:
         """Identify outlets in a combined river-street graph.
 
         This function identifies outlets in a combined river-street graph. An
@@ -862,7 +865,10 @@ def __call__(self, G: nx.Graph,
         Runs a Dijkstra-based algorithm to identify the shortest path from each
         node to its nearest outlet (weighted by the 'weight' edge value). The
         returned graph is one that only contains the edges that feature on the
-        shortest paths.
+        shortest paths. Street nodes that cannot be connected to any outlet (i.e.,
+        they are a distance greater than `outlet_derivation.river_buffer_distance`
+        from any river node or any street node that is connected to an outlet)
+        are removed from the graph.
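+
+        A sketch of the trimming step (illustrative only; `paths` is the
+        internal mapping from each node to its route to an outlet):
+
+            unreachable = [n for n, path in paths.items() if not path]
+            for n in unreachable:
+                G.remove_node(n)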
 
         Args:
             G (nx.Graph): A graph
@@ -926,6 +932,11 @@ def __call__(self, G: nx.Graph,
                     paths[neighbor] = paths[node] + [neighbor]
                     # Push the neighbor to the heap
                     heappush(heap, (alt_dist, neighbor))
+
+        # Remove nodes with no path to an outlet
+        for node in [node for node, path in paths.items() if not path]:
+            G.remove_node(node)
+            del paths[node], shortest_paths[node]
 
         edges_to_keep: set = set()
         for path in paths.values():
diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index 402c4dbd..def653b4 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -387,11 +387,10 @@ def median_coef_by_group(results: pd.DataFrame,
            .sum()
            .reset_index()
            .groupby(gb_key)
-           .apply(lambda x: coef_func(x.value_real, x.value_sim))
-           .median()
+           .apply(lambda x: coef_func(x.value_real, x.value_syn))
            )
-
-    return val
+    val = val[np.isfinite(val)]
+    return val.median()
 
 
 def nodes_to_subs(G: nx.Graph,
@@ -551,8 +550,13 @@ def align_by_shape(var,
     results = pd.merge(real_results[['date','sub_id','value']],
                        synthetic_results[['date','sub_id','value']],
                        on = ['date','sub_id'],
-                       suffixes = ('_real', '_sim')
+                       suffixes = ('_real', '_syn'),
+                       how = 'outer'
                        )
+
+    results['value_syn'] = results.value_syn.interpolate().to_numpy()
+    results = results.dropna(subset=['value_real'])
+
     return results
 
 def create_grid(bbox: tuple,
@@ -573,11 +577,17 @@ def create_grid(bbox: tuple,
     minx, miny, maxx, maxy = bbox
 
     if isinstance(scale, tuple):
+        if len(scale) != 2:
+            raise ValueError(f"""Scale must be a float or a tuple of length 2,
+                             instead of length: {len(scale)}""")
         dx, dy = scale
-    else:
+    elif isinstance(scale, (float, int)):
         dx = dy = scale
+    else:
+        raise ValueError(f"""Scale must be a float or a tuple of length 2,
+                         instead of type {type(scale)}""")
     xmins = np.arange(minx, maxx, dx)
-    ymins = np.arange(minx, maxy, dy)
+    ymins = np.arange(miny, maxy, dy)
     grid = [{'geometry' : shapely.box(x, y, x + dx, y + dy),
              'sub_id' : i}
            for i, (x, y) in enumerate(product(xmins, ymins))]
@@ -857,7 +867,7 @@ def nc_laplacian_dist(synthetic_G: nx.Graph,
     return nc_compare(synthetic_G,
                       real_G,
                       'lambda_dist',
-                      k=10,
+                      k=None,
                       kind = 'laplacian')
 
 @metrics.register
@@ -865,10 +875,10 @@ def nc_laplacian_norm_dist(synthetic_G: nx.Graph,
                            real_G: nx.Graph,
                            **kwargs) -> float:
     """Run the evaluated metric."""
-    return nc_compare(synthetic_G,
-                      real_G,
+    return nc_compare(synthetic_G.to_undirected(),
+                      real_G.to_undirected(),
                       'lambda_dist',
-                      k=10,
+                      k=None,
                       kind = 'laplacian_norm')
 
 @metrics.register
@@ -876,10 +886,10 @@ def nc_adjacency_dist(synthetic_G: nx.Graph,
                       real_G: nx.Graph,
                       **kwargs) -> float:
     """Run the evaluated metric."""
-    return nc_compare(synthetic_G,
-                      real_G,
+    return nc_compare(synthetic_G.to_undirected(),
+                      real_G.to_undirected(),
                       'lambda_dist',
-                      k=10,
+                      k=None,
                       kind = 'adjacency')
 
 @metrics.register
diff --git a/swmmanywhere/misc/debug_metrics.py b/swmmanywhere/misc/debug_metrics.py
new file mode 100644
index 00000000..1e0372ef
--- /dev/null
+++ b/swmmanywhere/misc/debug_metrics.py
@@ -0,0 +1,118 @@
+"""Debug results by recalculating metrics.
+
+This script provides a way to load a model file from the default setup in
+experimenter.py and recalculate the metrics. This is useful for recreating
+how a metric is calculated to verify that it is being done correctly. In this
+example we reproduce code from `metric_utilities.py` to check how time series
+data are aligned and compared.
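+
+Example usage (the paths and model number below are assumptions based on the
+default experimenter.py setup; point them at your own runs):
+
+    python -m swmmanywhere.misc.debug_metrics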
+""" +from __future__ import annotations + +from pathlib import Path + +import geopandas as gpd +import pandas as pd + +from swmmanywhere.graph_utilities import load_graph +from swmmanywhere.metric_utilities import ( + align_by_shape, + best_outlet_match, + dominant_outlet, + extract_var, + iterate_metrics, +) +from swmmanywhere.parameters import MetricEvaluation +from swmmanywhere.swmmanywhere import load_config + +if __name__ == 'main': + project = 'cranbrook' + base = Path.home() / "Documents" / "data" / "swmmanywhere" + config_path = base / project / f'{project}_hpc.yml' + config = load_config(config_path, validation = False) + config['base_dir'] = base / project + real_dir = config['base_dir'] / 'real' + + model_number = 5523 + + model_dir = config['base_dir'] / 'bbox_1' / f'model_{model_number}' + + syn_results = pd.read_parquet(model_dir / 'results.parquet') + real_results = pd.read_parquet(real_dir / 'real_results.parquet') + + syn_G = load_graph(model_dir / 'assign_id_graph.json') + real_G = load_graph(real_dir / 'graph.json') + + syn_subcatchments = gpd.read_file(model_dir / 'subcatchments.geoparquet') + real_subcatchments = gpd.read_file(real_dir / 'subcatchments.geojson') + + syn_metrics = iterate_metrics(syn_results, + syn_subcatchments, + syn_G, + real_results, + real_subcatchments, + real_G, + ['grid_nse_flooding', + 'subcatchment_nse_flooding'], + MetricEvaluation() + ) + + # Check outlet scale + synthetic_results = syn_results.copy() + real_results_ = real_results.copy() + sg_syn, syn_outlet = best_outlet_match(syn_G, real_subcatchments) + sg_real, real_outlet = dominant_outlet(real_G, real_results) + + # Check nnodes + print(f'n syn nodes {len(sg_syn.nodes)}') + print(f'n real nodes {len(sg_real.nodes)}') + + # Check contributing area + #syn_subcatchments['impervious_area'].sum() / syn_subcatchments['area'].sum() + #real_subcatchments['impervious_area'].sum() / real_subcatchments['area'].sum() + variable = 'flooding' + + #e.g., subs + results = align_by_shape(variable, + synthetic_results = synthetic_results, + real_results = real_results, + shapes = real_subcatchments, + synthetic_G = syn_G, + real_G = real_G) + + # e.g., outlet + if variable == 'flow': + syn_ids = [d['id'] for u,v,d in syn_G.edges(data=True) + if v == syn_outlet] + real_ids = [d['id'] for u,v,d in real_G.edges(data=True) + if v == real_outlet] + else: + syn_ids = list(sg_syn.nodes) + real_ids = list(sg_real.nodes) + synthetic_results['date'] = pd.to_datetime(synthetic_results['date']) + real_results['date'] = pd.to_datetime(real_results['date']) + + # Help alignment + synthetic_results["id"] = synthetic_results["id"].astype(str) + real_results["id"] = real_results["id"].astype(str) + syn_ids = [str(x) for x in syn_ids] + real_ids = [str(x) for x in real_ids] + # Extract data + syn_data = extract_var(synthetic_results, variable) + syn_data = syn_data.loc[syn_data["id"].isin(syn_ids)] + syn_data = syn_data.groupby('date').value.sum() + + real_data = extract_var(real_results, variable) + real_data = real_data.loc[real_data["id"].isin(real_ids)] + real_data = real_data.groupby('date').value.sum() + + # Align data + df = pd.merge(syn_data, + real_data, + left_index = True, + right_index = True, + suffixes=('_syn', '_real'), + how='outer').sort_index() + + # Interpolate to time in real data + df['value_syn'] = df.value_syn.interpolate().to_numpy() + df = df.dropna(subset=['value_real']) \ No newline at end of file diff --git a/swmmanywhere/paper/experimenter.py b/swmmanywhere/paper/experimenter.py index 
--- a/swmmanywhere/paper/experimenter.py
+++ b/swmmanywhere/paper/experimenter.py
@@ -166,6 +166,7 @@ def process_parameters(jobid: int,
 
     # Run the model
     config['model_number'] = ix
+    logger.info(f"Running swmmanywhere for model {ix}")
     address, metrics = swmmanywhere.swmmanywhere(config)
 
     if metrics is None:
diff --git a/swmmanywhere/paper/perform_sa.py b/swmmanywhere/paper/perform_sa.py
new file mode 100644
index 00000000..8a4a220e
--- /dev/null
+++ b/swmmanywhere/paper/perform_sa.py
@@ -0,0 +1,101 @@
+"""Perform sensitivity analysis on the results of the model runs."""
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from SALib.analyze import sobol
+from tqdm import tqdm
+
+from swmmanywhere.logging import logger
+from swmmanywhere.paper import experimenter
+from swmmanywhere.paper import plotting as swplt
+from swmmanywhere.preprocessing import check_bboxes
+from swmmanywhere.swmmanywhere import load_config
+
+# %% [markdown]
+# ## Initialise directories and load results
+# %%
+# Load the configuration file and extract relevant data
+if __name__ == '__main__':
+    project = 'cranbrook'
+    base_dir = Path.home() / "Documents" / "data" / "swmmanywhere"
+    config_path = base_dir / project / f'{project}_hpc.yml'
+    config = load_config(config_path, validation = False)
+    config['base_dir'] = base_dir / project
+    objectives = config['metric_list']
+    parameters = config['parameters_to_sample']
+
+    # Load the results
+    bbox = check_bboxes(config['bbox'], config['base_dir'])
+    results_dir = config['base_dir'] / f'bbox_{bbox}' / 'results'
+    fids = list(results_dir.glob('*_metrics.csv'))
+    dfs = [pd.read_csv(fid) for fid in tqdm(fids, total = len(fids))]
+
+    # Calculate how many processors were used
+    nprocs = len(fids)
+
+    # Concatenate the results
+    df = pd.concat(dfs)
+
+    # Log deltacon0 because it can be extremely large
+    df['nc_deltacon0'] = np.log(df['nc_deltacon0'])
+    df = df.sort_values(by = 'iter')
+
+    # Make a directory to store plots in
+    plot_fid = results_dir / 'plots'
+    plot_fid.mkdir(exist_ok=True, parents=True)
+
+    # %% [markdown]
+    # ## Plot the objectives
+    # %%
+    # Highlight the behavioural indices
+    # (i.e., KGE, NSE, PBIAS are in some preferred range)
+    behavioral_indices = swplt.create_behavioral_indices(df)
+
+    # Plot the objectives
+    swplt.plot_objectives(df,
+                          parameters,
+                          objectives,
+                          behavioral_indices,
+                          plot_fid)
+
+    # %% [markdown]
+    # ## Perform Sensitivity Analysis
+    # %%
+
+    # Formulate the SALib problem
+    problem = experimenter.formulate_salib_problem(parameters)
+
+    # Calculate any missing samples
+    n_ideal = pd.DataFrame(
+        experimenter.generate_samples(parameters_to_select=parameters,
+                                      N=2**config['sample_magnitude'])
+    ).iter.nunique()
+    missing_iters = set(range(n_ideal)).difference(df.iter)
+    if missing_iters:
+        logger.warning(f"Missing {len(missing_iters)} iterations")
+
+    # Perform the sensitivity analysis for groups
+    problem['outputs'] = objectives
+    rg = {objective: sobol.analyze(problem,
+                                   df[objective].iloc[0:
+                                       (2**(config['sample_magnitude'] + 1) * 10)]
+                                   .values,
+                                   print_to_console=False)
+          for objective in objectives}
+
+    # Perform the sensitivity analysis for parameters
+    problemi = problem.copy()
+    del problemi['groups']
+    ri = {objective: sobol.analyze(problemi,
+                                   df[objective].values,
+                                   print_to_console=False)
+          for objective in objectives}
+
+    # Barplot of sensitivity indices
+    for r_, groups in zip([rg,ri], ['groups','parameters']):
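+        # One figure per grouping: bar plots of indices, one panel per objective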
+        swplt.plot_sensitivity_indices(r_,
+                                       objectives,
+                                       plot_fid / f'{groups}_indices.png')
\ No newline at end of file
diff --git a/swmmanywhere/paper/plotting.py b/swmmanywhere/paper/plotting.py
new file mode 100644
index 00000000..473cea84
--- /dev/null
+++ b/swmmanywhere/paper/plotting.py
@@ -0,0 +1,139 @@
+"""Plotting SWMManywhere.
+
+A module with some built-in plotting for SWMManywhere.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import pandas as pd
+from SALib.plotting.bar import plot as barplot
+
+
+def create_behavioral_indices(df: pd.DataFrame) -> tuple[pd.Series, pd.Series]:
+    """Create behavioral indices for a dataframe.
+
+    Args:
+        df (pd.DataFrame): A dataframe containing the results.
+
+    Returns:
+        tuple[pd.Series, pd.Series]: A tuple of two series, the first is the
+            behavioural indices for 'strict' objectives (KGE/NSE), the second
+            is the behavioural indices for less strict objectives (PBIAS).
+    """
+    behavioural_ind_nse = ((df.loc[:, df.columns.str.contains('nse')] > 0) & \
+                           (df.loc[:, df.columns.str.contains('nse')] < 1)).any(axis=1)
+    behavioural_ind_kge = ((df.loc[:, df.columns.str.contains('kge')] > -0.41) & \
+                           (df.loc[:, df.columns.str.contains('kge')] < 1)).any(axis=1)
+    behavioural_ind_bias = (df.loc[:,
+                            df.columns.str.contains('bias')].abs() < 0.1
+                            ).any(axis=1)
+    return behavioural_ind_nse | behavioural_ind_kge, behavioural_ind_bias
+
+def plot_objectives(df: pd.DataFrame,
+                    parameters: list[str],
+                    objectives: list[str],
+                    behavioral_indices: tuple[pd.Series, pd.Series],
+                    plot_fid: Path):
+    """Plot the objectives.
+
+    Args:
+        df (pd.DataFrame): A dataframe containing the results.
+        parameters (list[str]): A list of parameters to plot.
+        objectives (list[str]): A list of objectives to plot.
+        behavioral_indices (tuple[pd.Series, pd.Series]): A tuple of two
+            series; see create_behavioral_indices.
+        plot_fid (Path): The directory to save the plots to.
+    """
+    n_rows_cols = int(len(objectives)**0.5 + 1)
+    for parameter in parameters:
+        fig, axs = plt.subplots(n_rows_cols, n_rows_cols, figsize=(10, 10))
+        for ax, objective in zip(axs.flat, objectives):
+            setup_axes(ax, df, parameter, objective, behavioral_indices)
+            add_threshold_lines(ax,
+                                objective,
+                                df[parameter].min(),
+                                df[parameter].max())
+        fig.tight_layout()
+        fig.suptitle(parameter)
+
+        fig.savefig(plot_fid / f"{parameter.replace('_', '-')}.png", dpi=500)
+        plt.close(fig)
+    return fig
+
+def setup_axes(ax: plt.Axes,
+               df: pd.DataFrame,
+               parameter: str,
+               objective: str,
+               behavioral_indices: tuple[pd.Series, pd.Series]
+               ):
+    """Set up the axes for plotting.
+
+    Args:
+        ax (plt.Axes): The axes to plot on.
+        df (pd.DataFrame): A dataframe containing the results.
+        parameter (str): The parameter to plot.
+        objective (str): The objective to plot.
+        behavioral_indices (tuple[pd.Series, pd.Series]): A tuple of two
+            series; see create_behavioral_indices.
+    """
+    ax.scatter(df[parameter], df[objective], s=0.5, c='b')
+    ax.scatter(df.loc[behavioral_indices[1], parameter],
+               df.loc[behavioral_indices[1], objective], s=2, c='c')
+    ax.scatter(df.loc[behavioral_indices[0], parameter],
+               df.loc[behavioral_indices[0], objective], s=2, c='r')
+    ax.set_yscale('symlog')
+    ax.set_title(objective)
+    ax.grid(True)
+    if 'nse' in objective:
+        ax.set_ylim([-10, 1])
+
+def add_threshold_lines(ax, objective, xmin, xmax):
+    """Add threshold lines to the axes.
+
+    Args:
+        ax (plt.Axes): The axes to plot on.
+        objective (str): The objective to plot.
+        xmin (float): The minimum x value.
+        xmax (float): The maximum x value.
+    """
+    thresholds = {
+        'bias': [-0.1, 0.1],
+        'nse': [0],
+        'kge': [-0.41]
+    }
+    for key, values in thresholds.items():
+        if key in objective:
+            for value in values:
+                ax.plot([xmin, xmax], [value, value], 'k--')
+
+def plot_sensitivity_indices(r_: dict[str, pd.DataFrame],
+                             objectives: list[str],
+                             plot_fid: Path):
+    """Plot the sensitivity indices.
+
+    Args:
+        r_ (dict[str, pd.DataFrame]): A dictionary containing the sensitivity
+            indices as produced by SALib.analyze.
+        objectives (list[str]): A list of objectives to plot.
+        plot_fid (Path): The directory to save the plots to.
+    """
+    f, axs = plt.subplots(len(objectives), 1, figsize=(10,10))
+    for ix, ax, (objective, r) in zip(range(len(objectives)), axs, r_.items()):
+        total, first, second = r.to_df()
+        total['sp'] = (total['ST'] - first['S1'])
+        barplot(total, ax=ax)
+        if ix == 0:
+            ax.set_title('Total - First')
+        if ix != len(objectives) - 1:
+            ax.set_xticklabels([])
+        else:
+            ax.set_xticklabels([x.replace('_','\n') for x in total.index],
+                               rotation = 0)
+
+        ax.set_ylabel(objective, rotation = 0, labelpad=20)
+        ax.get_legend().remove()
+    f.tight_layout()
+    f.savefig(plot_fid)
+    plt.close(f)
\ No newline at end of file
diff --git a/swmmanywhere/parameters.py b/swmmanywhere/parameters.py
index f7d86a18..05bbf187 100644
--- a/swmmanywhere/parameters.py
+++ b/swmmanywhere/parameters.py
@@ -95,19 +95,19 @@ class TopologyDerivation(BaseModel):
                       unit = "-",
                       description = "Constant to apply to surface slope in topo derivation")
 
-    chahinian_angle_scaling: float = Field(default = 1,
+    chahinian_angle_scaling: float = Field(default = 0,
                       le = 1,
                       ge = 0,
                       unit = "-",
                       description = "Constant to apply to chahinian angle in topo derivation")
 
-    length_scaling: float = Field(default = 1,
+    length_scaling: float = Field(default = 0.1,
                       le = 1,
                       ge = 0,
                       unit = "-",
                       description = "Constant to apply to length in topo derivation")
 
-    contributing_area_scaling: float = Field(default = 1,
+    contributing_area_scaling: float = Field(default = 0.1,
                       le = 1,
                       ge = 0,
                       unit = "-",
@@ -115,25 +115,25 @@ class TopologyDerivation(BaseModel):
 
     chahinian_slope_exponent: float = Field(default = 1,
                       le = 2,
-                      ge = -2,
+                      ge = 0,
                       unit = "-",
                       description = "Exponent to apply to surface slope in topo derivation")
 
     chahinian_angle_exponent: float = Field(default = 1,
                       le = 2,
-                      ge = -2,
+                      ge = 0,
                       unit = "-",
                       description = "Exponent to apply to chahinian angle in topo derivation")
 
     length_exponent: float = Field(default = 1,
                       le = 2,
-                      ge = -2,
+                      ge = 0,
                       unit = "-",
                       description = "Exponent to apply to length in topo derivation")
 
     contributing_area_exponent: float = Field(default = 1,
                       le = 2,
-                      ge = -2,
+                      ge = 0,
                       unit = "-",
                       description = "Exponent to apply to contributing area in topo derivation")
 
diff --git a/swmmanywhere/post_processing.py b/swmmanywhere/post_processing.py
index 655a9457..8d1c9e51 100644
--- a/swmmanywhere/post_processing.py
+++ b/swmmanywhere/post_processing.py
@@ -64,6 +64,7 @@ def synthetic_write(addresses: FilePaths):
     subs['id'] = subs['id'].astype(str)
     subs['subcatchment'] = subs['id'] + '-sub'
     subs['rain_gage'] = 1 # TODO revise when revising storms
+    subs['area'] /= 10000 # convert to ha
 
     # Edges
     edges['u'] = edges['u'].astype(str)
@@ -96,7 +97,7 @@ def synthetic_write(addresses: FilePaths):
 
     # TODO automatically match units to storm.csv?
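+    # Rain gage recording interval, in hours:minutes (cf. the TODO above)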
     event = {'name' : '1',
              'unit' : 'mm',
-             'interval' : '01:00',
+             'interval' : '05:00',
              'fid' : str(addresses.precipitation)
              }
diff --git a/swmmanywhere/swmmanywhere.py b/swmmanywhere/swmmanywhere.py
index 052fb687..eb84d837 100644
--- a/swmmanywhere/swmmanywhere.py
+++ b/swmmanywhere/swmmanywhere.py
@@ -35,6 +35,7 @@ def swmmanywhere(config: dict) -> tuple[Path, dict | None]:
         tuple[Path, dict | None]: The address of generated .inp and metrics.
     """
     # Create the project structure
+    logger.info("Creating project structure.")
    addresses = preprocessing.create_project_structure(config['bbox'],
                                                        config['project'],
                                                        config['base_dir'],
@@ -45,6 +46,7 @@ def swmmanywhere(config: dict) -> tuple[Path, dict | None]:
         setattr(addresses, key, val)
 
     # Run downloads
+    logger.info("Running downloads.")
     api_keys = yaml.safe_load(config['api_keys'].open('r'))
     preprocessing.run_downloads(config['bbox'],
                                 addresses,
@@ -52,24 +54,28 @@ def swmmanywhere(config: dict) -> tuple[Path, dict | None]:
                                 )
 
     # Identify the starting graph
+    logger.info("Iterating graphs.")
     if config['starting_graph']:
         G = load_graph(config['starting_graph'])
     else:
         G = preprocessing.create_starting_graph(addresses)
 
     # Load the parameters and perform any manual overrides
+    logger.info("Loading and setting parameters.")
     params = parameters.get_full_parameters()
     for category, overrides in config.get('parameter_overrides', {}).items():
         for key, val in overrides.items():
             setattr(params[category], key, val)
 
     # Iterate the graph functions
+    logger.info("Iterating graph functions.")
     G = iterate_graphfcns(G,
                           config['graphfcn_list'],
                           params,
                           addresses)
 
     # Save the final graph
+    logger.info("Saving final graph and writing inp file.")
     go.graph_to_geojson(G,
                         addresses.nodes,
                         addresses.edges,
@@ -80,17 +86,21 @@ def swmmanywhere(config: dict) -> tuple[Path, dict | None]:
     synthetic_write(addresses)
 
     # Run the model
+    logger.info("Running the synthetic model.")
     synthetic_results = run(addresses.inp,
                             **config['run_settings'])
     if os.getenv("SWMMANYWHERE_VERBOSE", "false").lower() == "true":
+        logger.info("Writing synthetic results.")
         synthetic_results.to_parquet(addresses.model /\
                                      f'results.{addresses.extension}')
 
     # Get the real results
     if config['real']['results']:
+        logger.info("Loading real results.")
         # TODO.. bit messy
         real_results = pd.read_parquet(config['real']['results'])
     elif config['real']['inp']:
+        logger.info("Running the real model.")
         real_results = run(config['real']['inp'],
                            **config['run_settings'])
         if os.getenv("SWMMANYWHERE_VERBOSE", "false").lower() == "true":
@@ -101,6 +111,7 @@ def swmmanywhere(config: dict) -> tuple[Path, dict | None]:
         return addresses.inp, None
 
     # Iterate the metrics
+    logger.info("Iterating metrics.")
     metrics = iterate_metrics(synthetic_results,
                               gpd.read_file(addresses.subcatchments),
                               G,
@@ -109,7 +120,7 @@ def swmmanywhere(config: dict) -> tuple[Path, dict | None]:
                               load_graph(config['real']['graph']),
                               config['metric_list'],
                               params['metric_evaluation'])
-
+    logger.info("Metrics complete")
     return addresses.inp, metrics
 
 def check_top_level_paths(config: dict):
@@ -206,11 +217,33 @@ def check_parameters_to_sample(config: dict):
 
     return config
 
-def load_config(config_path: Path):
+def check_starting_graph(config: dict):
+    """Check the starting graph in the config.
+
+    Args:
+        config (dict): The configuration.
+
+    Raises:
+        FileNotFoundError: If the starting graph path does not exist.
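+
+    Returns:
+        dict: The configuration, with 'starting_graph' converted to a Path
+            if one was supplied.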
+ """ + # If no starting graph, return + if not config.get('starting_graph', None): + return config + + # Check the starting graph exists and convert to Path + config['starting_graph'] = Path(config['starting_graph']) + if not config['starting_graph'].exists(): + raise FileNotFoundError(f"""starting_graph not found at + {config['starting_graph']}""") + + return config + +def load_config(config_path: Path, validation: bool = True): """Load, validate, and convert Paths in a configuration file. Args: config_path (Path): The path to the configuration file. + validation (bool, optional): Whether to validate the configuration. Returns: dict: The configuration. @@ -224,6 +257,9 @@ def load_config(config_path: Path): # Load the config config = yaml.safe_load(f) + if not validation: + return config + # Validate the config jsonschema.validate(instance = config, schema = schema) @@ -239,6 +275,9 @@ def load_config(config_path: Path): # Check the parameters to sample config = check_parameters_to_sample(config) + # Check starting graph + config = check_starting_graph(config) + return config @@ -263,6 +302,7 @@ def run(model: Path, """ with pyswmm.Simulation(str(model)) as sim: sim.start() + logger.info(f"{model} initialised in pyswmm") # Define the variables to store variables = { @@ -314,5 +354,5 @@ def run(model: Path, 'id' : getattr(storevar['object'], storevar['id'])}) - + logger.info("Model run complete.") return pd.DataFrame(results) \ No newline at end of file diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py index e81d5eef..fe28413d 100644 --- a/tests/test_metric_utilities.py +++ b/tests/test_metric_utilities.py @@ -383,9 +383,9 @@ def test_design_params(): def test_netcomp_iterate(): """Test the netcomp metrics and iterate_metrics.""" netcomp_results = {'nc_deltacon0' : 0.00129408, - 'nc_laplacian_dist' : 36.334773, - 'nc_laplacian_norm_dist' : 1.932007, - 'nc_adjacency_dist' : 3.542749, + 'nc_laplacian_dist' : 155.428234, + 'nc_laplacian_norm_dist' : 0.752901, + 'nc_adjacency_dist' : 226.22945, 'nc_resistance_distance' : 8.098548, 'nc_vertex_edge_distance' : 0.132075} G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json') diff --git a/tests/test_plotting.py b/tests/test_plotting.py new file mode 100644 index 00000000..910d46c8 --- /dev/null +++ b/tests/test_plotting.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from pathlib import Path +from tempfile import TemporaryDirectory + +import pandas as pd + +from swmmanywhere.paper import plotting + + +def test_create_behavioural_indices(): + """Test the create_behavioural_indices function.""" + # Create a DataFrame with some dummy data + data = { + 'nse1': [-1, 0, 0.5, 1, -2], + 'nse2': [-1, 0, 0.5, 1, -2], + 'kge1': [-2, -0.4, 0, 0.5, 1], + 'kge2': [-0.41, -0.5, 0, 0.5, 1], + 'bias1': [-0.2, -0.09, 0, 0.1, 0.2], + 'bias2': [-0.2, -0.1, 0, 0.1, 0.2], + } + df = pd.DataFrame(data) + + # Call create_behavioral_indices with the dummy DataFrame + strict_indices, less_strict_indices = plotting.create_behavioral_indices(df) + + # Check that the returned series have the expected values + assert (strict_indices == [False, True, True, True, False]).all() + assert (less_strict_indices == [False, True, True, False, False]).all() + +def test_plot_objectives(): + """Test the plot_objectives function.""" + # Create a DataFrame with some dummy data + data = { + 'param1': [1, 2, 3, 4, 5], + 'param2': [2, 3, 4, 5, 6], + 'obj1': [0.1, 0.2, 0.3, 0.4, 0.5], + 'obj2': [0.2, 0.3, 0.4, 0.5, 0.6], + } + df = 
+
+    # Define the parameters and objectives
+    parameters = ['param1', 'param2']
+    objectives = ['obj1', 'obj2']
+
+    # Create dummy behavioral indices
+    strict_indices = pd.Series([True, True, False, False, True])
+    less_strict_indices = pd.Series([True, False, True, False, True])
+    behavioral_indices = (strict_indices, less_strict_indices)
+
+    with TemporaryDirectory() as temp_dir:
+        # Call plot_objectives with the dummy DataFrame
+        plotting.plot_objectives(df,
+                                 parameters,
+                                 objectives,
+                                 behavioral_indices,
+                                 Path(temp_dir))
+
+        assert (Path(temp_dir) / 'param1.png').exists()
+        assert (Path(temp_dir) / 'param2.png').exists()
+
+def test_plot_sensitivity_indices():
+    """Test the plot_sensitivity_indices function."""
+    # Create a dictionary with some dummy data
+    class Data:
+        # Mock up SALib output
+        params = ['param_1', 'param_2', 'param_3', 'param_4', 'param_5']
+        total = pd.DataFrame({'ST': [0.1, 0.2, 0.3, 0.4, 0.5]},
+                             index=params)
+        first = pd.DataFrame({'S1': [0.05, 0.1, 0.15, 0.2, 0.25]},
+                             index=params)
+        second = pd.DataFrame({'S2': [0.05, 0.1, 0.15, 0.2, 0.25]},
+                              index=params)
+
+        def to_df(self):
+            return (self.total, self.first, self.second)
+
+    data = Data()
+    r_ = {'obj1': data, 'obj2': data}
+
+    # Define the objectives
+    objectives = ['obj1','obj2']
+
+    with TemporaryDirectory() as temp_dir:
+        fid = Path(temp_dir) / 'indices.png'
+        # Call plot_sensitivity_indices with the dummy dictionary
+        plotting.plot_sensitivity_indices(r_, objectives, fid)
+
+        assert fid.exists()
diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py
index 68c414f7..acd6b150 100644
--- a/tests/test_post_processing.py
+++ b/tests/test_post_processing.py
@@ -116,7 +116,7 @@ def generate_data_dict():
     rain_fid = 'storm.dat'
     event = {'name' : '1',
              'unit' : 'mm',
-             'interval' : '01:00',
+             'interval' : '05:00',
              'fid' : rain_fid}
     symbol = {'x' : 0,
               'y' : 0,
@@ -160,6 +160,11 @@ def test_synthetic_write():
     comparison_file = addresses.model / "model_base.inp"
     template_fid = Path(__file__).parent.parent / 'swmmanywhere' / 'defs' /\
         'basic_drainage_all_bits.inp'
+
+    # Manually convert to ha to make comparable to synthetic_write
+    data_dict['subs']['area'] /= 10000
+
+    # Write the model with data_dict_to_inp
     stt.data_dict_to_inp(stt.format_to_swmm_dict(**data_dict),
                          template_fid,
                          comparison_file)
@@ -175,8 +180,8 @@ def test_synthetic_write():
     diff = difflib.unified_diff(
         file1.readlines(),
         file2.readlines(),
-        fromfile=new_input_file,
-        tofile=comparison_file,
+        fromfile=str(new_input_file),
+        tofile=str(comparison_file),
     )
     print(''.join(diff))
     assert are_files_identical, "The files are not identical"