diff --git a/dev-requirements.txt b/dev-requirements.txt
index 006102ed..852b7e5a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.10
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
 #    pip-compile --extra=dev --output-file=dev-requirements.txt
@@ -17,8 +17,10 @@ annotated-types==0.6.0
 attrs==23.2.0
     # via
     #   fiona
+    #   jsonschema
     #   pytest-mypy
     #   rasterio
+    #   referencing
 build==1.0.3
     # via pip-tools
 cdsapi==0.6.1
@@ -74,8 +76,6 @@ dill==0.3.7
     # via multiprocess
 distlib==0.3.8
     # via virtualenv
-exceptiongroup==1.2.0
-    # via pytest
 fastparquet==2023.10.1
     # via swmmanywhere (pyproject.toml)
 filelock==3.13.1
@@ -100,6 +100,10 @@ geopandas==0.14.2
     #   swmmanywhere (pyproject.toml)
 geopy==2.4.1
     # via swmmanywhere (pyproject.toml)
+gitdb==4.0.11
+    # via gitpython
+gitpython==3.1.42
+    # via swmmanywhere (pyproject.toml)
 identify==2.5.33
     # via pre-commit
 idna==3.6
     # via requests
@@ -110,6 +114,10 @@ imageio==2.33.1
     # via scikit-image
 iniconfig==2.0.0
     # via pytest
 joblib==1.3.2
     # via swmmanywhere (pyproject.toml)
+jsonschema==4.21.1
+    # via swmmanywhere (pyproject.toml)
+jsonschema-specifications==2023.12.1
+    # via jsonschema
 julian==0.14
     # via pyswmm
 kiwisolver==1.4.5
@@ -253,12 +261,20 @@ rasterio==1.3.9
     #   pysheds
     #   rioxarray
     #   swmmanywhere (pyproject.toml)
+referencing==0.33.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
 requests==2.31.0
     # via
     #   cdsapi
     #   osmnx
 rioxarray==0.15.1
     # via swmmanywhere (pyproject.toml)
+rpds-py==0.18.0
+    # via
+    #   jsonschema
+    #   referencing
 ruff==0.1.11
     # via swmmanywhere (pyproject.toml)
 salib==1.4.7
@@ -281,20 +297,14 @@ six==1.16.0
     # via
     #   fiona
     #   python-dateutil
+smmap==5.0.1
+    # via gitdb
 snuggs==1.4.7
     # via rasterio
 swmm-toolkit==0.15.3
     # via pyswmm
 tifffile==2024.1.30
     # via scikit-image
-tomli==2.0.1
-    # via
-    #   build
-    #   coverage
-    #   mypy
-    #   pip-tools
-    #   pyproject-hooks
-    #   pytest
 toolz==0.12.1
     # via cytoolz
 tqdm==4.66.2
diff --git a/pyproject.toml b/pyproject.toml
index a3c4e1a3..be65c610 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
     "geopy",
     "GitPython",
     "joblib",
+    "jsonschema",
     "loguru",
     "matplotlib",
     "netcdf4",
@@ -94,4 +95,7 @@ skip = "swmmanywhere/defs/iso_converter.yml,*.inp"
 ignore-words-list = "gage,gages"
 
 [tool.refurb]
-ignore = [184]
+ignore = [
+    184, # Because some frankly bizarre suggestions
+    109  # Because pyyaml doesn't support tuples
+    ]
diff --git a/requirements.txt b/requirements.txt
index 533f956a..32ffe117 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.10
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
 #    pip-compile
@@ -17,7 +17,9 @@ annotated-types==0.6.0
 attrs==23.2.0
     # via
     #   fiona
+    #   jsonschema
     #   rasterio
+    #   referencing
 cdsapi==0.6.1
     # via swmmanywhere (pyproject.toml)
 certifi==2023.11.17
@@ -80,12 +82,20 @@ geopandas==0.14.2
     #   swmmanywhere (pyproject.toml)
 geopy==2.4.1
     # via swmmanywhere (pyproject.toml)
+gitdb==4.0.11
+    # via gitpython
+gitpython==3.1.42
+    # via swmmanywhere (pyproject.toml)
 idna==3.6
     # via requests
 imageio==2.33.1
     # via scikit-image
 joblib==1.3.2
     # via swmmanywhere (pyproject.toml)
+jsonschema==4.21.1
+    # via swmmanywhere (pyproject.toml)
+jsonschema-specifications==2023.12.1
+    # via jsonschema
 julian==0.14
     # via pyswmm
 kiwisolver==1.4.5
@@ -195,12 +205,20 @@ rasterio==1.3.9
     #   pysheds
     #   rioxarray
     #   swmmanywhere (pyproject.toml)
+referencing==0.33.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
 requests==2.31.0
     # via
     #   cdsapi
     #   osmnx
 rioxarray==0.15.1
     # via swmmanywhere (pyproject.toml)
+rpds-py==0.18.0
+    # via
+    #   jsonschema
+    #   referencing
 salib==1.4.7
     # via swmmanywhere (pyproject.toml)
 scikit-image==0.22.0
@@ -221,6 +239,8 @@ six==1.16.0
     # via
     #   fiona
     #   python-dateutil
+smmap==5.0.1
+    # via gitdb
 snuggs==1.4.7
     # via rasterio
 swmm-toolkit==0.15.3
diff --git a/swmmanywhere/defs/schema.yml b/swmmanywhere/defs/schema.yml
new file mode 100644
index 00000000..3653df92
--- /dev/null
+++ b/swmmanywhere/defs/schema.yml
@@ -0,0 +1,33 @@
+type: object
+properties:
+  base_dir: {type: string}
+  project: {type: string}
+  bbox: {type: array, items: {type: number}, minItems: 4, maxItems: 4}
+  api_keys: {type: string}
+  run_settings:
+    type: object
+    properties:
+      reporting_iters: {type: integer, minimum: 1}
+      duration: {type: number}
+      storevars:
+        type: array
+        items:
+          type: string
+          enum: [flooding, flow, depth, runoff]
+  real:
+    type: ['object', 'null']
+    properties:
+      inp: {type: string}
+      graph: {type: string}
+      subcatchments: {type: string}
+      results: {type: ['string', 'null']}
+    required: [graph, subcatchments]
+    anyOf:
+      - required: [inp]
+      - required: [results]
+  starting_graph: {type: ['string', 'null']}
+  graphfcn_list: {type: array, items: {type: string}}
+  metric_list: {type: array, items: {type: string}}
+  address_overrides: {type: ['object', 'null']}
+  parameter_overrides: {type: ['object', 'null']}
+required: [base_dir, project, bbox, api_keys, graphfcn_list]
\ No newline at end of file
diff --git a/swmmanywhere/geospatial_utilities.py b/swmmanywhere/geospatial_utilities.py
index c0b0021e..f5df9859 100644
--- a/swmmanywhere/geospatial_utilities.py
+++ b/swmmanywhere/geospatial_utilities.py
@@ -258,7 +258,7 @@ def reproject_graph(G: nx.Graph,
                                             G_new.nodes[u]['y']],
                                            [G_new.nodes[v]['x'],
                                             G_new.nodes[v]['y']]])
-    
+    G_new.graph['crs'] = target_crs
     return G_new
@@ -751,20 +751,23 @@ def edges_to_features(G: nx.Graph):
     return features
 
 def graph_to_geojson(graph: nx.Graph,
-                     fid: Path,
+                     fid_nodes: Path,
+                     fid_edges: Path,
                      crs: str):
     """Write a graph to a GeoJSON file.
 
     Args:
         graph (nx.Graph): The input graph.
-        fid (Path): The filepath to save the GeoJSON file.
+        fid_nodes (Path): The filepath to save the nodes GeoJSON file.
+        fid_edges (Path): The filepath to save the edges GeoJSON file.
         crs (str): The CRS of the graph.
     """
     graph = graph.copy()
     nodes = nodes_to_features(graph)
    edges = edges_to_features(graph)
 
-    for iterable, label in zip([nodes, edges], ['nodes', 'edges']):
+    for iterable, fid in zip([nodes, edges],
+                             [fid_nodes, fid_edges]):
         geojson = {
             'type': 'FeatureCollection',
             'features' : iterable,
             'crs' : {
                 'type': 'name',
                 'properties': {
                     'name': crs
                 }
             }
         }
@@ -775,7 +778,6 @@ def graph_to_geojson(graph: nx.Graph,
-        fid_ = fid.with_stem(fid.stem + f'_{label}').with_suffix('.geojson')
-        with fid_.open('w') as output_file:
+        with fid.open('w') as output_file:
             json.dump(geojson, output_file, indent=2)
\ No newline at end of file
diff --git a/swmmanywhere/graph_utilities.py b/swmmanywhere/graph_utilities.py
index 72abbdf0..aa755a0d 100644
--- a/swmmanywhere/graph_utilities.py
+++ b/swmmanywhere/graph_utilities.py
@@ -157,9 +157,32 @@ def get_osmid_id(data: dict) -> Hashable:
         id_ = id_[0]
     return id_
 
+def iterate_graphfcns(G: nx.Graph,
+                      graphfcn_list: list[str],
+                      params: dict,
+                      addresses: parameters.FilePaths) -> nx.Graph:
+    """Iterate a list of graph functions over a graph.
+
+    Args:
+        G (nx.Graph): The graph to iterate over.
+        graphfcn_list (list[str]): A list of graph functions to iterate.
+        params (dict): A dictionary of parameters to pass to the graph
+            functions.
+        addresses (parameters.FilePaths): A FilePaths parameter object.
+
+    Returns:
+        nx.Graph: The graph after the graph functions have been applied.
+    """
+    not_exists = [g for g in graphfcn_list if g not in graphfcns]
+    if not_exists:
+        raise ValueError(f"Graphfcns are not registered:\n{', '.join(not_exists)}")
+    for function in graphfcn_list:
+        G = graphfcns[function](G, addresses = addresses, **params)
+        logger.info(f"graphfcn: {function} completed.")
+    return G
+
 @register_graphfcn
 class assign_id(BaseGraphFunction,
-                required_edge_attributes = ['osmid'],
                 adds_edge_attributes = ['id']
                 ):
     """assign_id class."""
@@ -180,8 +203,12 @@ def __call__(self,
         Returns:
             G (nx.Graph): The same graph with an ID assigned to each edge
         """
+        edge_ids: set[str] = set()
         for u, v, data in G.edges(data=True):
-            data['id'] = get_osmid_id(data)
+            data['id'] = f'{u}-{v}'
+            if data['id'] in edge_ids:
+                logger.warning(f"Duplicate edge ID: {data['id']}")
+            edge_ids.add(data['id'])
         return G
 
 @register_graphfcn
@@ -423,7 +450,7 @@ def __call__(self, G: nx.Graph,
         subs_gdf = go.derive_subcatchments(G,temp_fid)
 
         # Calculate runoff coefficient (RC)
-        if addresses.building.suffix == '.parquet':
+        if addresses.building.suffix in ('.geoparquet','.parquet'):
             buildings = gpd.read_parquet(addresses.building)
         else:
             buildings = gpd.read_file(addresses.building)
@@ -451,7 +478,7 @@ def __call__(self, G: nx.Graph,
 @register_graphfcn
 class set_elevation(BaseGraphFunction,
                     required_node_attributes = ['x', 'y'],
-                    adds_node_attributes = ['elevation']):
+                    adds_node_attributes = ['surface_elevation']):
     """set_elevation class."""
 
     def __call__(self, G: nx.Graph,
@@ -477,12 +504,12 @@ def __call__(self, G: nx.Graph,
                                             y,
                                             addresses.elevation)
         elevations_dict = {id_: elev for id_, elev in zip(G.nodes, elevations)}
-        nx.set_node_attributes(G, elevations_dict, 'elevation')
+        nx.set_node_attributes(G, elevations_dict, 'surface_elevation')
         return G
 
 @register_graphfcn
 class set_surface_slope(BaseGraphFunction,
-                        required_node_attributes = ['elevation'],
+                        required_node_attributes = ['surface_elevation'],
                         adds_edge_attributes = ['surface_slope']):
     """set_surface_slope class."""
 
@@ -502,7 +529,8 @@ def __call__(self, G: nx.Graph,
         """
         G = G.copy()
         # Compute the slope for each edge
-        slope_dict = {(u, v, k): (G.nodes[u]['elevation'] - G.nodes[v]['elevation'])
+        slope_dict = {(u, v, k): (G.nodes[u]['surface_elevation'] - \
+                                  G.nodes[v]['surface_elevation'])
                                     / d['length'] for u, v, k, d in
                                     G.edges(data=True, keys=True)}
 
@@ -977,8 +1005,9 @@ def process_successors(G: nx.Graph,
 
 @register_graphfcn
 class pipe_by_pipe(BaseGraphFunction,
-                   required_edge_attributes = ['length', 'elevation'],
-                   required_node_attributes = ['contributing_area', 'elevation'],
+                   required_edge_attributes = ['length'],
+                   required_node_attributes = ['contributing_area',
+                                               'surface_elevation'],
                    adds_edge_attributes = ['diameter'],
                    adds_node_attributes = ['chamber_floor_elevation']):
     """pipe_by_pipe class."""
@@ -1015,7 +1044,7 @@ def __call__(self,
             G (nx.Graph): A graph
         """
         G = G.copy()
-        surface_elevations = {n : d['elevation'] for n, d in G.nodes(data=True)}
+        surface_elevations = nx.get_node_attributes(G, 'surface_elevation')
         topological_order = list(nx.topological_sort(G))
         chamber_floor = {}
         edge_diams: dict[tuple[Hashable,Hashable,int],float] = {}
diff --git a/swmmanywhere/metric_utilities.py b/swmmanywhere/metric_utilities.py
index 10ea285d..6a3d61b2 100644
--- a/swmmanywhere/metric_utilities.py
+++ b/swmmanywhere/metric_utilities.py
@@ -55,6 +55,42 @@ def __getattr__(self, name):
 
 metrics = MetricRegistry()
 
+def iterate_metrics(synthetic_results: pd.DataFrame,
+                    synthetic_subs: gpd.GeoDataFrame,
+                    synthetic_G: nx.Graph,
+                    real_results: pd.DataFrame,
+                    real_subs: gpd.GeoDataFrame,
+                    real_G: nx.Graph,
+                    metric_list: list[str]) -> dict[str, float]:
+    """Iterate a list of metrics over the synthetic and real data.
+
+    Args:
+        synthetic_results (pd.DataFrame): The synthetic results.
+        synthetic_subs (gpd.GeoDataFrame): The synthetic subcatchments.
+        synthetic_G (nx.Graph): The synthetic graph.
+        real_results (pd.DataFrame): The real results.
+        real_subs (gpd.GeoDataFrame): The real subcatchments.
+        real_G (nx.Graph): The real graph.
+        metric_list (list[str]): A list of metrics to iterate.
+
+    Returns:
+        dict[str, float]: The results of the metrics.
+    """
+    not_exists = [m for m in metric_list if m not in metrics]
+    if not_exists:
+        raise ValueError(f"Metrics are not registered:\n{', '.join(not_exists)}")
+
+    kwargs = {
+        "synthetic_results": synthetic_results,
+        "synthetic_subs": synthetic_subs,
+        "synthetic_G": synthetic_G,
+        "real_results": real_results,
+        "real_subs": real_subs,
+        "real_G": real_G,
+    }
+
+    return {m : metrics[m](**kwargs) for m in metric_list}
+
 def extract_var(df: pd.DataFrame,
                 var: str) -> pd.DataFrame:
     """Extract var from a dataframe."""
@@ -80,11 +116,11 @@ def align_calc_nse(synthetic_results: pd.DataFrame,
 
     # Extract data
     syn_data = extract_var(synthetic_results, variable)
-    syn_data = syn_data.loc[syn_data.object.isin(syn_ids)]
+    syn_data = syn_data.loc[syn_data.id.isin(syn_ids)]
     syn_data = syn_data.groupby('date').value.sum()
 
     real_data = extract_var(real_results, variable)
-    real_data = real_data.loc[real_data.object.isin(real_ids)]
+    real_data = real_data.loc[real_data.id.isin(real_ids)]
     real_data = real_data.groupby('date').value.sum()
 
     # Align data
@@ -224,7 +260,7 @@ def dominant_outlet(G: nx.DiGraph,
     subgraph of the graph of nodes that drain to that outlet.
 
     Args:
-        G (nx.Graph): The graph.
+        G (nx.DiGraph): The graph.
         results (pd.DataFrame): The results, which include a 'flow' and
             'id' column.
@@ -240,8 +276,8 @@ def dominant_outlet(G: nx.DiGraph,
 
     # Identify the outlet with the highest flow
     outlet_flows = results.loc[(results.variable == 'flow') &
-                               (results.object.isin(outlet_arcs))]
-    max_outlet_arc = outlet_flows.groupby('object').value.median().idxmax()
+                               (results.id.isin(outlet_arcs))]
+    max_outlet_arc = outlet_flows.groupby('id').value.median().idxmax()
     max_outlet = [v for u,v,d in G.edges(data=True)
                   if d['id'] == max_outlet_arc][0]
 
@@ -390,12 +426,12 @@ def _f(x):
         return np.trapz(x.value,x.duration)
 
     syn_flooding = extract_var(synthetic_results,
-                               'flooding').groupby('object').apply(_f)
+                               'flooding').groupby('id').apply(_f)
     syn_area = synthetic_subs.impervious_area.sum()
     syn_tot = syn_flooding.sum() / syn_area
 
     real_flooding = extract_var(real_results,
-                                'flooding').groupby('object').apply(_f)
+                                'flooding').groupby('id').apply(_f)
     real_area = real_subs.impervious_area.sum()
     real_tot = real_flooding.sum() / real_area
 
@@ -456,7 +492,6 @@ def outlet_nse_flow(synthetic_G: nx.Graph,
                          syn_arc,
                          real_arc)
 
-
 @metrics.register
 def outlet_nse_flooding(synthetic_G: nx.Graph,
                         synthetic_results: pd.DataFrame,
diff --git a/swmmanywhere/parameters.py b/swmmanywhere/parameters.py
index 0ebad60d..b0b90f57 100644
--- a/swmmanywhere/parameters.py
+++ b/swmmanywhere/parameters.py
@@ -129,18 +129,6 @@ def check_weights(cls, values):
                 raise ValueError(f"Missing {weight}_exponent")
         return values
 
-# TODO move this to tests and run it if we're happy with this way of doing things
-class NewTopo(TopologyDerivation):
-    """Demo for changing weights that should break the validator."""
-    weights: list = Field(default = ['chahinian_slope',
-                                     'chahinian_angle',
-                                     'length',
-                                     'contributing_area',
-                                     'test'],
-                          min_items = 1,
-                          unit = "-",
-                          description = "Weights for topo derivation")
-
 class HydraulicDesign(BaseModel):
     """Parameters for hydraulic design."""
     diameters: list = Field(default = np.linspace(0.15,3,int((3-0.15)/0.075) + 1),
@@ -249,8 +237,20 @@ def _generate_bbox(self):
     def _generate_model(self):
         return self._generate_property(f'model_{self.model_number}',
                                        'bbox')
+    def _generate_inp(self):
+        return self._generate_property(f'model_{self.model_number}.inp',
+                                       'model')
     def _generate_subcatchments(self):
-        return self._generate_property(f'subcatchments.{self.extension}',
+        return self._generate_property(f'subcatchments.geo{self.extension}',
+                                       'model')
+    def _generate_graph(self):
+        return self._generate_property(f'graph.{self.extension}',
+                                       'model')
+    def _generate_nodes(self):
+        return self._generate_property(f'nodes.geo{self.extension}',
+                                       'model')
+    def _generate_edges(self):
+        return self._generate_property(f'edges.geo{self.extension}',
                                        'model')
     def _generate_download(self):
         return self._generate_property('download',
@@ -264,7 +264,7 @@ def _generate_street(self):
     def _generate_elevation(self):
         return self._generate_property('elevation.tif', 'download')
     def _generate_building(self):
-        return self._generate_property(f'building.{self.extension}',
+        return self._generate_property(f'building.geo{self.extension}',
                                        'download')
     def _generate_precipitation(self):
         return self._generate_property(f'precipitation.{self.extension}',
diff --git a/swmmanywhere/post_processing.py b/swmmanywhere/post_processing.py
index 45c3f905..bf75b89a 100644
--- a/swmmanywhere/post_processing.py
+++ b/swmmanywhere/post_processing.py
@@ -16,12 +16,14 @@
 import pandas as pd
 import yaml
 
+from swmmanywhere.parameters import FilePaths
 
-def synthetic_write(model_dir: Path):
+
+def synthetic_write(addresses: FilePaths):
     """Load synthetic data and write to SWMM input file.
 
     Loads nodes, edges and subcatchments from synthetic data, assumes that
-    these are all located in `model_dir`. Fills in appropriate default values
+    these are all located in `addresses`. Fills in appropriate default values
     for many SWMM parameters. More parameters are available to edit (see
     defs/swmm_conversion.yml). Identifies outfalls and automatically ensures
     that they have only one link to them (as is required by SWMM). Formats
@@ -29,17 +31,17 @@ def synthetic_write(model_dir: Path):
     a SWMM input (.inp) file.
 
     Args:
-        model_dir (str): model directory address. Assumes a format along the
-            lines of 'model_2', where the number is the model number.
+        addresses (FilePaths): A FilePaths object of file paths.
     """
     # TODO these node/edge names are probably not good or extensible defaults
     # revisit once overall software architecture is more clear.
-    nodes = gpd.read_file(model_dir / 'pipe_by_pipe_nodes.geojson')
-    edges = gpd.read_file(model_dir / 'pipe_by_pipe_edges.geojson')
-    subs = gpd.read_file(model_dir / 'subcatchments.geojson')
-
+    nodes = gpd.read_file(addresses.nodes)
+    edges = gpd.read_file(addresses.edges)
+    subs = gpd.read_file(addresses.subcatchments)
+    subs = subs.loc[subs.id.isin(nodes.id)]
+
     # Extract SWMM relevant data
-    edges = edges[['u','v','diameter','length']]
+    edges = edges[['id','u','v','diameter','length']]
     nodes = nodes[['id',
                    'x',
                    'y',
@@ -83,22 +85,20 @@ def synthetic_write(model_dir: Path):
     new_edges = edges.iloc[0:outfalls.shape[0]].copy()
     new_edges['u'] = outfalls['id'].str.replace('_outfall','').values
     new_edges['v'] = outfalls['id'].values
+    new_edges['id'] = [f'{u}-{v}' for u,v in zip(new_edges['u'], new_edges['v'])]
     new_edges['diameter'] = 15 # TODO .. big pipe to enable all outfall...
     new_edges['length'] = 1
 
     # Append new edges
     edges = pd.concat([edges, new_edges], ignore_index = True)
 
-    # Name all edges
-    edges['id'] = edges.u.astype(str) + '-' + edges.v.astype(str)
-
     # Create event
     # TODO will need some updating if multiple rain gages
     # TODO automatically match units to storm.csv?
     event = {'name' : '1',
              'unit' : 'mm',
              'interval' : '01:00',
-             'fid' : 'storm.dat' # overwritten at runtime
+             'fid' : str(addresses.precipitation)
              }
 
     # Locate raingage(s) on the map
@@ -111,10 +111,6 @@
     existing_input_file = Path(__file__).parent / 'defs' /\
         'basic_drainage_all_bits.inp'
 
-    # New input file
-    model_number = model_dir.name.split('_')[-1]
-    new_input_file = model_dir / f'model_{model_number}.inp'
-
     # Format to dict
     data_dict = format_to_swmm_dict(nodes,
                                     outfalls,
                                     edges,
                                     subs,
                                     event,
                                     symbol)
@@ -124,7 +120,7 @@
     # Write new input file
-    data_dict_to_inp(data_dict, existing_input_file, new_input_file)
+    data_dict_to_inp(data_dict, existing_input_file, addresses.inp)
 
 def overwrite_section(data: np.ndarray,
diff --git a/swmmanywhere/preprocessing.py b/swmmanywhere/preprocessing.py
index 527413bc..995653dd 100644
--- a/swmmanywhere/preprocessing.py
+++ b/swmmanywhere/preprocessing.py
@@ -145,7 +145,7 @@ def write_df(df: pd.DataFrame | gpd.GeoDataFrame,
         df (DataFrame): DataFrame to write to a file.
         fid (Path): Path to the file.
""" - if fid.suffix == '.parquet': + if fid.suffix in ('.geoparquet','.parquet'): df.to_parquet(fid) elif fid.suffix == '.json': if isinstance(df, gpd.GeoDataFrame): @@ -169,7 +169,7 @@ def prepare_precipitation(bbox: tuple[float, float, float, float], precip = go.reproject_df(precip, source_crs, target_crs) write_df(precip, addresses.precipitation) -def prepare_elvation(bbox: tuple[float, float, float, float], +def prepare_elevation(bbox: tuple[float, float, float, float], addresses: parameters.FilePaths, api_keys: dict[str, str], target_crs: str): @@ -256,7 +256,7 @@ def run_downloads(bbox: tuple[float, float, float, float], prepare_precipitation(bbox, addresses, api_keys, target_crs) # Download elevation data - prepare_elvation(bbox, addresses, api_keys, target_crs) + prepare_elevation(bbox, addresses, api_keys, target_crs) # Download building data prepare_building(bbox, addresses, target_crs) diff --git a/swmmanywhere/swmmanywhere.py b/swmmanywhere/swmmanywhere.py index a8afb1db..89923163 100644 --- a/swmmanywhere/swmmanywhere.py +++ b/swmmanywhere/swmmanywhere.py @@ -5,8 +5,193 @@ """ from pathlib import Path +import geopandas as gpd +import jsonschema import pandas as pd import pyswmm +import yaml + +import swmmanywhere.geospatial_utilities as go +from swmmanywhere import preprocessing +from swmmanywhere.graph_utilities import iterate_graphfcns, load_graph, save_graph +from swmmanywhere.logging import logger +from swmmanywhere.metric_utilities import iterate_metrics +from swmmanywhere.parameters import get_full_parameters +from swmmanywhere.post_processing import synthetic_write + + +def swmmanywhere(config: dict): + """Run SWMManywhere processes. + + This function runs the SWMManywhere processes, including downloading data, + preprocessing the graphfcns, running the model, and comparing the results + to real data using metrics. + + Args: + config (dict): The loaded config as a dict. + + Returns: + pd.DataFrame: A DataFrame containing the results. + """ + # Create the project structure + addresses = preprocessing.create_project_structure(config['bbox'], + config['project'], + config['base_dir'] + ) + + for key, val in config.get('address_overrides', {}).items(): + setattr(addresses, key, val) + + # Run downloads + api_keys = yaml.safe_load(config['api_keys'].open('r')) + preprocessing.run_downloads(config['bbox'], + addresses, + api_keys + ) + + # Identify the starting graph + if config['starting_graph']: + G = load_graph(config['starting_graph']) + else: + G = preprocessing.create_starting_graph(addresses) + + # Load the parameters and perform any manual overrides + parameters = get_full_parameters() + for category, overrides in config.get('parameter_overrides', {}).items(): + for key, val in overrides.items(): + setattr(parameters[category], key, val) + + # Iterate the graph functions + G = iterate_graphfcns(G, + config['graphfcn_list'], + parameters, + addresses) + + # Save the final graph + go.graph_to_geojson(G, + addresses.nodes, + addresses.edges, + G.graph['crs'] + ) + save_graph(G, addresses.graph) + # Write to .inp + synthetic_write(addresses) + + # Run the model + synthetic_results = run(addresses.inp, + **config['run_settings']) + + # Get the real results + if config['real']['results']: + # TODO.. 
+        real_results = pd.read_parquet(config['real']['results'])
+    elif config['real']['inp']:
+        real_results = run(config['real']['inp'],
+                           **config['run_settings'])
+    else:
+        logger.info("No real network provided, returning SWMM .inp file.")
+        return addresses.inp
+
+    # Iterate the metrics
+    metrics = iterate_metrics(synthetic_results,
+                              gpd.read_file(addresses.subcatchments),
+                              G,
+                              real_results,
+                              gpd.read_file(config['real']['subcatchments']),
+                              load_graph(config['real']['graph']),
+                              config['metric_list'])
+
+    return metrics
+
+def check_top_level_paths(config: dict):
+    """Check the top level paths in the config.
+
+    Args:
+        config (dict): The configuration.
+
+    Raises:
+        FileNotFoundError: If a top level path does not exist.
+    """
+    for key in ['base_dir', 'api_keys']:
+        if not Path(config[key]).exists():
+            raise FileNotFoundError(f"{key} not found at {config[key]}")
+        config[key] = Path(config[key])
+    return config
+
+def check_address_overrides(config: dict):
+    """Check the address overrides in the config.
+
+    Args:
+        config (dict): The configuration.
+
+    Raises:
+        FileNotFoundError: If an address override path does not exist.
+    """
+    overrides = config.get('address_overrides', None)
+
+    if not overrides:
+        return config
+
+    for key, path in overrides.items():
+        if not Path(path).exists():
+            raise FileNotFoundError(f"{key} not found at {path}")
+        config['address_overrides'][key] = Path(path)
+    return config
+
+def check_real_network_paths(config: dict):
+    """Check the paths to the real network in the config.
+
+    Args:
+        config (dict): The configuration.
+
+    Raises:
+        FileNotFoundError: If a real network path does not exist.
+    """
+    real = config.get('real', None)
+
+    if not real:
+        return config
+
+    for key, path in real.items():
+        if not isinstance(path, str):
+            continue
+        if not Path(path).exists():
+            raise FileNotFoundError(f"{key} not found at {path}")
+        config['real'][key] = Path(path)
+
+    return config
+
+def load_config(config_path: Path):
+    """Load, validate, and convert Paths in a configuration file.
+
+    Args:
+        config_path (Path): The path to the configuration file.
+
+    Returns:
+        dict: The configuration.
+ """ + # Load the schema + schema_fid = Path(__file__).parent / 'defs' / 'schema.yml' + with schema_fid.open('r') as file: + schema = yaml.safe_load(file) + + with config_path.open('r') as f: + # Load the config + config = yaml.safe_load(f) + + # Validate the config + jsonschema.validate(instance = config, schema = schema) + + # Check top level paths + config = check_top_level_paths(config) + + # Check address overrides + config = check_address_overrides(config) + + # Check real network paths + config = check_real_network_paths(config) + + return config def run(model: Path, diff --git a/tests/test_data/building.geoparquet b/tests/test_data/building.geoparquet new file mode 100644 index 00000000..a758595f Binary files /dev/null and b/tests/test_data/building.geoparquet differ diff --git a/tests/test_data/demo_config.yml b/tests/test_data/demo_config.yml new file mode 100644 index 00000000..16b7e5f4 --- /dev/null +++ b/tests/test_data/demo_config.yml @@ -0,0 +1,61 @@ +base_dir: /path/to/base/directory +project: demo +bbox: [0.04020, 51.55759, 0.09826, 51.62050] +api_keys: /path/to/api/keys.yml +run_settings: + reporting_iters: 100 + duration: 86400 + storevars: [flooding, flow] +real: + inp: /path/to/real/model.inp + graph: /path/to/real/graph.json + subcatchments: /path/to/real/subcatchments.geojson + results: null +starting_graph: null +graphfcn_list: + - assign_id + - format_osmnx_lanes + - double_directed + - split_long_edges + - calculate_contributing_area + - set_elevation + - set_surface_slope + - set_chahinian_slope + - set_chahinian_angle + - calculate_weights + - identify_outlets + - derive_topology + - pipe_by_pipe + - assign_id +metric_list: + - nc_deltacon0 + - nc_laplacian_dist + - nc_laplacian_norm_dist + - nc_adjacency_dist + - nc_vertex_edge_distance + - nc_resistance_distance + - bias_flood_depth + - kstest_edge_betweenness + - kstest_betweenness + - outlet_nse_flow + - outlet_nse_flooding +parameter_overrides: + hydraulic_design: + diameters: + - 0.15 + - 0.225 + - 0.3 + - 0.375 + - 0.45 + - 0.525 + - 0.6 + - 0.675 + - 0.75 + - 0.825 + - 0.9 + - 1.125 + - 1.35 + - 1.5 + - 1.95 + - 3.0 +address_overrides: null \ No newline at end of file diff --git a/tests/test_geospatial_utilities.py b/tests/test_geospatial_utilities.py index 5c8a1017..6e6c048b 100644 --- a/tests/test_geospatial_utilities.py +++ b/tests/test_geospatial_utilities.py @@ -378,7 +378,10 @@ def test_graph_to_geojson(): crs = G.graph['crs'] with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) - go.graph_to_geojson(G, temp_path / 'graph.geojson', crs) + go.graph_to_geojson(G, + temp_path / 'graph_nodes.geojson', + temp_path / 'graph_edges.geojson', + crs) gdf = gpd.read_file(temp_path / 'graph_nodes.geojson') assert gdf.crs == crs assert gdf.shape[0] == len(G.nodes) diff --git a/tests/test_graph_utilities.py b/tests/test_graph_utilities.py index 21925fac..7601fdba 100644 --- a/tests/test_graph_utilities.py +++ b/tests/test_graph_utilities.py @@ -13,7 +13,7 @@ from swmmanywhere import parameters from swmmanywhere.graph_utilities import graphfcns as gu -from swmmanywhere.graph_utilities import load_graph, save_graph +from swmmanywhere.graph_utilities import iterate_graphfcns, load_graph, save_graph def load_street_network(): @@ -40,7 +40,7 @@ def test_assign_id(): G = gu.assign_id(G) for u, v, data in G.edges(data=True): assert 'id' in data.keys() - assert isinstance(data['id'], int) + assert isinstance(data['id'], str) def test_double_directed(): """Test the double_directed function.""" @@ 
@@ -116,9 +116,9 @@ def test_set_elevation_and_slope():
     addresses.elevation = Path(__file__).parent / 'test_data' / 'elevation.tif'
     G = gu.set_elevation(G, addresses)
     for id_, data in G.nodes(data=True):
-        assert 'elevation' in data.keys()
-        assert math.isfinite(data['elevation'])
-        assert data['elevation'] > 0
+        assert 'surface_elevation' in data.keys()
+        assert math.isfinite(data['surface_elevation'])
+        assert data['surface_elevation'] > 0
 
     G = gu.set_surface_slope(G)
     for u, v, data in G.edges(data=True):
@@ -228,7 +228,7 @@ def test_pipe_by_pipe():
     """Test the pipe_by_pipe function."""
     G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json')
     for ix, (u,d) in enumerate(G.nodes(data=True)):
-        d['elevation'] = ix
+        d['surface_elevation'] = ix
         d['contributing_area'] = ix
 
     params = parameters.HydraulicDesign()
@@ -241,4 +241,20 @@ def test_pipe_by_pipe():
     for u, d in G.nodes(data=True):
         assert 'chamber_floor_elevation' in d.keys()
         assert math.isfinite(d['chamber_floor_elevation'])
-    
\ No newline at end of file
+
+def test_iterate_graphfcns():
+    """Test the iterate_graphfcns function."""
+    G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json')
+    params = parameters.get_full_parameters()
+    addresses = parameters.FilePaths(base_dir = None,
+                                     project_name = None,
+                                     bbox_number = None,
+                                     model_number = None)
+    G = iterate_graphfcns(G,
+                          ['assign_id',
+                           'format_osmnx_lanes'],
+                          params,
+                          addresses)
+    for u, v, d in G.edges(data=True):
+        assert 'id' in d.keys()
+        assert 'width' in d.keys()
diff --git a/tests/test_metric_utilities.py b/tests/test_metric_utilities.py
index 60350e0f..eabd19f2 100644
--- a/tests/test_metric_utilities.py
+++ b/tests/test_metric_utilities.py
@@ -54,14 +54,14 @@ def test_bias_flood_depth():
     """Test the bias_flood_depth metric."""
     # Create synthetic and real data
     synthetic_results = pd.DataFrame({
-        'object': ['obj1', 'obj1','obj2','obj2'],
+        'id': ['obj1', 'obj1','obj2','obj2'],
         'value': [10, 20, 5, 2],
         'variable': 'flooding',
         'date' : pd.to_datetime(['2021-01-01 00:00:00','2021-01-01 00:05:00',
                                  '2021-01-01 00:00:00','2021-01-01 00:05:00'])
     })
 
     real_results = pd.DataFrame({
-        'object': ['obj1', 'obj1','obj2','obj2'],
+        'id': ['obj1', 'obj1','obj2','obj2'],
         'value': [15, 25, 10, 20],
         'variable': 'flooding',
         'date' : pd.to_datetime(['2021-01-01 00:00:00','2021-01-01 00:05:00',
@@ -131,19 +131,19 @@ def test_outlet_nse_flow():
     subs = get_subs()
 
     # Mock results
-    results = pd.DataFrame([{'object' : 4253560,
+    results = pd.DataFrame([{'id' : 4253560,
                              'variable' : 'flow',
                              'value' : 10,
                              'date' : pd.to_datetime('2021-01-01').date()},
-                            {'object' : '',
+                            {'id' : '',
                              'variable' : 'flow',
                              'value' : 5,
                              'date' : pd.to_datetime('2021-01-01').date()},
-                            {'object' : 4253560,
+                            {'id' : 4253560,
                              'variable' : 'flow',
                              'value' : 5,
                              'date' : pd.to_datetime('2021-01-01 00:00:05')},
-                            {'object' : '',
+                            {'id' : '',
                              'variable' : 'flow',
                              'value' : 2,
                              'date' : pd.to_datetime('2021-01-01 00:00:05')}])
@@ -173,7 +173,7 @@ def test_outlet_nse_flow():
                             new_outlet,
                             'outlet')
     G_.remove_node(12354833)
-    results_.loc[results_.object == 4253560, 'object'] = 725226531
+    results_.loc[results_.id == 4253560, 'id'] = 725226531
 
     # Calculate NSE (mean results)
     val = mu.metrics.outlet_nse_flow(synthetic_G = G_,
@@ -200,35 +200,35 @@ def test_outlet_nse_flooding():
     subs = get_subs()
 
     # Mock results
-    results = pd.DataFrame([{'object' : 4253560,
+    results = pd.DataFrame([{'id' : 4253560,
                              'variable' : 'flow',
                              'value' : 10,
                              'date' : pd.to_datetime('2021-01-01 00:00:00')},
-                            {'object' : 4253560,
+                            {'id' : 4253560,
                              'variable' : 'flow',
                              'value' : 5,
                              'date' : pd.to_datetime('2021-01-01 00:00:05')},
-                            {'object' : 25472468,
+                            {'id' : 25472468,
                              'variable' : 'flooding',
                              'value' : 4.5,
                              'date' : pd.to_datetime('2021-01-01 00:00:00')},
-                            {'object' : 770549936,
+                            {'id' : 770549936,
                              'variable' : 'flooding',
                              'value' : 5,
                              'date' : pd.to_datetime('2021-01-01 00:00:00')},
-                            {'object' : 109753,
+                            {'id' : 109753,
                              'variable' : 'flooding',
                              'value' : 10,
                              'date' : pd.to_datetime('2021-01-01 00:00:00')},
-                            {'object' : 25472468,
+                            {'id' : 25472468,
                              'variable' : 'flooding',
                              'value' : 0,
                              'date' : pd.to_datetime('2021-01-01 00:00:05')},
-                            {'object' : 770549936,
+                            {'id' : 770549936,
                              'variable' : 'flooding',
                              'value' : 5,
                              'date' : pd.to_datetime('2021-01-01 00:00:05')},
-                            {'object' : 109753,
+                            {'id' : 109753,
                              'variable' : 'flooding',
                              'value' : 15,
                              'date' : pd.to_datetime('2021-01-01 00:00:05')}])
@@ -243,7 +243,7 @@ def test_outlet_nse_flooding():
 
     # Calculate NSE (mean results)
     results_ = results.copy()
-    results_.loc[results_.object.isin([770549936, 25472468]),'value'] = [14.5 / 4] * 4
+    results_.loc[results_.id.isin([770549936, 25472468]),'value'] = [14.5 / 4] * 4
     val = mu.metrics.outlet_nse_flooding(synthetic_G = G,
                                          synthetic_results = results_,
                                          real_G = G,
@@ -279,24 +279,38 @@ def test_outlet_nse_flooding():
                                          real_subs = subs)
     assert val == 0.0
 
-def test_netcomp():
-    """Test the netcomp metrics."""
+def test_netcomp_iterate():
+    """Test the netcomp metrics and iterate_metrics."""
     netcomp_results = {'nc_deltacon0' : 0.00129408,
                        'nc_laplacian_dist' : 36.334773,
                        'nc_laplacian_norm_dist' : 1.932007,
                        'nc_adjacency_dist' : 3.542749,
                        'nc_resistance_distance' : 8.098548,
                        'nc_vertex_edge_distance' : 0.132075}
-
-    for func, val in netcomp_results.items():
-        G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json')
-        val_ = getattr(mu.metrics, func)(synthetic_G = G, real_G = G)
-        assert val_ == 0.0, func
-
-        G_ = load_graph(Path(__file__).parent / 'test_data' / 'street_graph.json')
-        val_ = getattr(mu.metrics, func)(synthetic_G = G_, real_G = G)
-        assert np.isclose(val, val_), func
-
+    G = load_graph(Path(__file__).parent / 'test_data' / 'graph_topo_derived.json')
+    metrics = mu.iterate_metrics(synthetic_G = G,
+                                 synthetic_subs = None,
+                                 synthetic_results = None,
+                                 real_G = G,
+                                 real_subs = None,
+                                 real_results = None,
+                                 metric_list = netcomp_results.keys())
+    for metric, val in metrics.items():
+        assert metric in netcomp_results
+        assert np.isclose(val, 0)
+
+    G_ = load_graph(Path(__file__).parent / 'test_data' / 'street_graph.json')
+    metrics = mu.iterate_metrics(synthetic_G = G_,
+                                 synthetic_subs = None,
+                                 synthetic_results = None,
+                                 real_G = G,
+                                 real_subs = None,
+                                 real_results = None,
+                                 metric_list = netcomp_results.keys())
+    for metric, val in metrics.items():
+        assert metric in netcomp_results
+        assert np.isclose(val, netcomp_results[metric])
+
 def test_subcatchment_nse_flooding():
     """Test the subcatchment_nse_flooding metric."""
     # Load data
@@ -379,4 +393,4 @@ def test_subcatchment_nse_flooding():
                                               real_G = G,
                                               real_results = results,
                                               real_subs = subs)
-    assert val == 1.0
\ No newline at end of file
+    assert val == 1.0
diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py
index ff3f4020..901395e4 100644
--- a/tests/test_post_processing.py
+++ b/tests/test_post_processing.py
@@ -9,6 +9,7 @@
 import pyswmm
 from shapely import geometry as sgeom
 
+from swmmanywhere import parameters
 from swmmanywhere import post_processing as stt
 
 fid = Path(__file__).parent.parent / 'swmmanywhere' / 'defs' /\
@@ -131,25 +132,30 @@ def test_synthetic_write():
     data_dict = generate_data_dict()
     with tempfile.TemporaryDirectory() as model_dir:
         model_dir = Path(model_dir)
-        model_dir = model_dir / 'model_1'
-        model_dir.mkdir()
+        addresses = parameters.FilePaths(base_dir = model_dir,
+                                         project_name = 'test',
+                                         bbox_number = 1,
+                                         extension = 'json',
+                                         model_number = 0)
+        addresses.model.mkdir(parents=True, exist_ok=True)
+        addresses.precipitation = 'storm.dat'
 
         # Write the model with synthetic_write
         nodes = gpd.GeoDataFrame(data_dict['nodes'])
         nodes.geometry = gpd.points_from_xy(nodes.x, nodes.y)
-        nodes.to_file(model_dir / 'pipe_by_pipe_nodes.geojson')
+        nodes.to_file(addresses.nodes)
         nodes = nodes.set_index('id')
         edges = gpd.GeoDataFrame(pd.DataFrame(data_dict['conduits']).iloc[[0]])
         edges.geometry = [sgeom.LineString([nodes.loc[u,'geometry'],
                                             nodes.loc[v,'geometry']])
                           for u,v in zip(edges.u, edges.v)]
-        edges.to_file(model_dir / 'pipe_by_pipe_edges.geojson')
+        edges.to_file(addresses.edges)
         subs = data_dict['subs'].copy()
         subs['subcatchment'] = ['node1']
-        subs.to_file(model_dir / 'subcatchments.geojson')
-        stt.synthetic_write(model_dir)
+        subs.to_file(addresses.subcatchments)
+        stt.synthetic_write(addresses)
 
         # Write the model with data_dict_to_inp
-        comparison_file = model_dir / "model_base.inp"
+        comparison_file = addresses.model / "model_base.inp"
         template_fid = Path(__file__).parent.parent / 'swmmanywhere' / 'defs' /\
             'basic_drainage_all_bits.inp'
         stt.data_dict_to_inp(stt.format_to_swmm_dict(**data_dict),
                              template_fid,
                              comparison_file)
 
         # Compare
-        new_input_file = model_dir / "model_1.inp"
+        new_input_file = addresses.inp
         are_files_identical = filecmp.cmp(new_input_file,
                                           comparison_file,
                                           shallow=False)
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 4b6708c4..fdaba97b 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -18,9 +18,9 @@ def test_getattr():
     assert addresses.bbox == addresses.project / 'bbox_1'
     assert addresses.download == addresses.bbox / 'download'
     assert addresses.elevation == addresses.download / 'elevation.tif'
-    assert addresses.building == addresses.download / 'building.parquet'
+    assert addresses.building == addresses.download / 'building.geoparquet'
     assert addresses.model == addresses.bbox / 'model_1'
-    assert addresses.subcatchments == addresses.model / 'subcatchments.parquet'
+    assert addresses.subcatchments == addresses.model / 'subcatchments.geoparquet'
     assert addresses.precipitation == addresses.download / 'precipitation.parquet'
 
     addresses.elevation = filepath
diff --git a/tests/test_swmmanywhere.py b/tests/test_swmmanywhere.py
index b854c230..aab18b5b 100644
--- a/tests/test_swmmanywhere.py
+++ b/tests/test_swmmanywhere.py
@@ -1,6 +1,11 @@
 """Tests for the main module."""
+import tempfile
 from pathlib import Path
 
+import jsonschema
+import pytest
+import yaml
+
 from swmmanywhere import __version__, swmmanywhere
 
 
@@ -32,4 +37,99 @@ def test_run():
         assert results_.shape[0] < results.shape[0]
 
     model.with_suffix('.out').unlink()
-    model.with_suffix('.rpt').unlink()
\ No newline at end of file
+    model.with_suffix('.rpt').unlink()
+
+def test_swmmanywhere():
+    """Test the swmmanywhere function."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Load the config
+        test_data_dir = Path(__file__).parent / 'test_data'
+        defs_dir = Path(__file__).parent.parent / 'swmmanywhere' / 'defs'
+        with (test_data_dir / 'demo_config.yml').open('r') as f:
+            config = yaml.safe_load(f)
+
+        # Set some test values
+        base_dir = Path(temp_dir)
+        config['base_dir'] = str(base_dir)
+        config['bbox'] = [0.05428,51.55847,0.07193,51.56726]
+        config['address_overrides'] = {
+            'building': str(test_data_dir / 'building.geoparquet'),
+            'precipitation': str(defs_dir / 'storm.dat')
+        }
+
+        config['run_settings']['duration'] = 1000
+        api_keys = {'nasadem_key' : 'b206e65629ac0e53d599e43438560d28'}
+        with open(base_dir / 'api_keys.yml', 'w') as f:
+            yaml.dump(api_keys, f)
+        config['api_keys'] = str(base_dir / 'api_keys.yml')
+
+        # Fill the real dict with unused paths to avoid file validation errors
+        config['real']['subcatchments'] = str(defs_dir / 'storm.dat')
+        config['real']['inp'] = str(defs_dir / 'storm.dat')
+        config['real']['graph'] = str(defs_dir / 'storm.dat')
+
+        # Write the config
+        with open(base_dir / 'test_config.yml', 'w') as f:
+            yaml.dump(config, f)
+
+        # Load and test validation of the config
+        config = swmmanywhere.load_config(base_dir / 'test_config.yml')
+
+        # Set the test config to just use the generated data
+        model_dir = base_dir / 'demo' / 'bbox_1' / 'model_1'
+        config['real']['subcatchments'] = model_dir / 'subcatchments.geoparquet'
+        config['real']['inp'] = model_dir / 'model_1.inp'
+        config['real']['graph'] = model_dir / 'graph.parquet'
+
+        # Run swmmanywhere
+        swmmanywhere.swmmanywhere(config)
+
+def test_load_config_file_validation():
+    """Test the file validation of the config."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        test_data_dir = Path(__file__).parent / 'test_data'
+        defs_dir = Path(__file__).parent.parent / 'swmmanywhere' / 'defs'
+        base_dir = Path(temp_dir)
+
+        # Test file not found
+        with pytest.raises(FileNotFoundError) as exc_info:
+            swmmanywhere.load_config(base_dir / 'test_config.yml')
+        assert "test_config.yml" in str(exc_info.value)
+
+        with (test_data_dir / 'demo_config.yml').open('r') as f:
+            config = yaml.safe_load(f)
+
+        # Correct and avoid file validation errors
+        config['real'] = None
+
+        # Fill with unused paths to avoid file validation errors
+        config['base_dir'] = str(defs_dir / 'storm.dat')
+        config['api_keys'] = str(defs_dir / 'storm.dat')
+
+        with open(base_dir / 'test_config.yml', 'w') as f:
+            yaml.dump(config, f)
+
+        config = swmmanywhere.load_config(base_dir / 'test_config.yml')
+        assert isinstance(config, dict)
+
+def test_load_config_schema_validation():
+    """Test the schema validation of the config."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        test_data_dir = Path(__file__).parent / 'test_data'
+        base_dir = Path(temp_dir)
+
+        # Load the config
+        with (test_data_dir / 'demo_config.yml').open('r') as f:
+            config = yaml.safe_load(f)
+
+        # Make an edit that does not conform to the schema
+        config['base_dir'] = 1
+
+        with open(base_dir / 'test_config.yml', 'w') as f:
+            yaml.dump(config, f)
+
+        # Test schema validation
+        with pytest.raises(jsonschema.exceptions.ValidationError) as exc_info:
+            swmmanywhere.load_config(base_dir / 'test_config.yml')
+        assert "null" in str(exc_info.value)
+
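
Usage sketch (illustrative, not part of the patch): the end-to-end entry point added in swmmanywhere/swmmanywhere.py can be driven as below. The config path here is hypothetical; the file must conform to swmmanywhere/defs/schema.yml.

from pathlib import Path

from swmmanywhere import swmmanywhere

# Hypothetical config path; the file must follow defs/schema.yml.
config = swmmanywhere.load_config(Path('/path/to/config.yml'))

# Returns a dict of metric values when config['real'] provides a real
# network, or the path to the synthesised .inp file when it does not.
result = swmmanywhere.swmmanywhere(config)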
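
The two new helpers, iterate_graphfcns and iterate_metrics, share the same validate-then-dispatch pattern over a name-to-callable registry. A minimal self-contained sketch of that pattern (the names here are illustrative, not taken from the patch):

registry = {'double': lambda x: 2 * x, 'increment': lambda x: x + 1}

def iterate(value, names):
    # Fail fast on unregistered names, mirroring the ValueError raised by
    # iterate_graphfcns and iterate_metrics before any function is applied.
    missing = [n for n in names if n not in registry]
    if missing:
        raise ValueError(f"Not registered: {', '.join(missing)}")
    for name in names:
        value = registry[name](value)
    return value

assert iterate(3, ['double', 'increment']) == 7  # (3 * 2) + 1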
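
The 'real' block of the new schema requires graph and subcatchments and, via anyOf, at least one of inp or results. A quick standalone illustration with jsonschema (schema inlined here rather than loaded from defs/schema.yml; the example paths are hypothetical):

import jsonschema

real_schema = {
    'type': ['object', 'null'],
    'properties': {'inp': {'type': 'string'},
                   'graph': {'type': 'string'},
                   'subcatchments': {'type': 'string'},
                   'results': {'type': ['string', 'null']}},
    'required': ['graph', 'subcatchments'],
    'anyOf': [{'required': ['inp']}, {'required': ['results']}],
}

# Passes: graph, subcatchments and inp are all present.
jsonschema.validate({'inp': 'model.inp', 'graph': 'g.json',
                     'subcatchments': 's.geojson'}, real_schema)

# Raises ValidationError: neither inp nor results is present.
try:
    jsonschema.validate({'graph': 'g.json', 'subcatchments': 's.geojson'},
                        real_schema)
except jsonschema.exceptions.ValidationError as e:
    print('rejected:', e.message)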