Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

159 interpolation bug #160

Merged
merged 9 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 59 additions & 20 deletions swmmanywhere/metric_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def pbias(y: np.ndarray,

.. math::

pbias = \\frac{{\sum(synthetic) - \sum(real)}}{{\sum(real)}}
pbias = \\frac{{\mean(synthetic) - \mean(real)}}{{\mean(real)}}

where:
- :math:`synthetic` is the synthetic data,
Expand All @@ -230,10 +230,10 @@ def pbias(y: np.ndarray,
Returns:
float: The PBIAS value.
"""
total_observed = y.sum()
total_observed = y.mean()
if total_observed == 0:
return np.inf
return (yhat.sum() - total_observed) / total_observed
return (yhat.mean() - total_observed) / total_observed

@register_coef
def nse(y: np.ndarray,
Expand Down Expand Up @@ -281,7 +281,8 @@ def align_calc_coef(synthetic_results: pd.DataFrame,
coef_func: Callable = nse) -> float:
"""Align and calculate coef_func.

Aggregate synthetic and real results by date for specifics ids.
Aggregate synthetic and real results by date for specific ids (i.e., sum
up over all ids - so we are only comparing timeseries for one aggregation).
Align the synthetic and real dates and calculate the coef_func metric
of the variable over time. In cases where the synthetic
data does not overlap the real data, the value is interpolated.
Expand Down Expand Up @@ -371,7 +372,8 @@ def median_coef_by_group(results: pd.DataFrame,

Calculate the median coef_func value of a variable over time
for each group in the results dataframe, and return the median of these
values.
values. Assumes that the results dataframe has 'value_real' and 'value_syn'
columns and that these properly line up.

Args:
results (pd.DataFrame): The results dataframe.
Expand All @@ -383,9 +385,6 @@ def median_coef_by_group(results: pd.DataFrame,
"""
val = (
results
.groupby(['date',gb_key])
.sum()
.reset_index()
.groupby(gb_key)
.apply(lambda x: coef_func(x.value_real, x.value_syn))
)
Expand Down Expand Up @@ -512,10 +511,12 @@ def align_by_shape(var,
real_results: pd.DataFrame,
shapes: gpd.GeoDataFrame,
synthetic_G: nx.Graph,
real_G: nx.Graph) -> pd.DataFrame:
real_G: nx.Graph,
key: str = 'sub_id') -> pd.DataFrame:
"""Align by subcatchment.

Align synthetic and real results by shape and return the results.
Align synthetic and real results by shape and return the results. If multiple
ids exist in the same shape, these are aggregated by sum.

Args:
var (str): The variable to align.
Expand All @@ -524,6 +525,7 @@ def align_by_shape(var,
shapes (gpd.GeoDataFrame): The shapes to align by (e.g., grid or real_subs).
synthetic_G (nx.Graph): The synthetic graph.
real_G (nx.Graph): The real graph.
key (str): The column to align by.
"""
synthetic_joined = nodes_to_subs(synthetic_G, shapes)
real_joined = nodes_to_subs(real_G, shapes)
Expand All @@ -541,20 +543,41 @@ def align_by_shape(var,

# Align data
synthetic_results = pd.merge(synthetic_results,
synthetic_joined[['id','sub_id']],
synthetic_joined[['id',key]],
on='id')
synthetic_gb = (
synthetic_results
.groupby(['date',key])
.value
.sum()
.reset_index()
)
real_results = pd.merge(real_results,
real_joined[['id','sub_id']],
real_joined[['id',key]],
on='id')

results = pd.merge(real_results[['date','sub_id','value']],
synthetic_results[['date','sub_id','value']],
on = ['date','sub_id'],
real_gb = (
real_results
.groupby(['date',key])
.value
.sum()
.reset_index()
)
results = pd.merge(real_gb[['date',key,'value']],
synthetic_gb[['date',key,'value']],
on = ['date',key],
suffixes = ('_real', '_syn'),
how = 'outer'
)

results['value_syn'] = results.value_syn.interpolate().to_numpy()
syn_interp = (
results
.groupby(key)
.apply(func = lambda x : x.set_index('date')[['value_syn']].interpolate())
.reset_index()
)
results = pd.merge(results.drop('value_syn', axis=1),
syn_interp,
on = ['sub_id','date'])
results = results.dropna(subset=['value_real'])

return results
Expand Down Expand Up @@ -750,7 +773,12 @@ def outlet(synthetic_results: pd.DataFrame,
sg_syn, syn_outlet = best_outlet_match(synthetic_G, real_subs)
sg_real, real_outlet = dominant_outlet(real_G, real_results)

allowable_var = ['nmanholes', 'npipes', 'length', 'flow', 'flooding']
allowable_var = ['nmanholes',
'diameter',
'npipes',
'length',
'flow',
'flooding']
if var not in allowable_var:
raise ValueError(f"Invalid variable {var}. Can be {allowable_var}")

Expand All @@ -766,10 +794,20 @@ def outlet(synthetic_results: pd.DataFrame,
# Calculate the coefficient based on the total length of the pipes
return coef_func(
np.array(
list(nx.get_edge_attributes(sg_real, 'length').values())
sum(nx.get_edge_attributes(sg_real, var).values())
),
np.array(
sum(nx.get_edge_attributes(sg_syn, var).values())
)
)
if var == 'diameter':
# Calculate the coefficient based on the average diameter of the pipes
return coef_func(
np.array(
list(nx.get_edge_attributes(sg_real, var).values())
),
np.array(
list(nx.get_edge_attributes(sg_syn, 'length').values())
list(nx.get_edge_attributes(sg_syn, var).values())
)
)
if var == 'flow':
Expand Down Expand Up @@ -836,6 +874,7 @@ def new_metric(**kwargs):
metrics.register(metric_factory('outlet_pbias_length'))
metrics.register(metric_factory('outlet_pbias_npipes'))
metrics.register(metric_factory('outlet_pbias_nmanholes'))
metrics.register(metric_factory('outlet_pbias_diameter'))

metrics.register(metric_factory('outlet_nse_flooding'))
metrics.register(metric_factory('outlet_kge_flooding'))
Expand Down
18 changes: 16 additions & 2 deletions tests/test_data/demo_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,21 @@ graphfcn_list:
- fix_geometries # Ensure geometries present before printing
- assign_id # Final pass to ensure consistent 'id' and remove duplicates
metric_list:
- outlet_nse_flow
- outlet_kge_flow
- outlet_pbias_flow
- outlet_pbias_length
- outlet_pbias_npipes
- outlet_pbias_nmanholes
- outlet_nse_flooding
- outlet_kge_flooding
- outlet_pbias_flooding
- grid_nse_flooding
- grid_kge_flooding
- grid_pbias_flooding
- subcatchment_nse_flooding
- subcatchment_kge_flooding
- subcatchment_pbias_flooding
- nc_deltacon0
- nc_laplacian_dist
- nc_laplacian_norm_dist
Expand All @@ -46,8 +61,7 @@ metric_list:
- bias_flood_depth
- kstest_edge_betweenness
- kstest_betweenness
- outlet_nse_flow
- outlet_nse_flooding
- outlet_kstest_diameters
parameter_overrides:
hydraulic_design:
diameters:
Expand Down
1 change: 1 addition & 0 deletions tests/test_metric_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ def test_design_params():

# Target results
design_results = {'outlet_kstest_diameters' : 0.0625,
'outlet_pbias_diameter': 0.0625,
'outlet_pbias_length' : -0.15088965,
'outlet_pbias_nmanholes' : -0.05,
'outlet_pbias_npipes' : -0.15789473}
Expand Down
Loading