111 align dtypes of variables returned by different kw functions (#127)
* reformatted tidalindicators code, added test for hwlw tidalindicators

* converted overschrijding from dataframes to series

* converted gemiddeldgetij from dataframes to series

* reformatted code

* renamed tidalindicators and slotgemiddelden series index from None to "period"

* updated whatsnew
veenstrajelmer authored Aug 29, 2024
1 parent 017fb2c commit e70132c
Showing 11 changed files with 322 additions and 502 deletions.
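Nearly every diff below applies the same pattern: functions that used to return a single-column DataFrame (or a dict of them) now return a pandas Series with a named index, so all kw functions hand back the same dtype. A minimal standalone sketch of that conversion (not code from this repository; the column and index names mirror the diffs below):

import pandas as pd

# a single-column DataFrame as the kw functions used to return
df = pd.DataFrame({"values": [1.05, 0.98]}, index=[0.5, 0.2])
df.index.name = "frequency"

# the aligned return type: a Series that keeps the named index
ser = df["values"]
print(type(ser).__name__)  # Series
print(ser.index.name)      # frequency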
1 change: 1 addition & 0 deletions docs/whats-new.md
@@ -24,6 +24,7 @@
- improvements to output csv files in [#109](https://github.com/Deltares-research/kenmerkendewaarden/pull/109)
- drop duplicate times in `kw.read_measurements()` in [#116](https://github.com/Deltares-research/kenmerkendewaarden/pull/116)
- use NETCDF4_CLASSIC format to reduce file sizes written by `kw.retrieve_measurements()` in [#119](https://github.com/Deltares-research/kenmerkendewaarden/pull/119)
+ - align output of all methods in [#127](https://github.com/Deltares-research/kenmerkendewaarden/pull/127)

### Fix
- implemented workaround for pandas 2.2.0 with different rounding behaviour in [#69](https://github.com/Deltares-research/kenmerkendewaarden/pull/69)
12 changes: 7 additions & 5 deletions examples/KWK_process.py
@@ -259,10 +259,11 @@ def initiate_dist_with_hydra_nl(station):
if os.path.exists(file_hydra_nl):
df_hydra_nl = pd.read_table(file_hydra_nl, encoding='latin-1', decimal=',', header=0)
df_hydra_nl.index = 1/df_hydra_nl['Terugkeertijd [jaar]']
+ df_hydra_nl.index.name = 'frequency'
df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]']
df_hydra_nl = df_hydra_nl[['values']]
df_hydra_nl.attrs['station'] = station
- dist_dict['Hydra-NL'] = df_hydra_nl
+ dist_dict['Hydra-NL'] = df_hydra_nl['values']
return dist_dict

def add_validation_dist(dist_dict, dist_type, station):
@@ -275,10 +276,11 @@ def add_validation_dist(dist_dict, dist_type, station):
df_validation = df_validation.rename({"value":"values"},axis=1)
df_validation['values'] /= 100
df_validation = df_validation.set_index("value_Tfreq", drop=True)
+ df_validation.index.name = 'frequency'
df_validation.attrs['station'] = station
- dist_dict['validation'] = df_validation
+ dist_dict['validation'] = df_validation['values']

- Tfreqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200,
+ freqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200,
1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000]

if compute_overschrijding and data_pd_HWLW_all is not None:
@@ -291,7 +293,7 @@ def add_validation_dist(dist_dict, dist_type, station):
dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station)
dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
clip_physical_break=True, dist=dist_exc_hydra,
- interp_freqs=Tfreqs_interested)
+ interp_freqs=freqs_interested)
add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
dist_exc['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'))
# dist_exc['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}_gecombineerd.csv'))
@@ -303,7 +305,7 @@ def add_validation_dist(dist_dict, dist_type, station):
# 2. Deceedance
dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
clip_physical_break=True, inverse=True,
- interp_freqs=Tfreqs_interested)
+ interp_freqs=freqs_interested)
add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
dist_dec['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'))
# dist_dec['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}_gecombineerd.csv'))
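With these changes every entry in the dist dict (model prediction, Hydra-NL and validation data alike) is a Series indexed by frequency, so downstream interpolation and CSV export treat all sources uniformly. A hypothetical sketch of such an entry (made-up numbers, hypothetical file name):

import pandas as pd

# hypothetical exceedance curve as now stored in dist_exc: water levels
# indexed by frequency (1/return period in years)
entry = pd.Series(
    [3.2, 3.6, 4.1],
    index=pd.Index([1/10, 1/100, 1/1000], name="frequency"),
    name="values",
)
entry.to_csv("Exceedance_EXAMPLE.csv")  # same export path for every source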
49 changes: 24 additions & 25 deletions kenmerkendewaarden/gemiddeldgetij.py
@@ -267,9 +267,9 @@ def calc_gemiddeldgetij(

# combine in single dictionary
gemgetij_dict = {}
gemgetij_dict["mean"] = prediction_av
gemgetij_dict["spring"] = prediction_sp
gemgetij_dict["neap"] = prediction_np
gemgetij_dict["mean"] = prediction_av['values']
gemgetij_dict["spring"] = prediction_sp['values']
gemgetij_dict["neap"] = prediction_np['values']

return gemgetij_dict

@@ -310,21 +310,21 @@ def plot_gemiddeldgetij(
prediction_av_raw = gemgetij_dict_raw["mean"]
prediction_sp_raw = gemgetij_dict_raw["spring"]
prediction_np_raw = gemgetij_dict_raw["neap"]
prediction_av_raw["values"].plot(
prediction_av_raw.plot(
ax=ax,
linestyle="--",
color=cmap(0),
linewidth=0.7,
label="gemiddeldgetij mean (raw)",
)
prediction_sp_raw["values"].plot(
prediction_sp_raw.plot(
ax=ax,
linestyle="--",
color=cmap(1),
linewidth=0.7,
label="gemiddeldgetij spring (raw)",
)
prediction_np_raw["values"].plot(
prediction_np_raw.plot(
ax=ax,
linestyle="--",
color=cmap(2),
@@ -335,11 +335,11 @@
prediction_av_corr = gemgetij_dict["mean"]
prediction_sp_corr = gemgetij_dict["spring"]
prediction_np_corr = gemgetij_dict["neap"]
prediction_av_corr["values"].plot(ax=ax, color=cmap(0), label="gemiddeldgetij mean")
prediction_sp_corr["values"].plot(
prediction_av_corr.plot(ax=ax, color=cmap(0), label="gemiddeldgetij mean")
prediction_sp_corr.plot(
ax=ax, color=cmap(1), label="gemiddeldgetij spring"
)
prediction_np_corr["values"].plot(ax=ax, color=cmap(2), label="gemiddeldgetij neap")
prediction_np_corr.plot(ax=ax, color=cmap(2), label="gemiddeldgetij neap")

ax.set_title(f"getijkrommes for {station}")
ax.legend(loc=4)
@@ -366,9 +366,9 @@ def get_gemgetij_components(data_pd_meas):
# =============================================================================
# Hatyan analyse voor 10 jaar (alle componenten voor gemiddelde getijcyclus) #TODO: maybe use original 4y period/componentfile instead? SA/SM should come from 19y analysis
# =============================================================================
- const_list = hatyan.get_const_list_hatyan(
-     "year"
- )  # components should not be reduced, since higher harmonics are necessary
+
+ # components should not be reduced, since higher harmonics are necessary
+ const_list = hatyan.get_const_list_hatyan("year")
hatyan_settings_ana = dict(
nodalfactors=True,
fu_alltimes=False,
@@ -430,16 +430,18 @@ def get_gemgetij_components(data_pd_meas):
.str.isalpha()
)
comp_iM = comp_frommeasurements_avg.loc[bool_endswithiM]
- comp_av.loc[comp_higherharmonics, "A"] = np.sqrt(
-     (comp_iM["A"] ** 2).sum()
- )  # kwadraatsom
+ # kwadraatsom
+ comp_av.loc[comp_higherharmonics, "A"] = np.sqrt((comp_iM["A"] ** 2).sum())

comp_av.loc["A0"] = comp_frommeasurements_avg.loc["A0"]

+ # values are different than 1991.0 document and differs per station while the
+ # document states "Zoals te verwachten is de verhouding per component tussen deze
+ # wortel en de oorspronkelijke amplitude voor alle plaatsen gelijk"
logger.debug(
"verhouding tussen originele en kwadratensom componenten:\n"
f"{comp_av/comp_frommeasurements_avg.loc[components_av]}"
- )  # values are different than 1991.0 document and differs per station while the document states "Zoals te verwachten is de verhouding per component tussen deze wortel en de oorspronkelijke amplitude voor alle plaatsen gelijk"
+ )

return comp_frommeasurements_avg, comp_av
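The kwadraatsom above replaces the amplitudes of the higher harmonics by a single amplitude carrying the same combined energy: the square root of the sum of squared amplitudes. A standalone numeric sketch (component names and amplitudes are made up):

import numpy as np
import pandas as pd

# hypothetical amplitudes of a group of higher harmonics
comp_iM = pd.DataFrame({"A": [0.12, 0.05, 0.03]}, index=["M4", "MS4", "MN4"])
A_combined = np.sqrt((comp_iM["A"] ** 2).sum())
print(round(A_combined, 4))  # 0.1334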

@@ -464,18 +466,16 @@ def reshape_signal(ts, ts_ext, HW_goal, LW_goal, tP_goal=None):
bool_HW = ts_ext["HWLWcode"] == 1
idx_HW = np.where(bool_HW)[0]
timesHW = ts_ext.index[idx_HW]
- timesLW = ts_ext.index[
-     idx_HW[:-1] + 1
- ]  # assuming alternating 1,2,1 or 1,3,1, this is always valid in this workflow
+ # assuming alternating 1,2,1 or 1,3,1, this is always valid in this workflow
+ timesLW = ts_ext.index[idx_HW[:-1] + 1]

# crop from first to last HW (rest is not scaled anyway)
ts_time_firstHW = ts_ext[bool_HW].index[0]
ts_time_lastHW = ts_ext[bool_HW].index[-1]
ts_corr = ts.copy().loc[ts_time_firstHW:ts_time_lastHW]

ts_corr["timedelta"] = (
ts_corr.index
) # this is necessary since datetimeindex with freq is not editable, and Series is editable
# this is necessary since datetimeindex with freq is not editable, and Series is editable
ts_corr["timedelta"] = (ts_corr.index)
for i in np.arange(0, len(timesHW) - 1):
HW1_val = ts_corr.loc[timesHW[i], "values"]
HW2_val = ts_corr.loc[timesHW[i + 1], "values"]
Expand All @@ -492,9 +492,8 @@ def reshape_signal(ts, ts_ext, HW_goal, LW_goal, tP_goal=None):
temp2 = (
ts_corr.loc[timesLW[i] : timesHW[i + 1], "values"] - LW_val
) / TR2_val * TR_goal + LW_goal
- temp = pd.concat(
-     [temp1, temp2.iloc[1:]]
- )  # .iloc[1:] since timesLW[i] is in both timeseries (values are equal)
+ # .iloc[1:] since timesLW[i] is in both timeseries (values are equal)
+ temp = pd.concat([temp1, temp2.iloc[1:]])
ts_corr["values_new"] = temp

tide_HWtoHW = ts_corr.loc[timesHW[i] : timesHW[i + 1]]
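The rescaling loop in reshape_signal maps each observed tide segment linearly onto the goal curve: subtract the observed low water, scale by the ratio of goal tidal range to observed tidal range, and add the goal low water back. A worked sketch with made-up numbers:

# observed segment: LW_val = -0.8 m, HW_val = 1.2 m  ->  TR_val = 2.0 m
# goal curve:       LW_goal = -1.0 m, HW_goal = 1.5 m ->  TR_goal = 2.5 m
LW_val, TR_val = -0.8, 2.0
LW_goal, TR_goal = -1.0, 2.5

value = 0.2  # an observed water level within the segment
value_new = (value - LW_val) / TR_val * TR_goal + LW_goal
print(value_new)  # 0.25; LW_val maps exactly to LW_goal, HW_val to HW_goal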
5 changes: 3 additions & 2 deletions kenmerkendewaarden/havengetallen.py
@@ -65,13 +65,14 @@ def calc_havengetallen(
"""
raise_extremes_with_aggers(df_ext)
ser_ext = df_ext["values"]

# check if coverage is high enough for havengetallen
if min_coverage is not None:
# TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year
# TODO: if we supply 4 years of complete data instead of 10 years, no error is raised
- df_actual_counts_peryear = compute_actual_counts(df_ext, freq="Y")
- df_expected_counts_peryear = compute_expected_counts(df_ext, freq="Y")
+ df_actual_counts_peryear = compute_actual_counts(ser_ext, freq="Y")
+ df_expected_counts_peryear = compute_expected_counts(ser_ext, freq="Y")
df_min_counts_peryear = df_expected_counts_peryear * min_coverage
bool_coverage_toolow = df_actual_counts_peryear < df_min_counts_peryear
df_debug = pd.DataFrame(
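The coverage check above flags, per year, whether the number of available extremes reaches min_coverage times the expected count. A simplified standalone sketch (counts are made up; in the real code they come from compute_actual_counts and compute_expected_counts):

import pandas as pd

actual = pd.Series({2010: 1380, 2011: 690, 2012: 1402})
expected = pd.Series({2010: 1410, 2011: 1410, 2012: 1410})

min_coverage = 0.95
bool_coverage_toolow = actual < expected * min_coverage
print(bool_coverage_toolow)  # only 2011 fails the 95% requirement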
