From 00f67fb2a0700bc7d4651c122a2c89fc89793dd4 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <60435591+veenstrajelmer@users.noreply.github.com>
Date: Thu, 12 Sep 2024 11:57:28 +0200
Subject: [PATCH] 140 adjustments for dashboard (#142)

* updated names of outputfiles

* add slotgemiddelde hw/lw to plot (and csv writing)

* export station locations csv

* fixed tests

* add tidalrange to slotgemiddelden

* updated readme

* updated whatsnew
---
 README.md                             |  4 +-
 docs/whats-new.md                     |  4 ++
 examples/KWK_getcheckdata.py          | 11 ++++
 examples/KWK_process.py               | 83 +++++++++++++--------------
 kenmerkendewaarden/data_retrieve.py   |  2 +
 kenmerkendewaarden/gemiddeldgetij.py  |  3 +-
 kenmerkendewaarden/havengetallen.py   | 17 ++++--
 kenmerkendewaarden/overschrijding.py  | 60 +++++++++----------
 kenmerkendewaarden/slotgemiddelden.py | 19 +++---
 kenmerkendewaarden/tidalindicators.py |  2 +-
 tests/test_overschrijding.py          | 46 +++++++--------
 tests/test_slotgemiddelden.py         | 46 +++++++++------
 12 files changed, 162 insertions(+), 135 deletions(-)

diff --git a/README.md b/README.md
index 38bb39b..738fff1 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,6 @@ De methodieken in deze repository hebben nog geen definitieve status en zijn daa
 ## installation
 - download Miniforge3 from [the miniforge github](https://github.com/conda-forge/miniforge?tab=readme-ov-file#download) and install it with the recommended settings.
 - open Miniforge Prompt
-- `conda create --name kw_env python=3.11 git -y`
+- `conda create --name kw_env python -y`
 - `conda activate kw_env`
-- `pip install git+https://github.com/Deltares-research/kenmerkendewaarden`
+- `pip install kenmerkendewaarden`
diff --git a/docs/whats-new.md b/docs/whats-new.md
index f8052a1..76b9483 100644
--- a/docs/whats-new.md
+++ b/docs/whats-new.md
@@ -2,6 +2,10 @@
 
 ## UNRELEASED
 
+### Feat
+- from meters to centimeters and other updates for dashboard in [#142](https://github.com/Deltares-research/kenmerkendewaarden/pull/142)
+- added tidalrange to slotgemiddelden in [#142](https://github.com/Deltares-research/kenmerkendewaarden/pull/142)
+
 ### Fix
 - accomodate updated astrog datetime handling in hatyan 2.9.0 in [#141](https://github.com/Deltares-research/kenmerkendewaarden/pull/141)
diff --git a/examples/KWK_getcheckdata.py b/examples/KWK_getcheckdata.py
index f43d24b..5b0fb9b 100644
--- a/examples/KWK_getcheckdata.py
+++ b/examples/KWK_getcheckdata.py
@@ -22,6 +22,7 @@
 derive_stats = False
 plot_meas = False
 plot_stations = False
+write_stations_table = False
 
 start_date = pd.Timestamp(1870, 1, 1, tz="UTC+01:00")
 end_date = pd.Timestamp(2024, 1, 1, tz="UTC+01:00")
@@ -137,3 +138,13 @@
     fig, ax = kw.plot_stations(station_list=station_list_map, crs=28992, add_labels=False)
     fig.savefig(os.path.join(dir_base,'stations_map.png'), dpi=200)
+
+
+### WRITE CSV WITH STATION CODE/X/Y/EPSG
+if write_stations_table:
+    # TODO: consider making retrieve_catalog public
+    from kenmerkendewaarden.data_retrieve import retrieve_catalog
+    locs_meas_ts_all, _, _ = retrieve_catalog(crs=4326)
+    locs_ts = locs_meas_ts_all.loc[locs_meas_ts_all.index.isin(station_list)]
+    file_csv = os.path.join(dir_base, "station_locations.csv")
+    locs_ts[["Locatie_MessageID","X","Y","Coordinatenstelsel","Naam"]].to_csv(file_csv)
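For context, a standalone sketch of the station-locations export that the hunk above enables; it is not part of the patch, `dir_base` and `station_list` are placeholder values, and `retrieve_catalog` is still a private helper (see the TODO).

import os
from kenmerkendewaarden.data_retrieve import retrieve_catalog

dir_base = "./output"        # assumed output folder
station_list = ["HOEKVHLD"]  # assumed station selection
os.makedirs(dir_base, exist_ok=True)

# the first element of the catalog tuple holds the measurement-timeseries locations
locs_meas_ts_all, _, _ = retrieve_catalog(crs=4326)
locs_ts = locs_meas_ts_all.loc[locs_meas_ts_all.index.isin(station_list)]
file_csv = os.path.join(dir_base, "station_locations.csv")
locs_ts[["Locatie_MessageID", "X", "Y", "Coordinatenstelsel", "Naam"]].to_csv(file_csv)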
diff --git a/examples/KWK_process.py b/examples/KWK_process.py
index e9c84a7..2888037 100644
--- a/examples/KWK_process.py
+++ b/examples/KWK_process.py
@@ -29,9 +29,9 @@
 dir_meas = os.path.join(dir_base,'measurements_wl_18700101_20240101')
 dir_indicators = os.path.join(dir_base,f'out_tidalindicators_{year_slotgem}')
-dir_slotgem = os.path.join(dir_base,f'out_slotgem_{year_slotgem}')
+dir_slotgem = os.path.join(dir_base,f'out_slotgemiddelden_{year_slotgem}')
 dir_havget = os.path.join(dir_base,f'out_havengetallen_{year_slotgem}')
-dir_gemgetij = os.path.join(dir_base,f'out_gemgetij_{year_slotgem}')
+dir_gemgetij = os.path.join(dir_base,f'out_gemiddeldgetij_{year_slotgem}')
 dir_overschrijding = os.path.join(dir_base,f'out_overschrijding_{year_slotgem}')
 os.makedirs(dir_indicators, exist_ok=True)
 os.makedirs(dir_slotgem, exist_ok=True)
@@ -86,7 +86,7 @@
                                         nap_correction=nap_correction, drop_duplicates=drop_duplicates)
     if df_meas_all is not None:
         #crop measurement data
-        df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))#,onlyfull=False)
+        df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))
     
     # extremes are used for slotgemiddelden, havengetallen, overschrijding
     df_ext_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
@@ -116,12 +116,14 @@
     # merge dictionaries
     dict_wltidalindicators.update(dict_HWLWtidalindicators)
     
-    # csv for monthly indicators
-    dict_wltidalindicators['wl_mean_permonth'].to_csv(os.path.join(dir_indicators,f'meanwl_permonth_{current_station}.txt'))
+    # csv for yearlymonthly indicators
+    for key in ['wl_mean_peryear','wl_mean_permonth']:
+        file_csv = os.path.join(dir_indicators, f'kw{year_slotgem}-{key}-{current_station}.csv')
+        dict_wltidalindicators[key].to_csv(file_csv, float_format='%.3f')
     
     # plot
     fig, ax = kw.plot_tidalindicators(dict_wltidalindicators)
-    fig.savefig(os.path.join(dir_indicators,f'tidal_indicators_{current_station}'))
+    fig.savefig(os.path.join(dir_indicators,f'kw{year_slotgem}-tidalindicators-{current_station}.png'))
@@ -146,11 +148,15 @@
     fig1, ax1 = kw.plot_slotgemiddelden(slotgemiddelden_valid, slotgemiddelden_all)
     ax1.set_xlim(fig_alltimes_ext)
     
-    # plot and write slotgemiddelde value (for waterlevels only), the slotgemiddelde is the last value of the model fit
-    slotgemiddelden_valid['HW_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanHW_{current_station}.txt'))
-    slotgemiddelden_valid['LW_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanLW_{current_station}.txt'))
-    slotgemiddelden_valid['wl_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanwl_{current_station}.txt'))
-    slotgemiddelden_valid['wl_model_fit'].to_csv(os.path.join(dir_slotgem,f'modelfit_{current_station}.txt'))
+    # write slotgemiddelden to csv, the slotgemiddelde is the last value of the model fit
+    key_list = ["wl_mean_peryear", "wl_model_fit",
+                "HW_mean_peryear", "HW_model_fit",
+                "LW_mean_peryear", "LW_model_fit",
+                "tidalrange_mean_peryear", "tidalrange_model_fit",
+                ]
+    for key in key_list:
+        file_csv = os.path.join(dir_slotgem, f'kw{year_slotgem}-{key}-{current_station}.csv')
+        slotgemiddelden_valid[key].to_csv(file_csv, float_format='%.3f')
     
     # get and plot validation timeseries (yearly mean wl/HW/LW)
     station_name_dict = {'HOEKVHLD':'hoek',
@@ -160,15 +166,15 @@
     file_yearmeanHW = os.path.join(dir_meas_gemHWLWwlAB,f'{station_name_dict[current_station]}_hw.txt')
     file_yearmeanLW = os.path.join(dir_meas_gemHWLWwlAB,f'{station_name_dict[current_station]}_lw.txt')
     file_yearmeanwl = os.path.join(dir_meas_gemHWLWwlAB,f'{station_name_dict[current_station]}_Z.txt')
-    yearmeanHW = pd.read_csv(file_yearmeanHW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')/100
-    yearmeanLW = pd.read_csv(file_yearmeanLW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')/100
-    yearmeanwl = pd.read_csv(file_yearmeanwl, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')/100
+    yearmeanHW = pd.read_csv(file_yearmeanHW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')
+    yearmeanLW = pd.read_csv(file_yearmeanLW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')
+    yearmeanwl = pd.read_csv(file_yearmeanwl, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')
     
     ax1.plot(yearmeanHW['values'],'+g', zorder=0)
     ax1.plot(yearmeanLW['values'],'+g', zorder=0)
     ax1.plot(yearmeanwl['values'],'+g',label='yearmean validation', zorder=0)
     ax1.legend(loc=2)
-    fig1.savefig(os.path.join(dir_slotgem,f'yearly_values_{current_station}'))
+    fig1.savefig(os.path.join(dir_slotgem,f'kw{year_slotgem}-slotgemiddelden-{current_station}'))
@@ -180,14 +186,15 @@
     # plot hwlw per timeclass including median
     fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
-    fig.savefig(os.path.join(dir_havget,f'HWLW_pertijdsklasse_inclmedianline_{current_station}'))
+    fig.savefig(os.path.join(dir_havget,f'kw{year_slotgem}-HWLW_pertijdsklasse-{current_station}.png'))
     
     # plot aardappelgrafiek
     fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen=df_havengetallen)
-    fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}'))
+    fig.savefig(os.path.join(dir_havget, f'kw{year_slotgem}-aardappelgrafiek-{current_station}.png'))
     
     #write to csv
-    df_havengetallen.to_csv(os.path.join(dir_havget, f'havengetallen_{year_slotgem}_{current_station}.csv'),float_format='%.3f')
+    file_csv = os.path.join(dir_havget, f'kw{year_slotgem}-havengetallen-{current_station}.csv')
+    df_havengetallen.to_csv(file_csv, float_format='%.3f')
@@ -208,35 +215,24 @@
                                              freq=pred_freq, nb=0, nf=4, scale_extremes=True, scale_period=True)
     
+    # TODO: the shape of the validation lines are different, so compare krommes to gele boekje instead
+    # p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211
     fig, ax = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, tick_hours=6)
-    
-    # plot validation lines if available
-    # TODO: the shape is different, so compare krommes to gele boekje instead of validation data
-    dir_vali_krommen = r'p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211'
-    for tidaltype in ["gemgetij","springtij","doodtij"]:
-        file_vali_getijkromme = os.path.join(dir_vali_krommen,f'{tidaltype}kromme_{current_station}_havengetallen{year_slotgem}.csv')
-        if not os.path.exists(file_vali_getijkromme):
-            continue
-        df_vali_getij = pd.read_csv(file_vali_getijkromme, index_col=0, parse_dates=True)
-        # convert from datetimes to timedeltas to get it in the same plot (we used datetimes before)
-        df_vali_getij.index = df_vali_getij.index - df_vali_getij.index[0]
-        ax.plot(df_vali_getij['Water Level [m]'], color='grey', zorder=0, label=f'validation KW2020 {tidaltype}')
-    ax.legend(loc=4)
-    fig.savefig(os.path.join(dir_gemgetij,f'gemgetij_trefHW_{current_station}'))
+    fig.savefig(os.path.join(dir_gemgetij,f'kw{year_slotgem}-gemiddeldgetij-{current_station}.png'))
     
     # write corrected timeseries to csv files
     # TODO: better representation of negative timedeltas requested in https://github.com/pandas-dev/pandas/issues/17232#issuecomment-2205579156, maybe convert timedeltaIndex to minutes instead?
-    for key in gemgetij_corr.keys():
-        file_csv = os.path.join(dir_gemgetij, f'Getijkromme_{key}_{current_station}_slotgem{year_slotgem}.csv')
+    for key in ['mean', 'spring', 'neap']:
+        file_csv = os.path.join(dir_gemgetij, f'kw{year_slotgem}-gemiddeldgetij_{key}-{current_station}.csv')
         gemgetij_corr[key].to_csv(file_csv, float_format='%.3f')
     
     # plot BOI figure and compare to KW2020
     fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, tick_hours=12)
-    fig_boi.savefig(os.path.join(dir_gemgetij,f'gemspringdoodtijkromme_BOI_{current_station}_slotgem{year_slotgem}.png'))
+    fig_boi.savefig(os.path.join(dir_gemgetij,f'kw{year_slotgem}-gemiddeldgetij_BOI-{current_station}.png'))
     
     # write BOI timeseries to csv files
-    for key in gemgetij_corr_boi.keys():
-        file_boi_csv = os.path.join(dir_gemgetij, f'Getijkromme_BOI_{key}_{current_station}_slotgem{year_slotgem}.csv')
+    for key in ['mean', 'spring', 'neap']:
+        file_boi_csv = os.path.join(dir_gemgetij, f'kw{year_slotgem}-gemiddeldgetij_BOI_{key}-{current_station}.csv')
         gemgetij_corr_boi[key].to_csv(file_boi_csv, float_format='%.3f')
@@ -262,7 +258,7 @@ def initiate_dist_with_hydra_nl(station):
         df_hydra_nl = pd.read_table(file_hydra_nl, encoding='latin-1', decimal=',', header=0)
         df_hydra_nl.index = 1/df_hydra_nl['Terugkeertijd [jaar]']
         df_hydra_nl.index.name = 'frequency'
-        df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]']
+        df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]'] * 100
         df_hydra_nl = df_hydra_nl[['values']]
         df_hydra_nl.attrs['station'] = station
         dist_dict['Hydra-NL'] = df_hydra_nl['values']
@@ -276,7 +272,6 @@ def add_validation_dist(dist_dict, dist_type, station):
     file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station_names_vali_dict[station]}.csv')
     df_validation = pd.read_csv(file_validation, sep=';')
     df_validation = df_validation.rename({"value":"values"},axis=1)
-    df_validation['values'] /= 100
     df_validation = df_validation.set_index("value_Tfreq", drop=True)
    df_validation.index.name = 'frequency'
     df_validation.attrs['station'] = station
@@ -297,18 +292,18 @@ def add_validation_dist(dist_dict, dist_type, station):
                                       clip_physical_break=True, dist=dist_exc_hydra, interp_freqs=freqs_interested)
    add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
-    dist_exc['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'))
+    dist_exc['geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'kw{year_slotgem}-exceedance-{current_station}.csv'))
     
     fig, ax = kw.plot_overschrijding(dist_exc)
-    ax.set_ylim(0,5.5)
-    fig.savefig(os.path.join(dir_overschrijding, f'Exceedance_lines_{current_station}.png'))
+    ax.set_ylim(0,550)
+    fig.savefig(os.path.join(dir_overschrijding, f'kw{year_slotgem}-exceedance-{current_station}.png'))
     
     # 2. Deceedance
     dist_dec = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None,
                                       clip_physical_break=True, inverse=True, interp_freqs=freqs_interested)
     add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
-    dist_dec['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'))
+    dist_dec['geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'kw{year_slotgem}-deceedance-{current_station}.csv'))
     
     fig, ax = kw.plot_overschrijding(dist_dec)
-    fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png'))
+    fig.savefig(os.path.join(dir_overschrijding, f'kw{year_slotgem}-deceedance-{current_station}.png'))
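For reference, a minimal sketch of the new output-file naming used throughout KWK_process.py above; the folder, year, station, and the single-entry dictionary below are placeholder values, not part of the patch.

import os
import pandas as pd

year_slotgem = 2021           # assumed slotgemiddelde year
current_station = "HOEKVHLD"  # assumed station code
dir_slotgem = f"./out_slotgemiddelden_{year_slotgem}"
os.makedirs(dir_slotgem, exist_ok=True)

# stand-in for the dictionary written by the example script
slotgemiddelden_valid = {"wl_mean_peryear": pd.Series([8.0, 8.6, 8.5], name="values")}

# every product now follows the kw{year}-{product}-{station} pattern
for key, series in slotgemiddelden_valid.items():
    file_csv = os.path.join(dir_slotgem, f"kw{year_slotgem}-{key}-{current_station}.csv")
    series.to_csv(file_csv, float_format="%.3f")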
diff --git a/kenmerkendewaarden/data_retrieve.py b/kenmerkendewaarden/data_retrieve.py
index 01fa2f7..6bc92fc 100644
--- a/kenmerkendewaarden/data_retrieve.py
+++ b/kenmerkendewaarden/data_retrieve.py
@@ -429,6 +429,8 @@ def read_measurements(
         return ds_meas
 
     df_meas = xarray_to_hatyan(ds_meas)
+    # back to centimeters
+    df_meas['values'] *= 100
 
     if drop_duplicates:
         df_meas = drop_duplicate_times(df_meas)
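A short sketch of the unit change above: read_measurements now returns values in centimeters, so any downstream code that still expects meters has to divide by 100 again; the directory and station below are placeholders.

import kenmerkendewaarden as kw

df_meas = kw.read_measurements(dir_output="./measurements_wl", station="HOEKVHLD")
if df_meas is not None:
    values_cm = df_meas["values"]        # centimeters after this change
    values_m = df_meas["values"] / 100   # convert back if meters are needed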
NAP") + ax2.set_xlabel("maansverloop [uu:mm:ss]") + ax2.set_ylabel("waterstand [cm]") ax2.plot( HWLW_culmhr_summary["LW_delay_median"], HWLW_culmhr_summary["LW_values_median"], @@ -379,7 +386,7 @@ def plot_aardappelgrafiek(df_havengetallen: pd.DataFrame): ax1_ylimmean = np.mean(ax1.get_ylim()) ax2_ylimmean = np.mean(ax2.get_ylim()) xlimrange = 2 * 3600e9 # in nanoseconds - ylimrange = 1 + ylimrange = 100 # in centimeters ax1.set_xlim([ax1_xlimmean - xlimrange / 2, ax1_xlimmean + xlimrange / 2]) ax2.set_xlim([ax2_xlimmean - xlimrange / 2, ax2_xlimmean + xlimrange / 2]) ax1.set_ylim([ax1_ylimmean - ylimrange / 2, ax1_ylimmean + ylimrange / 2]) diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index d5e8832..0b53620 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -64,7 +64,7 @@ def calc_overschrijding( in case of rule_type='break', float in case of rule_type='linear'. The default is None. interp_freqs : list, optional The frequencies to interpolate to, providing this will result in a - "Geinterpoleerd" key in the returned dictionary. The default is None. + "geinterpoleerd" key in the returned dictionary. The default is None. Returns ------- @@ -90,7 +90,7 @@ def calc_overschrijding( dist = {} logger.info(f"Calculate unfiltered distribution (inverse={inverse})") - dist["Ongefilterd"] = distribution(ser_extrema, inverse=inverse) + dist["ongefilterd"] = distribution(ser_extrema, inverse=inverse) # TODO: re-enable filter for river discharge peaks #TODO: ext is geschikt voor getij, maar bij hoge afvoergolf wil je alleen het echte extreem. Er is dan een treshold per station nodig, is nodig om de rivierafvoerpiek te kunnen duiden. @@ -102,21 +102,21 @@ def calc_overschrijding( ser_extrema_filt = filter_with_threshold(ser_extrema, ser_peaks, threshold) else: ser_extrema_filt = ser_extrema.copy() - dist['Gefilterd'] = distribution(ser_extrema_filt.copy()) + dist['gefilterd'] = distribution(ser_extrema_filt.copy()) """ logger.info("Calculate filtered distribution with trendanalysis") ser_trend = apply_trendanalysis( ser_extrema, rule_type=rule_type, rule_value=rule_value ) - dist["Trendanalyse"] = distribution(ser_trend.copy(), inverse=inverse) - - logger.info("Fit Weibull to filtered distribution with trendanalysis") - idx_maxfreq_trend = get_threshold_rowidx(dist["Trendanalyse"]) - treshold_value = dist["Trendanalyse"].iloc[idx_maxfreq_trend] - treshold_Tfreq = dist["Trendanalyse"].index[idx_maxfreq_trend] - dist["Weibull"] = get_weibull( - dist["Trendanalyse"].copy(), + dist["trendanalyse"] = distribution(ser_trend.copy(), inverse=inverse) + + logger.info("Fit weibull to filtered distribution with trendanalysis") + idx_maxfreq_trend = get_threshold_rowidx(dist["trendanalyse"]) + treshold_value = dist["trendanalyse"].iloc[idx_maxfreq_trend] + treshold_Tfreq = dist["trendanalyse"].index[idx_maxfreq_trend] + dist["weibull"] = get_weibull( + dist["trendanalyse"].copy(), threshold=treshold_value, Tfreqs=np.logspace(-5, np.log10(treshold_Tfreq), 5000), inverse=inverse, @@ -129,15 +129,15 @@ def calc_overschrijding( else: logger.info("Blend trend and weibull") ser_hydra = None - dist["Gecombineerd"] = blend_distributions( - ser_trend=dist["Trendanalyse"].copy(), - ser_weibull=dist["Weibull"].copy(), + dist["gecombineerd"] = blend_distributions( + ser_trend=dist["trendanalyse"].copy(), + ser_weibull=dist["weibull"].copy(), ser_hydra=ser_hydra, ) if interp_freqs is not None: - dist["Geinterpoleerd"] = 
interpolate_interested_Tfreqs( - dist["Gecombineerd"], Tfreqs=interp_freqs + dist["geinterpoleerd"] = interpolate_interested_Tfreqs( + dist["gecombineerd"], Tfreqs=interp_freqs ) return dist @@ -457,7 +457,7 @@ def blend_distributions( ser_blended1 = ser_trend.iloc[:idx_maxfreq_trend].copy() ser_weibull = ser_weibull.loc[ser_weibull.index < ser_blended1.index[-1]].copy() - # Weibull to Hydra + # weibull to Hydra if ser_hydra is not None: ser_hydra = ser_hydra.sort_index(ascending=False) @@ -555,14 +555,14 @@ def plot_overschrijding(dist: dict): station = station_attrs[0] color_map = { - "Ongefilterd": "b", - "Gefilterd": "orange", - "Trendanalyse": "g", - "Weibull": "r", + "ongefilterd": "b", + "gefilterd": "orange", + "trendanalyse": "g", + "weibull": "r", "Hydra-NL": "m", "Hydra-NL met modelonzekerheid": "cyan", - "Gecombineerd": "k", - "Geinterpoleerd": "lime", + "gecombineerd": "k", + "geinterpoleerd": "lime", } fig, ax = plt.subplots(figsize=(8, 6)) @@ -572,9 +572,9 @@ def plot_overschrijding(dist: dict): c = color_map[k] else: c = None - if k == "Gecombineerd": + if k == "gecombineerd": ax.plot(dist[k], "--", label=k, c=c) - elif k == "Geinterpoleerd": + elif k == "geinterpoleerd": ax.plot(dist[k], "o", label=k, c=c, markersize=5) else: ax.plot(dist[k], label=k, c=c) @@ -584,7 +584,7 @@ def plot_overschrijding(dist: dict): ax.set_xscale("log") ax.set_xlim([1e-5, 1e3]) ax.invert_xaxis() - ax.set_ylabel("Waterlevel [m]") + ax.set_ylabel("water level [cm]") ax.legend(fontsize="medium", loc="lower right") ax.xaxis.set_minor_locator( ticker.LogLocator( @@ -592,13 +592,9 @@ def plot_overschrijding(dist: dict): ) ) ax.xaxis.set_minor_formatter(ticker.NullFormatter()), - ax.yaxis.set_minor_locator( - ticker.MultipleLocator(0.1) - ) # this was 10, but now meters instead of cm + ax.yaxis.set_minor_locator(ticker.MultipleLocator(10)) # centimeters ax.yaxis.set_minor_formatter(ticker.NullFormatter()), - ax.yaxis.set_major_formatter( - ticker.FormatStrFormatter("%.2f") - ) # to force 2 decimal places + ax.yaxis.set_major_formatter(ticker.FormatStrFormatter("%.2f")) # 2 decimal places ax.grid(visible=True, which="major"), ax.grid(visible=True, which="minor", ls=":") ax.set_axisbelow(True) fig.tight_layout() diff --git a/kenmerkendewaarden/slotgemiddelden.py b/kenmerkendewaarden/slotgemiddelden.py index 04fd3f0..5ea5680 100644 --- a/kenmerkendewaarden/slotgemiddelden.py +++ b/kenmerkendewaarden/slotgemiddelden.py @@ -48,7 +48,8 @@ def calc_slotgemiddelden( Returns ------- slotgemiddelden_dict : dict - dictionary with yearly means and model fits, optionally also for extremes. + dictionary with yearly means and model fits, optionally also for extremes + and corresponding tidal range. 
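A condensed usage sketch of the lowercase distribution keys, mirroring the exceedance block in the example script above; the measurement folder, station, output filename, and return frequencies are placeholder assumptions.

import kenmerkendewaarden as kw

df_ext = kw.read_measurements(dir_output="./measurements_wl", station="HOEKVHLD", extremes=True)
freqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200]  # assumed return frequencies
if df_ext is not None:
    dist_exc = kw.calc_overschrijding(df_ext=df_ext, rule_type=None, rule_value=None,
                                      clip_physical_break=True, interp_freqs=freqs_interested)
    # keys are lowercase now: ongefilterd, trendanalyse, weibull, gecombineerd, geinterpoleerd
    dist_exc["geinterpoleerd"].to_csv("kw2021-exceedance-HOEKVHLD.csv")
    fig, ax = kw.plot_overschrijding(dist_exc)  # water levels on the y-axis are in centimeters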
""" # initialize dict @@ -63,9 +64,6 @@ def calc_slotgemiddelden( # calculate yearly means dict_wltidalindicators = calc_wltidalindicators(df_meas, min_coverage=min_coverage) wl_mean_peryear = dict_wltidalindicators["wl_mean_peryear"] - # convert periodindex to datetimeindex - # TODO: alternatively let fit_models support periodindex - # wl_mean_peryear.index = wl_mean_peryear.index.to_timestamp() slotgemiddelden_dict["wl_mean_peryear"] = wl_mean_peryear # clip part of mean timeseries before physical break to supply to model @@ -93,21 +91,24 @@ def calc_slotgemiddelden( ) HW_mean_peryear = dict_HWLWtidalindicators["HW_mean_peryear"] LW_mean_peryear = dict_HWLWtidalindicators["LW_mean_peryear"] - # HW_mean_peryear.index = HW_mean_peryear.index.to_timestamp() - # LW_mean_peryear.index = LW_mean_peryear.index.to_timestamp() + tidalrange_mean_peryear = HW_mean_peryear - LW_mean_peryear slotgemiddelden_dict["HW_mean_peryear"] = HW_mean_peryear slotgemiddelden_dict["LW_mean_peryear"] = LW_mean_peryear + slotgemiddelden_dict["tidalrange_mean_peryear"] = tidalrange_mean_peryear # clip part of mean timeseries before physical break to supply to model if clip_physical_break: HW_mean_peryear = clip_timeseries_physical_break(HW_mean_peryear) LW_mean_peryear = clip_timeseries_physical_break(LW_mean_peryear) + tidalrange_mean_peryear = clip_timeseries_physical_break(tidalrange_mean_peryear) # fit linear models over yearly mean values pred_pd_HW = fit_models(HW_mean_peryear, with_nodal=with_nodal) pred_pd_LW = fit_models(LW_mean_peryear, with_nodal=with_nodal) + pred_pd_tidalrange = fit_models(tidalrange_mean_peryear, with_nodal=with_nodal) slotgemiddelden_dict["HW_model_fit"] = pred_pd_HW slotgemiddelden_dict["LW_model_fit"] = pred_pd_LW + slotgemiddelden_dict["tidalrange_model_fit"] = pred_pd_tidalrange return slotgemiddelden_dict @@ -170,7 +171,7 @@ def plot_slotgemiddelden( ".k", label=f"slotgemiddelde for {slotgem_time_value.index.year[0]}", ) - + # plot timeseries of average extremes if slotgemiddelden_dict_all is not None: # compare station attributes @@ -198,8 +199,10 @@ def plot_slotgemiddelden( LW_model_fit = slotgemiddelden_dict["LW_model_fit"] ax.plot(HW_model_fit, ".-", color=cmap(0), label=None) ax.plot(LW_model_fit, ".-", color=cmap(0), label=None) + ax.plot(slotgemiddelden_dict["HW_model_fit"].iloc[[-1]],".k") + ax.plot(slotgemiddelden_dict["LW_model_fit"].iloc[[-1]],".k") - ax.set_ylabel("waterstand [m]") + ax.set_ylabel("water level [cm]") ax.set_title(f"yearly mean HW/wl/LW for {station}") ax.grid() ax.legend(loc=2) diff --git a/kenmerkendewaarden/tidalindicators.py b/kenmerkendewaarden/tidalindicators.py index 945cf11..1e728bf 100644 --- a/kenmerkendewaarden/tidalindicators.py +++ b/kenmerkendewaarden/tidalindicators.py @@ -291,7 +291,7 @@ def plot_tidalindicators(dict_indicators: dict): plot_pd_series(dict_indicators, ax) ax.grid() ax.legend(loc=1) - ax.set_ylabel("water level [m]") + ax.set_ylabel("water level [cm]") ax.set_title(f"tidal indicators for {station}") fig.tight_layout() return fig, ax diff --git a/tests/test_overschrijding.py b/tests/test_overschrijding.py index 95c7ee7..2bc00db 100644 --- a/tests/test_overschrijding.py +++ b/tests/test_overschrijding.py @@ -57,14 +57,14 @@ def test_calc_overschrijding(df_ext_12_2010_2014): ) expected_keys = [ - "Ongefilterd", - "Trendanalyse", - "Weibull", - "Gecombineerd", - "Geinterpoleerd", + "ongefilterd", + "trendanalyse", + "weibull", + "gecombineerd", + "geinterpoleerd", ] assert set(dist.keys()) == set(expected_keys) - assert 
np.allclose(dist["Geinterpoleerd"].index, Tfreqs_interested) + assert np.allclose(dist["geinterpoleerd"].index, Tfreqs_interested) expected_values = np.array( [ 1.93, @@ -79,7 +79,7 @@ def test_calc_overschrijding(df_ext_12_2010_2014): 4.00701551, ] ) - assert np.allclose(dist["Geinterpoleerd"].values, expected_values) + assert np.allclose(dist["geinterpoleerd"].values, expected_values) @pytest.mark.unittest @@ -110,14 +110,14 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): expected_keys = [ "Hydra-NL", - "Ongefilterd", - "Trendanalyse", - "Weibull", - "Gecombineerd", - "Geinterpoleerd", + "ongefilterd", + "trendanalyse", + "weibull", + "gecombineerd", + "geinterpoleerd", ] assert set(dist.keys()) == set(expected_keys) - assert np.allclose(dist["Geinterpoleerd"].index, Tfreqs_interested) + assert np.allclose(dist["geinterpoleerd"].index, Tfreqs_interested) expected_values = np.array( [ 1.93, @@ -132,7 +132,7 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): 4.3095, ] ) - assert np.allclose(dist["Geinterpoleerd"].values, expected_values) + assert np.allclose(dist["geinterpoleerd"].values, expected_values) @pytest.mark.unittest @@ -157,14 +157,14 @@ def test_calc_overschrijding_rule_type_break(df_ext_12_2010_2014): ) expected_keys = [ - "Ongefilterd", - "Trendanalyse", - "Weibull", - "Gecombineerd", - "Geinterpoleerd", + "ongefilterd", + "trendanalyse", + "weibull", + "gecombineerd", + "geinterpoleerd", ] assert set(dist.keys()) == set(expected_keys) - assert np.allclose(dist["Geinterpoleerd"].index, Tfreqs_interested) + assert np.allclose(dist["geinterpoleerd"].index, Tfreqs_interested) expected_values = np.array( [ 1.93, @@ -179,7 +179,7 @@ def test_calc_overschrijding_rule_type_break(df_ext_12_2010_2014): 3.90661043, ] ) - assert np.allclose(dist["Geinterpoleerd"].values, expected_values) + assert np.allclose(dist["geinterpoleerd"].values, expected_values) @pytest.mark.unittest @@ -239,7 +239,7 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014): ] ) assert np.allclose( - dist_normal["Geinterpoleerd"].values, expected_values_normal + dist_normal["geinterpoleerd"].values, expected_values_normal ) expected_values_clip = np.array( [ @@ -256,7 +256,7 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014): ] ) assert np.allclose( - dist_clip["Geinterpoleerd"].values, expected_values_clip + dist_clip["geinterpoleerd"].values, expected_values_clip ) diff --git a/tests/test_slotgemiddelden.py b/tests/test_slotgemiddelden.py index e87054f..b89f6bd 100644 --- a/tests/test_slotgemiddelden.py +++ b/tests/test_slotgemiddelden.py @@ -61,21 +61,22 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): "LW_mean_peryear", "HW_model_fit", "LW_model_fit", + "tidalrange_mean_peryear", + "tidalrange_model_fit", ] expected_keys_noext = ["wl_mean_peryear", "wl_model_fit"] assert set(slotgemiddelden_dict_inclext.keys()) == set(expected_keys_inclext) assert set(slotgemiddelden_dict_noext.keys()) == set(expected_keys_noext) # assertion of values - wl_mean_peryear_expected = np.array( - [0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922] - ) - hw_mean_peryear_expected = np.array( - [1.13968839, 1.12875177, 1.13988685, 1.1415461, 1.18998584] - ) - lw_mean_peryear_expected = np.array( - [-0.60561702, -0.59089362, -0.59342291, -0.61334278, -0.58024113] - ) + wl_mean_peryear_expected = np.array([0.07960731, 0.08612119, 0.0853051, + 0.07010864, 0.10051922]) + hw_mean_peryear_expected = np.array([1.13968839, 1.12875177, 
1.13988685, + 1.1415461, 1.18998584]) + lw_mean_peryear_expected = np.array([-0.60561702, -0.59089362, -0.59342291, + -0.61334278, -0.58024113]) + range_mean_peryear_expected = np.array([1.74530541, 1.71964539, 1.73330976, + 1.75488888, 1.77022697]) assert np.allclose( slotgemiddelden_dict_inclext["wl_mean_peryear"].values, wl_mean_peryear_expected ) @@ -85,16 +86,19 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): assert np.allclose( slotgemiddelden_dict_inclext["LW_mean_peryear"].values, lw_mean_peryear_expected ) - - wl_model_fit_expected = np.array( - [0.0141927, 0.08612119, 0.0853051, 0.07010864, 0.10051922, 0.23137634] - ) - hw_model_fit_expected = np.array( - [1.05295416, 1.12875177, 1.13988685, 1.1415461, 1.18998584, 1.336182] - ) - lw_model_fit_expected = np.array( - [-0.67420399, -0.59089362, -0.59342291, -0.61334278, -0.58024113, -0.42969074] - ) + assert np.allclose( + slotgemiddelden_dict_inclext["tidalrange_mean_peryear"].values, + range_mean_peryear_expected + ) + + wl_model_fit_expected = np.array([0.0141927, 0.08612119, 0.0853051, + 0.07010864, 0.10051922, 0.23137634]) + hw_model_fit_expected = np.array([1.05295416, 1.12875177, 1.13988685, + 1.1415461, 1.18998584, 1.336182]) + lw_model_fit_expected = np.array([-0.67420399, -0.59089362, -0.59342291, + -0.61334278, -0.58024113, -0.42969074]) + range_model_fit_expected = np.array([1.72715816, 1.71964539, 1.73330976, + 1.75488888, 1.77022697, 1.76587273]) assert np.allclose( slotgemiddelden_dict_inclext["wl_model_fit"].values, wl_model_fit_expected ) @@ -104,6 +108,10 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): assert np.allclose( slotgemiddelden_dict_inclext["LW_model_fit"].values, lw_model_fit_expected ) + assert np.allclose( + slotgemiddelden_dict_inclext["tidalrange_model_fit"].values, + range_model_fit_expected + ) @pytest.mark.unittest