140 adjustments for dashboard (#142)

* updated names of outputfiles * add slotgemiddelde hw/lw to plot (and csv writing) * export station locations csv * fixed tests * add tidalrange to slotgemiddelden * updated readme * updated whatsnew
Deltares-research · Sep 12, 2024 · 00f67fb · 00f67fb
1 parent f792586
commit 00f67fb
Show file tree

Hide file tree

Showing 12 changed files with 162 additions and 135 deletions.
diff --git a/README.md b/README.md
@@ -14,6 +14,6 @@ De methodieken in deze repository hebben nog geen definitieve status en zijn daa
 ## installation
 - download Miniforge3 from [the miniforge github](https://github.com/conda-forge/miniforge?tab=readme-ov-file#download) and install it with the recommended settings.
 - open Miniforge Prompt
-- `conda create --name kw_env python=3.11 git -y`
+- `conda create --name kw_env python -y`
 - `conda activate kw_env`
-- `pip install git+https://github.com/Deltares-research/kenmerkendewaarden`
+- `pip install kenmerkendewaarden`
diff --git a/docs/whats-new.md b/docs/whats-new.md
@@ -2,6 +2,10 @@
 
 ## UNRELEASED
 
+### Feat
+- from meters to centimeters and other updates for dashboard in [#142](https://github.com/Deltares-research/kenmerkendewaarden/pull/142)
+- added tidalrange to slotgemiddelden in [#142](https://github.com/Deltares-research/kenmerkendewaarden/pull/142)
+
 ### Fix
 - accomodate updated astrog datetime handling in hatyan 2.9.0 in [#141](https://github.com/Deltares-research/kenmerkendewaarden/pull/141)
 

diff --git a/examples/KWK_getcheckdata.py b/examples/KWK_getcheckdata.py
@@ -22,6 +22,7 @@
 derive_stats = False
 plot_meas = False
 plot_stations = False
+write_stations_table = False
 
 start_date = pd.Timestamp(1870, 1, 1, tz="UTC+01:00")
 end_date = pd.Timestamp(2024, 1, 1, tz="UTC+01:00")
@@ -137,3 +138,13 @@
     fig, ax = kw.plot_stations(station_list=station_list_map, crs=28992, add_labels=False)
     fig.savefig(os.path.join(dir_base,'stations_map.png'), dpi=200)
 
+
+
+### WRITE CSV WITH STATION CODE/X/Y/EPSG
+if write_stations_table:
+    # TODO: consider making retrieve_catalog public
+    from kenmerkendewaarden.data_retrieve import retrieve_catalog
+    locs_meas_ts_all, _, _ = retrieve_catalog(crs=4326)
+    locs_ts = locs_meas_ts_all.loc[locs_meas_ts_all.index.isin(station_list)]
+    file_csv = os.path.join(dir_base, "station_locations.csv")
+    locs_ts[["Locatie_MessageID","X","Y","Coordinatenstelsel","Naam"]].to_csv(file_csv)
diff --git a/examples/KWK_process.py b/examples/KWK_process.py
@@ -29,9 +29,9 @@
 dir_meas = os.path.join(dir_base,'measurements_wl_18700101_20240101')
 
 dir_indicators = os.path.join(dir_base,f'out_tidalindicators_{year_slotgem}')
-dir_slotgem = os.path.join(dir_base,f'out_slotgem_{year_slotgem}')
+dir_slotgem = os.path.join(dir_base,f'out_slotgemiddelden_{year_slotgem}')
 dir_havget = os.path.join(dir_base,f'out_havengetallen_{year_slotgem}')
-dir_gemgetij = os.path.join(dir_base,f'out_gemgetij_{year_slotgem}')
+dir_gemgetij = os.path.join(dir_base,f'out_gemiddeldgetij_{year_slotgem}')
 dir_overschrijding = os.path.join(dir_base,f'out_overschrijding_{year_slotgem}')
 os.makedirs(dir_indicators, exist_ok=True)
 os.makedirs(dir_slotgem, exist_ok=True)
@@ -86,7 +86,7 @@
                                        nap_correction=nap_correction, drop_duplicates=drop_duplicates)
     if df_meas_all is not None:
         #crop measurement data
-        df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))#,onlyfull=False)
+        df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))
 
     # extremes are used for slotgemiddelden, havengetallen, overschrijding
     df_ext_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
@@ -116,12 +116,14 @@
         # merge dictionaries
         dict_wltidalindicators.update(dict_HWLWtidalindicators)
 
-        # csv for monthly indicators
-        dict_wltidalindicators['wl_mean_permonth'].to_csv(os.path.join(dir_indicators,f'meanwl_permonth_{current_station}.txt'))
+        # csv for yearlymonthly indicators
+        for key in ['wl_mean_peryear','wl_mean_permonth']:
+            file_csv = os.path.join(dir_indicators, f'kw{year_slotgem}-{key}-{current_station}.csv')
+            dict_wltidalindicators[key].to_csv(file_csv, float_format='%.3f')
 
         # plot
         fig, ax = kw.plot_tidalindicators(dict_wltidalindicators)
-        fig.savefig(os.path.join(dir_indicators,f'tidal_indicators_{current_station}'))
+        fig.savefig(os.path.join(dir_indicators,f'kw{year_slotgem}-tidalindicators-{current_station}.png'))
 
 
 
@@ -146,11 +148,15 @@
         fig1, ax1 = kw.plot_slotgemiddelden(slotgemiddelden_valid, slotgemiddelden_all)
         ax1.set_xlim(fig_alltimes_ext)
 
-        # plot and write slotgemiddelde value (for waterlevels only), the slotgemiddelde is the last value of the model fit
-        slotgemiddelden_valid['HW_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanHW_{current_station}.txt'))
-        slotgemiddelden_valid['LW_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanLW_{current_station}.txt'))
-        slotgemiddelden_valid['wl_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanwl_{current_station}.txt'))
-        slotgemiddelden_valid['wl_model_fit'].to_csv(os.path.join(dir_slotgem,f'modelfit_{current_station}.txt'))
+        # write slotgemiddelden to csv, the slotgemiddelde is the last value of the model fit
+        key_list = ["wl_mean_peryear", "wl_model_fit",
+                    "HW_mean_peryear", "HW_model_fit",
+                    "LW_mean_peryear", "LW_model_fit",
+                    "tidalrange_mean_peryear", "tidalrange_model_fit",
+                    ]
+        for key in key_list:
+            file_csv = os.path.join(dir_slotgem, f'kw{year_slotgem}-{key}-{current_station}.csv')
+            slotgemiddelden_valid[key].to_csv(file_csv, float_format='%.3f')
 
         # get and plot validation timeseries (yearly mean wl/HW/LW)
         station_name_dict = {'HOEKVHLD':'hoek',
@@ -160,15 +166,15 @@
             file_yearmeanHW = os.path.join(dir_meas_gemHWLWwlAB,f'{station_name_dict[current_station]}_hw.txt')
             file_yearmeanLW = os.path.join(dir_meas_gemHWLWwlAB,f'{station_name_dict[current_station]}_lw.txt')
             file_yearmeanwl = os.path.join(dir_meas_gemHWLWwlAB,f'{station_name_dict[current_station]}_Z.txt')
-            yearmeanHW = pd.read_csv(file_yearmeanHW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')/100
-            yearmeanLW = pd.read_csv(file_yearmeanLW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')/100
-            yearmeanwl = pd.read_csv(file_yearmeanwl, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')/100
+            yearmeanHW = pd.read_csv(file_yearmeanHW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')
+            yearmeanLW = pd.read_csv(file_yearmeanLW, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')
+            yearmeanwl = pd.read_csv(file_yearmeanwl, sep='\\s+', skiprows=1, names=['datetime','values'], parse_dates=['datetime'], na_values=-999.9, index_col='datetime')
             ax1.plot(yearmeanHW['values'],'+g', zorder=0)
             ax1.plot(yearmeanLW['values'],'+g', zorder=0)
             ax1.plot(yearmeanwl['values'],'+g',label='yearmean validation', zorder=0)
             ax1.legend(loc=2)
 
-        fig1.savefig(os.path.join(dir_slotgem,f'yearly_values_{current_station}'))
+        fig1.savefig(os.path.join(dir_slotgem,f'kw{year_slotgem}-slotgemiddelden-{current_station}'))
 
 
 
@@ -180,14 +186,15 @@
 
         # plot hwlw per timeclass including median
         fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
-        fig.savefig(os.path.join(dir_havget,f'HWLW_pertijdsklasse_inclmedianline_{current_station}'))
+        fig.savefig(os.path.join(dir_havget,f'kw{year_slotgem}-HWLW_pertijdsklasse-{current_station}.png'))
 
         # plot aardappelgrafiek
         fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen=df_havengetallen)
-        fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}'))
+        fig.savefig(os.path.join(dir_havget, f'kw{year_slotgem}-aardappelgrafiek-{current_station}.png'))
 
         #write to csv
-        df_havengetallen.to_csv(os.path.join(dir_havget, f'havengetallen_{year_slotgem}_{current_station}.csv'),float_format='%.3f')
+        file_csv = os.path.join(dir_havget, f'kw{year_slotgem}-havengetallen-{current_station}.csv')
+        df_havengetallen.to_csv(file_csv, float_format='%.3f')
 
 
 
@@ -208,35 +215,24 @@
                                                    freq=pred_freq, nb=0, nf=4, 
                                                    scale_extremes=True, scale_period=True)
 
+        # TODO: the shape of the validation lines are different, so compare krommes to gele boekje instead
+        # p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211
         fig, ax = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, tick_hours=6)
-
-        # plot validation lines if available
-        # TODO: the shape is different, so compare krommes to gele boekje instead of validation data
-        dir_vali_krommen = r'p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211'
-        for tidaltype in ["gemgetij","springtij","doodtij"]:
-            file_vali_getijkromme = os.path.join(dir_vali_krommen,f'{tidaltype}kromme_{current_station}_havengetallen{year_slotgem}.csv')
-            if not os.path.exists(file_vali_getijkromme):
-                continue
-            df_vali_getij = pd.read_csv(file_vali_getijkromme, index_col=0, parse_dates=True)
-            # convert from datetimes to timedeltas to get it in the same plot (we used datetimes before)
-            df_vali_getij.index = df_vali_getij.index - df_vali_getij.index[0]
-            ax.plot(df_vali_getij['Water Level [m]'], color='grey', zorder=0, label=f'validation KW2020 {tidaltype}')
-        ax.legend(loc=4)
-        fig.savefig(os.path.join(dir_gemgetij,f'gemgetij_trefHW_{current_station}'))
+        fig.savefig(os.path.join(dir_gemgetij,f'kw{year_slotgem}-gemiddeldgetij-{current_station}.png'))
 
         # write corrected timeseries to csv files
         # TODO: better representation of negative timedeltas requested in https://github.com/pandas-dev/pandas/issues/17232#issuecomment-2205579156, maybe convert timedeltaIndex to minutes instead?
-        for key in gemgetij_corr.keys():
-            file_csv = os.path.join(dir_gemgetij, f'Getijkromme_{key}_{current_station}_slotgem{year_slotgem}.csv')
+        for key in ['mean', 'spring', 'neap']:
+            file_csv = os.path.join(dir_gemgetij, f'kw{year_slotgem}-gemiddeldgetij_{key}-{current_station}.csv')
             gemgetij_corr[key].to_csv(file_csv, float_format='%.3f')
 
         # plot BOI figure and compare to KW2020
         fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, tick_hours=12)
-        fig_boi.savefig(os.path.join(dir_gemgetij,f'gemspringdoodtijkromme_BOI_{current_station}_slotgem{year_slotgem}.png'))
+        fig_boi.savefig(os.path.join(dir_gemgetij,f'kw{year_slotgem}-gemiddeldgetij_BOI-{current_station}.png'))
 
         # write BOI timeseries to csv files
-        for key in gemgetij_corr_boi.keys():
-            file_boi_csv = os.path.join(dir_gemgetij, f'Getijkromme_BOI_{key}_{current_station}_slotgem{year_slotgem}.csv')
+        for key in ['mean', 'spring', 'neap']:
+            file_boi_csv = os.path.join(dir_gemgetij, f'kw{year_slotgem}-gemiddeldgetij_BOI_{key}-{current_station}.csv')
             gemgetij_corr_boi[key].to_csv(file_boi_csv, float_format='%.3f')
 
 
@@ -262,7 +258,7 @@ def initiate_dist_with_hydra_nl(station):
             df_hydra_nl = pd.read_table(file_hydra_nl, encoding='latin-1', decimal=',', header=0)
             df_hydra_nl.index = 1/df_hydra_nl['Terugkeertijd [jaar]']
             df_hydra_nl.index.name = 'frequency'
-            df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]']
+            df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]'] * 100
             df_hydra_nl = df_hydra_nl[['values']]
             df_hydra_nl.attrs['station'] = station
             dist_dict['Hydra-NL'] = df_hydra_nl['values']
@@ -276,7 +272,6 @@ def add_validation_dist(dist_dict, dist_type, station):
         file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station_names_vali_dict[station]}.csv')
         df_validation = pd.read_csv(file_validation, sep=';')
         df_validation = df_validation.rename({"value":"values"},axis=1)
-        df_validation['values'] /= 100
         df_validation = df_validation.set_index("value_Tfreq", drop=True)
         df_validation.index.name = 'frequency'
         df_validation.attrs['station'] = station
@@ -297,18 +292,18 @@ def add_validation_dist(dist_dict, dist_type, station):
                                           clip_physical_break=True, dist=dist_exc_hydra,
                                           interp_freqs=freqs_interested)
         add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
-        dist_exc['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'))
+        dist_exc['geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'kw{year_slotgem}-exceedance-{current_station}.csv'))
 
         fig, ax = kw.plot_overschrijding(dist_exc)
-        ax.set_ylim(0,5.5)
-        fig.savefig(os.path.join(dir_overschrijding, f'Exceedance_lines_{current_station}.png'))
+        ax.set_ylim(0,550)
+        fig.savefig(os.path.join(dir_overschrijding, f'kw{year_slotgem}-exceedance-{current_station}.png'))
 
         # 2. Deceedance
         dist_dec = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None, 
                                           clip_physical_break=True, inverse=True,
                                           interp_freqs=freqs_interested)
         add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
-        dist_dec['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'))
+        dist_dec['geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'kw{year_slotgem}-deceedance-{current_station}.csv'))
 
         fig, ax = kw.plot_overschrijding(dist_dec)
-        fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png'))
+        fig.savefig(os.path.join(dir_overschrijding, f'kw{year_slotgem}-deceedance-{current_station}.png'))
diff --git a/kenmerkendewaarden/data_retrieve.py b/kenmerkendewaarden/data_retrieve.py
@@ -429,6 +429,8 @@ def read_measurements(
         return ds_meas
 
     df_meas = xarray_to_hatyan(ds_meas)
+    # back to centimeters
+    df_meas['values'] *= 100
 
     if drop_duplicates:
         df_meas = drop_duplicate_times(df_meas)

diff --git a/kenmerkendewaarden/gemiddeldgetij.py b/kenmerkendewaarden/gemiddeldgetij.py
@@ -345,7 +345,8 @@ def plot_gemiddeldgetij(
     ax.legend(loc=4)
     ax.grid()
     ax.set_xlabel("time since high water")
-
+    ax.set_ylabel("water level [cm]")
+
     # fix timedelta ticks
     ax.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter_improved())
     # put ticks at intervals of multiples of 3 and 6, resulting in whole seconds

diff --git a/kenmerkendewaarden/havengetallen.py b/kenmerkendewaarden/havengetallen.py
@@ -314,6 +314,13 @@ def plot_HWLW_pertimeclass(df_ext: pd.DataFrame, df_havengetallen: pd.DataFrame)
     )
     ax4.plot(HWLW_culmhr_summary["LW_delay_median"].dt.total_seconds() / 3600, ".-")
     ax4.set_xlim([0 - 0.5, 12 - 0.5])
+
+    ax1.set_ylabel("water level [cm]")
+    ax2.set_ylabel("water level [cm]")
+    ax3.set_ylabel("time delay [hours]")
+    ax4.set_ylabel("time delay [hours]")
+    ax3.set_xlabel("culmination time class [hours]")
+    ax4.set_xlabel("culmination time class [hours]")
     fig.tight_layout()
     axs = np.array(((ax1, ax2), (ax3, ax4)))
 
@@ -342,8 +349,8 @@ def plot_aardappelgrafiek(df_havengetallen: pd.DataFrame):
 
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7.5, 4), sharex=False)
     ax1.set_title("HW")
-    ax1.set_xlabel("maansverloop in uu:mm:ss")
-    ax1.set_ylabel("waterstand in m t.o.v. NAP")
+    ax1.set_xlabel("maansverloop [uu:mm:ss]")
+    ax1.set_ylabel("waterstand [cm]")
     ax1.plot(
         HWLW_culmhr_summary["HW_delay_median"],
         HWLW_culmhr_summary["HW_values_median"],
@@ -352,8 +359,8 @@ def plot_aardappelgrafiek(df_havengetallen: pd.DataFrame):
     ax1.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter_improved())
     ax1.grid()
     ax2.set_title("LW")
-    ax2.set_xlabel("maansverloop in uu:mm:ss")
-    ax2.set_ylabel("waterstand in m t.o.v. NAP")
+    ax2.set_xlabel("maansverloop [uu:mm:ss]")
+    ax2.set_ylabel("waterstand [cm]")
     ax2.plot(
         HWLW_culmhr_summary["LW_delay_median"],
         HWLW_culmhr_summary["LW_values_median"],
@@ -379,7 +386,7 @@ def plot_aardappelgrafiek(df_havengetallen: pd.DataFrame):
     ax1_ylimmean = np.mean(ax1.get_ylim())
     ax2_ylimmean = np.mean(ax2.get_ylim())
     xlimrange = 2 * 3600e9  # in nanoseconds
-    ylimrange = 1
+    ylimrange = 100  # in centimeters
     ax1.set_xlim([ax1_xlimmean - xlimrange / 2, ax1_xlimmean + xlimrange / 2])
     ax2.set_xlim([ax2_xlimmean - xlimrange / 2, ax2_xlimmean + xlimrange / 2])
     ax1.set_ylim([ax1_ylimmean - ylimrange / 2, ax1_ylimmean + ylimrange / 2])