Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

40 avoid dropping timezone of measurement dataframe #41

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- update dependencies and code to hatyan 2.8.0 in [#28](https://github.com/Deltares-research/kenmerkendewaarden/pull/28)
- added neaptide tidal indicators for extremes in [#34](https://github.com/Deltares-research/kenmerkendewaarden/pull/34)
- used threshold frequency instead of fixed index in `kw.overschrijding.blend_distributions` in [#38](https://github.com/Deltares-research/kenmerkendewaarden/pull/38)
- dropped timezones consistently in `kw.calc_wltidalindicators()` and `kw.calc_HWLWtidalindicators()` to increase performance in [#41](https://github.com/Deltares-research/kenmerkendewaarden/pull/41)


## 0.1.0 (2024-03-11)
Expand Down
41 changes: 23 additions & 18 deletions kenmerkendewaarden/tidalindicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ def calc_HWLWtidalindicators(data_pd_HWLW_all, min_count=None):
DESCRIPTION.

"""
# dropping the timezone makes the code below much faster and gives equal results: https://github.com/pandas-dev/pandas/issues/58956
if data_pd_HWLW_all.index.tz is not None:
data_pd_HWLW_all = data_pd_HWLW_all.tz_localize(None)

if len(data_pd_HWLW_all['HWLWcode'].unique()) > 2: #aggers are present
# TODO: this drops first value if it is a 3/4/5 LW, should be fixed: https://github.com/Deltares/hatyan/issues/311
data_pd_HWLW_12 = calc_HWLW12345to12(data_pd_HWLW_all) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
Expand All @@ -43,18 +47,18 @@ def calc_HWLWtidalindicators(data_pd_HWLW_all, min_count=None):
data_pd_LW = data_pd_HWLW_12.loc[data_pd_HWLW_12['HWLWcode']==2]

#count HWLW values per year/month
HWLW_count_peryear = data_pd_HWLW_12.groupby(pd.PeriodIndex(data_pd_HWLW_12.index, freq="y"))['values'].count()
HWLW_count_permonth = data_pd_HWLW_12.groupby(pd.PeriodIndex(data_pd_HWLW_12.index, freq="m"))['values'].count()
HWLW_count_peryear = data_pd_HWLW_12.groupby(pd.PeriodIndex(data_pd_HWLW_12.index, freq="Y"))['values'].count()
HWLW_count_permonth = data_pd_HWLW_12.groupby(pd.PeriodIndex(data_pd_HWLW_12.index, freq="M"))['values'].count()

#yearmean HWLW from HWLW values #maybe also add *_mean_permonth
HW_mean_peryear = data_pd_HW.groupby(pd.PeriodIndex(data_pd_HW.index, freq="y"))[['values']].mean()
LW_mean_peryear = data_pd_LW.groupby(pd.PeriodIndex(data_pd_LW.index, freq="y"))[['values']].mean()
HW_mean_peryear = data_pd_HW.groupby(pd.PeriodIndex(data_pd_HW.index, freq="Y"))[['values']].mean()
LW_mean_peryear = data_pd_LW.groupby(pd.PeriodIndex(data_pd_LW.index, freq="Y"))[['values']].mean()

#derive GHHW/GHWS (gemiddeld hoogwater springtij) per month
HW_monthmax_permonth = data_pd_HW.groupby(pd.PeriodIndex(data_pd_HW.index, freq="m"))[['values']].max() #proxy for HW at spring tide
LW_monthmin_permonth = data_pd_LW.groupby(pd.PeriodIndex(data_pd_LW.index, freq="m"))[['values']].min() #proxy for LW at spring tide
HW_monthmin_permonth = data_pd_HW.groupby(pd.PeriodIndex(data_pd_HW.index, freq="m"))[['values']].min() #proxy for HW at neap tide
LW_monthmax_permonth = data_pd_LW.groupby(pd.PeriodIndex(data_pd_LW.index, freq="m"))[['values']].max() #proxy for LW at neap tide
HW_monthmax_permonth = data_pd_HW.groupby(pd.PeriodIndex(data_pd_HW.index, freq="M"))[['values']].max() #proxy for HW at spring tide
LW_monthmin_permonth = data_pd_LW.groupby(pd.PeriodIndex(data_pd_LW.index, freq="M"))[['values']].min() #proxy for LW at spring tide
HW_monthmin_permonth = data_pd_HW.groupby(pd.PeriodIndex(data_pd_HW.index, freq="M"))[['values']].min() #proxy for HW at neap tide
LW_monthmax_permonth = data_pd_LW.groupby(pd.PeriodIndex(data_pd_LW.index, freq="M"))[['values']].max() #proxy for LW at neap tide

#replace invalids with nan (in case of too less values per month or year)
if min_count is not None:
Expand All @@ -67,10 +71,10 @@ def calc_HWLWtidalindicators(data_pd_HWLW_all, min_count=None):
LW_monthmax_permonth.loc[HWLW_count_permonth<min_count_permonth] = np.nan

#derive GHHW/GHWS (gemiddeld hoogwater springtij)
HW_monthmax_mean_peryear = HW_monthmax_permonth.groupby(pd.PeriodIndex(HW_monthmax_permonth.index, freq="y"))[['values']].mean()
LW_monthmin_mean_peryear = LW_monthmin_permonth.groupby(pd.PeriodIndex(LW_monthmin_permonth.index, freq="y"))[['values']].mean()
HW_monthmin_mean_peryear = HW_monthmin_permonth.groupby(pd.PeriodIndex(HW_monthmin_permonth.index, freq="y"))[['values']].mean()
LW_monthmax_mean_peryear = LW_monthmax_permonth.groupby(pd.PeriodIndex(LW_monthmax_permonth.index, freq="y"))[['values']].mean()
HW_monthmax_mean_peryear = HW_monthmax_permonth.groupby(pd.PeriodIndex(HW_monthmax_permonth.index, freq="Y"))[['values']].mean()
LW_monthmin_mean_peryear = LW_monthmin_permonth.groupby(pd.PeriodIndex(LW_monthmin_permonth.index, freq="Y"))[['values']].mean()
HW_monthmin_mean_peryear = HW_monthmin_permonth.groupby(pd.PeriodIndex(HW_monthmin_permonth.index, freq="Y"))[['values']].mean()
LW_monthmax_mean_peryear = LW_monthmax_permonth.groupby(pd.PeriodIndex(LW_monthmax_permonth.index, freq="Y"))[['values']].mean()

dict_HWLWtidalindicators = {'HW_mean':data_pd_HW['values'].mean(), #GHW
'LW_mean':data_pd_LW['values'].mean(), #GLW
Expand Down Expand Up @@ -109,16 +113,17 @@ def calc_wltidalindicators(data_wl_pd, min_count=None):
DESCRIPTION.

"""
if hasattr(data_wl_pd.index[0],'tz'): #timezone present in index
data_wl_pd.index = data_wl_pd.index.tz_localize(None)
# dropping the timezone makes the code below much faster and gives equal results: https://github.com/pandas-dev/pandas/issues/58956
if data_wl_pd.index.tz is not None:
data_wl_pd = data_wl_pd.tz_localize(None)

#count wl values per year/month
wl_count_peryear = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="y"))['values'].count()
wl_count_permonth = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="m"))['values'].count()
wl_count_peryear = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="Y"))['values'].count()
wl_count_permonth = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="M"))['values'].count()

#yearmean wl from wl values
wl_mean_peryear = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="y"))[['values']].mean()
wl_mean_permonth = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="m"))[['values']].mean()
wl_mean_peryear = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="Y"))[['values']].mean()
wl_mean_permonth = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="M"))[['values']].mean()

#replace invalids with nan (in case of too less values per month or year)
if min_count is not None:
Expand Down
105 changes: 105 additions & 0 deletions tests/test_tidalindicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import hatyan
import kenmerkendewaarden as kw
import numpy as np
import pandas as pd

dir_tests = os.path.dirname(__file__) #F9 doesnt work, only F5 (F5 also only method to reload external definition scripts)
dir_testdata = os.path.join(dir_tests,'testdata')
Expand All @@ -20,3 +21,107 @@ def test_calc_HWLWtidalrange():
vals_expected = np.array([4. , 4. , 4.1 , 4.1 , 3.77, 3.77, 3.89, 3.89, 3.5 , 3.5 ])
assert len(ranges) == 1411
assert np.allclose(ranges[-10:], vals_expected)


@pytest.fixture(scope="session")
def prediction():
    """Session-scoped hatyan water-level prediction built from a small
    three-component (M2/M4/S2) tidal component set on a 10-minute grid."""
    components = pd.DataFrame(
        {"A": [1, 0.5, 0.2], "phi_deg": [10, 15, 20]},
        index=["M2", "M4", "S2"],
    )
    components.attrs.update({
        "nodalfactors": True,
        "fu_alltimes": True,
        "xfac": False,
        "source": "schureman",
        # TODO: required but this is a bug: https://github.com/Deltares/hatyan/issues/317
        "tzone": None,
    })
    times = pd.date_range("2020-01-01", "2024-01-01", freq="10min")
    return hatyan.prediction(components, times=times)


@pytest.fixture(scope="session")
def prediction_extremes(prediction):
    """High/low water extremes derived from the ``prediction`` fixture."""
    return hatyan.calc_HWLW(prediction)


@pytest.mark.unittest
def test_calc_wltidalindicators(prediction):
    """Check yearly/monthly water-level means and that a timezone-aware input
    gives identical results to a naive one.

    NOTE(review): name fixed — this test exercises ``kw.calc_wltidalindicators``
    but was previously named ``test_calc_HWLWtidalindicators`` (swapped with the
    extremes test below).
    """
    wl_stats = kw.calc_wltidalindicators(prediction)
    wl_stats_tzone = kw.calc_wltidalindicators(prediction.tz_localize("UTC+01:00"))

    expected_keys = ['wl_mean_peryear', 'wl_mean_permonth']
    for key in expected_keys:
        assert key in wl_stats.keys()
        # timezone-aware and naive input must yield identical indicators
        assert (wl_stats[key] == wl_stats_tzone[key]).all()

    wl_mean_peryear_expected = np.array([-1.74201105e-04,  4.19964454e-04, -6.46875301e-05, -2.36333457e-04,
           -5.63995348e-01])
    wl_mean_permonth_expected = np.array([ 9.62018771e-04, -3.31675667e-04,  8.53396688e-04,  3.16123043e-04,
            1.13262094e-03,  3.56346658e-04,  4.53580440e-04, -1.14071322e-03,
           -7.03490275e-04, -2.39556946e-03, -6.59549426e-04, -9.43460292e-04,
            6.51602796e-04, -7.09547154e-04,  5.96494198e-04,  3.99839817e-04,
            1.22594977e-03,  3.34308253e-04,  8.58938332e-04,  1.00068126e-03,
            3.79093532e-04,  9.16718937e-04,  8.28879097e-05, -8.22305918e-04,
           -2.17133133e-03,  2.47645095e-03, -2.13131823e-03, -7.85535462e-04,
           -1.76888980e-03, -2.41317388e-04,  3.29503094e-04,  1.19024147e-03,
            3.92011136e-04,  9.62169025e-04,  2.72527819e-04,  9.41814213e-04,
            1.09355549e-03, -1.26423276e-03,  1.09647701e-03,  2.82808337e-04,
            6.38054338e-05, -3.27907803e-04, -1.91306815e-03, -2.22220727e-03,
           -5.20148544e-04, -4.85500468e-04,  1.82340166e-04,  1.09674573e-03,
           -5.63995348e-01])
    assert np.allclose(wl_stats['wl_mean_peryear'].values, wl_mean_peryear_expected)
    assert np.allclose(wl_stats['wl_mean_permonth'].values, wl_mean_permonth_expected)


@pytest.mark.unittest
def test_calc_HWLWtidalindicators(prediction_extremes):
    """Check HW/LW indicator values and timezone-invariance of the results.

    NOTE(review): name fixed — this test exercises ``kw.calc_HWLWtidalindicators``
    but was previously named ``test_calc_wltidalindicators`` (swapped with the
    water-level test above).
    """
    ext_stats = kw.calc_HWLWtidalindicators(prediction_extremes)
    ext_stats_tzone = kw.calc_HWLWtidalindicators(prediction_extremes.tz_localize("UTC+01:00"))
    expected_keys = ['HW_mean', 'LW_mean',
                     'HW_mean_peryear', 'LW_mean_peryear',
                     'HW_monthmax_permonth', 'LW_monthmin_permonth',
                     'HW_monthmax_mean_peryear', 'LW_monthmin_mean_peryear',
                     'HW_monthmin_mean_peryear', 'LW_monthmax_mean_peryear']
    for key in expected_keys:
        assert key in ext_stats.keys()
        # timezone-aware and naive input must yield identical indicators
        assert (ext_stats[key] == ext_stats_tzone[key]).all()

    assert np.isclose(ext_stats['HW_mean'], 1.4680606974545858)
    assert np.isclose(ext_stats['LW_mean'], -0.8534702751276967)

    hw_mean_peryear_expected = np.array([1.50039216, 1.47449849, 1.45577267, 1.44152448])
    lw_mean_peryear_expected = np.array([-0.86997608, -0.85709345, -0.84652208, -0.84024267])
    assert np.allclose(ext_stats['HW_mean_peryear'].values, hw_mean_peryear_expected)
    assert np.allclose(ext_stats['LW_mean_peryear'].values, lw_mean_peryear_expected)

    hw_monthmax_permonth_expected = np.array([1.70887357, 1.70693041, 1.70448206, 1.70152976, 1.69961557,
           1.69838487, 1.69720287, 1.69551568, 1.69332416, 1.69062942,
           1.68743283, 1.68434737, 1.6826634 , 1.6817192 , 1.68041369,
           1.67860467, 1.67629303, 1.67347995, 1.67016685, 1.6691834 ,
           1.66816725, 1.66664809, 1.66471394, 1.66280594, 1.66131054,
           1.65951663, 1.65722138, 1.6549959 , 1.65389291, 1.65228867,
           1.65079986, 1.64923816, 1.64786607, 1.64599259, 1.6437117 ,
           1.6430369 , 1.64242114, 1.64186457, 1.6408059 , 1.63924574,
           1.63718491, 1.63493972, 1.63476655, 1.63452784, 1.63386586,
           1.63324442, 1.63212122, 1.63049693])
    lw_monthmin_permonth_expected = np.array([-0.99242254, -0.99061194, -0.9894141 , -0.98883749, -0.98797913,
           -0.98711698, -0.98601125, -0.98473314, -0.98292192, -0.98120697,
           -0.9807385 , -0.97999275, -0.97917249, -0.97818554, -0.97691195,
           -0.97590032, -0.97466971, -0.97314856, -0.972554  , -0.97224475,
           -0.97166419, -0.97080684, -0.96966737, -0.96824058, -0.96617301,
           -0.9655175 , -0.96534086, -0.96489778, -0.96418269, -0.96319016,
           -0.96191492, -0.96035182, -0.95979141, -0.95945245, -0.95913155,
           -0.95857315, -0.95774217, -0.95682775, -0.95604764, -0.95499088,
           -0.95463796, -0.95469814, -0.95457444, -0.95353554, -0.95271051,
           -0.95211143, -0.95124064, -0.95116319])
    assert np.allclose(ext_stats['HW_monthmax_permonth'].values, hw_monthmax_permonth_expected)
    assert np.allclose(ext_stats['LW_monthmin_permonth'].values, lw_monthmin_permonth_expected)

    hw_monthmax_mean_peryear_expected = np.array([1.69735572, 1.67290495, 1.65165594, 1.6362904 ])
    lw_monthmin_mean_peryear_expected = np.array([-0.98599889, -0.97359719, -0.96237644, -0.95419002])
    hw_monthmin_mean_peryear_expected = np.array([1.29661114, 1.27231033, 1.2516437 , 1.23561901])
    lw_monthmax_mean_peryear_expected = np.array([-0.67207461, -0.6593444 , -0.64799771, -0.6390193 ])
    assert np.allclose(ext_stats['HW_monthmax_mean_peryear'].values, hw_monthmax_mean_peryear_expected)
    assert np.allclose(ext_stats['LW_monthmin_mean_peryear'].values, lw_monthmin_mean_peryear_expected)
    assert np.allclose(ext_stats['HW_monthmin_mean_peryear'].values, hw_monthmin_mean_peryear_expected)
    assert np.allclose(ext_stats['LW_monthmax_mean_peryear'].values, lw_monthmax_mean_peryear_expected)
Loading