Commit
add bounds argument and split log(a*exp(b)) (#149)
* add bounds argument and split log(a*exp(b))

* updated tests

* reverted formatting

* reverted formatting

---------

Co-authored-by: veenstrajelmer <[email protected]>
epsig and veenstrajelmer authored Oct 10, 2024
1 parent 0e1ae9f commit fcab9c0
Showing 6 changed files with 101 additions and 63 deletions.
9 changes: 6 additions & 3 deletions kenmerkendewaarden/overschrijding.py
@@ -5,6 +5,7 @@

import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib import ticker
from scipy import optimize, signal
@@ -318,17 +319,18 @@ def pfunc_inverse(p_X_gt_x, p_val_gt_threshold, threshold, sigma, alpha):
) ** (1 / alpha)

def der_pfunc(x, p_val_gt_threshold, threshold, alpha, sigma):
return (
longexpra = (
-p_val_gt_threshold
* (alpha * x ** (alpha - 1))
* (sigma ** (-alpha))
* np.exp(-((x / sigma) ** alpha) + ((threshold / sigma) ** alpha))
)
longexprb = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)
return np.log(-longexpra) + longexprb

def cost_func(params, *args):
return -np.sum(
[
np.log(-der_pfunc(x, args[0], args[1], params[0], params[1]))
der_pfunc(x, args[0], args[1], params[0], params[1])
for x in args[2]
]
)
@@ -339,6 +341,7 @@ def cost_func(params, *args):
x0=initial_guess,
args=(p_val_gt_threshold, threshold, values[values > threshold]),
method="Nelder-Mead",
bounds = [[-math.inf, math.inf], [1e-10, math.inf]],
options={"maxiter": 1e4},
)
if result.success:
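
The core numerical change in `der_pfunc` is the split named in the commit title: the cost function needs `log(-p'(x))`, and `-p'(x)` has the form `a * exp(b)`. Evaluating the product first and taking the logarithm afterwards can underflow (`exp(b)` rounds to 0, so the log becomes `-inf`) for the exponents that occur during fitting; writing it as `log(a) + b` never evaluates the exponential at all. A minimal sketch of the two forms, with illustrative values and names that mirror the diff but are not the package API:

```python
import numpy as np

def log_density_product(x, p_exc, threshold, alpha, sigma):
    """Pre-change form: log(a * exp(b)), evaluated via the product."""
    a = p_exc * alpha * x ** (alpha - 1) * sigma ** (-alpha)
    b = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)
    return np.log(a * np.exp(b))  # exp(b) can underflow to 0, so the log returns -inf

def log_density_split(x, p_exc, threshold, alpha, sigma):
    """Post-change form: log(a) + b, no exponential evaluated."""
    a = p_exc * alpha * x ** (alpha - 1) * sigma ** (-alpha)
    b = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)
    return np.log(a) + b

# Illustrative parameter values with a large negative exponent:
x, p_exc, threshold, alpha, sigma = 5.0, 0.02, 2.0, 8.0, 0.4
print(log_density_product(x, p_exc, threshold, alpha, sigma))  # -inf (underflow)
print(log_density_split(x, p_exc, threshold, alpha, sigma))    # finite, about -5.96e8
```

The two forms are algebraically identical; only the floating-point behaviour differs, which is consistent with the small shifts in the expected values of the tests below.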
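The second change passes a `bounds` argument to `scipy.optimize.minimize`: alpha stays unconstrained while sigma gets a lower bound of `1e-10`, so the Nelder-Mead search can never wander into a non-positive scale parameter (Nelder-Mead honours `bounds` from SciPy 1.7 onwards). A small self-contained sketch of a bounded Nelder-Mead fit in the same spirit; the objective and the synthetic data are made up for illustration and are not the package code:

```python
import numpy as np
from scipy import optimize

rng = np.random.default_rng(0)
data = 2.0 * rng.weibull(1.5, size=200)  # synthetic positive-valued sample

def neg_log_likelihood(params, values):
    alpha, sigma = params
    if alpha <= 0:
        return np.inf  # keep the simplex away from invalid shape values
    # Weibull log-density; requires sigma > 0, which the bound below guarantees
    logpdf = (np.log(alpha) - alpha * np.log(sigma)
              + (alpha - 1) * np.log(values) - (values / sigma) ** alpha)
    return -np.sum(logpdf)

result = optimize.minimize(
    neg_log_likelihood,
    x0=[1.0, 1.0],
    args=(data,),
    method="Nelder-Mead",
    bounds=[(-np.inf, np.inf), (1e-10, np.inf)],  # alpha free, sigma strictly positive
    options={"maxiter": 1e4},
)
print(result.success, result.x)  # shape and scale estimates near 1.5 and 2.0
```
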
8 changes: 4 additions & 4 deletions tests/test_data_retrieve.py
@@ -33,10 +33,10 @@ def test_drop_duplicate_times(df_meas_2010, caplog):

assert len(meas_duplicated) == 105120
assert len(meas_clean) == 52560

# assert logging messages
assert '52530 rows with duplicated time-value-combinations dropped' in caplog.text
assert '30 rows with duplicated times dropped' in caplog.text
assert "52530 rows with duplicated time-value-combinations dropped" in caplog.text
assert "30 rows with duplicated times dropped" in caplog.text


@pytest.mark.unittest
@@ -45,7 +45,7 @@ def test_drop_duplicate_times_noaction(df_meas_2010, caplog):

assert len(df_meas_2010) == 52560
assert len(meas_clean) == 52560

# assert that there are no logging messages
assert caplog.text == ""

79 changes: 44 additions & 35 deletions tests/test_overschrijding.py
@@ -68,15 +68,15 @@ def test_calc_overschrijding(df_ext_12_2010_2014):
expected_values = np.array(
[
1.93,
2.09327434,
2.26311592,
2.44480348,
2.70434509,
2.91627091,
3.14247786,
3.46480369,
3.72735283,
4.00701551,
2.09356726,
2.2637632,
2.44533302,
2.70383299,
2.91416492,
3.13795447,
3.45560027,
3.71330277,
3.98682045,
]
)
assert np.allclose(dist["geinterpoleerd"].values, expected_values)
@@ -96,11 +96,24 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014):
1 / 100,
1 / 200,
]
hydra_values = np.array([2.473, 3.18 , 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865,
6.328, 7.207])
hydra_index = np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02,
3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04,
3.33333333e-05, 1.00000000e-05, 1.00000000e-06])
hydra_values = np.array(
[2.473, 3.18, 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865, 6.328, 7.207]
)
hydra_index = np.array(
[
1.00000000e00,
1.00000000e-01,
2.00000000e-02,
1.00000000e-02,
3.33333333e-03,
1.00000000e-03,
3.33333333e-04,
1.00000000e-04,
3.33333333e-05,
1.00000000e-05,
1.00000000e-06,
]
)
ser_hydra = pd.Series(hydra_values, index=hydra_index)
ser_hydra.attrs = df_ext_12_2010_2014.attrs
dist_hydra = {"Hydra-NL": ser_hydra}
@@ -121,12 +134,12 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014):
expected_values = np.array(
[
1.93,
2.09327434,
2.26311587,
2.46299612,
2.79965222,
3.08436295,
3.4987347,
2.09356726,
2.26376316,
2.46348569,
2.79932582,
3.08359924,
3.49814949,
4.043,
4.164,
4.3095,
@@ -227,20 +240,18 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014):
expected_values_normal = np.array(
[
1.93,
2.09327434,
2.26311592,
2.44480348,
2.70434509,
2.91627091,
3.14247786,
3.46480369,
3.72735283,
4.00701551,
2.09356726,
2.2637632,
2.44533302,
2.70383299,
2.91416492,
3.13795447,
3.45560027,
3.71330277,
3.98682045,
]
)
assert np.allclose(
dist_normal["geinterpoleerd"].values, expected_values_normal
)
assert np.allclose(dist_normal["geinterpoleerd"].values, expected_values_normal)
expected_values_clip = np.array(
[
1.93,
@@ -255,9 +266,7 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014):
3.90683996,
]
)
assert np.allclose(
dist_clip["geinterpoleerd"].values, expected_values_clip
)
assert np.allclose(dist_clip["geinterpoleerd"].values, expected_values_clip)


@pytest.mark.unittest
10 changes: 7 additions & 3 deletions tests/test_slotgemiddelden.py
@@ -69,6 +69,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
assert set(slotgemiddelden_dict_noext.keys()) == set(expected_keys_noext)

# assertion of values
# fmt: off
wl_mean_peryear_expected = np.array([0.07960731, 0.08612119, 0.0853051,
0.07010864, 0.10051922])
hw_mean_peryear_expected = np.array([1.13968839, 1.12875177, 1.13988685,
@@ -77,6 +78,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
-0.61334278, -0.58024113])
range_mean_peryear_expected = np.array([1.74530541, 1.71964539, 1.73330976,
1.75488888, 1.77022697])
# fmt: on
assert np.allclose(
slotgemiddelden_dict_inclext["wl_mean_peryear"].values, wl_mean_peryear_expected
)
@@ -88,9 +90,10 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
)
assert np.allclose(
slotgemiddelden_dict_inclext["tidalrange_mean_peryear"].values,
range_mean_peryear_expected
range_mean_peryear_expected,
)

# fmt: off
wl_model_fit_expected = np.array([0.0141927, 0.08612119, 0.0853051,
0.07010864, 0.10051922, 0.23137634])
hw_model_fit_expected = np.array([1.05295416, 1.12875177, 1.13988685,
@@ -99,6 +102,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
-0.61334278, -0.58024113, -0.42969074])
range_model_fit_expected = np.array([1.72715816, 1.71964539, 1.73330976,
1.75488888, 1.77022697, 1.76587273])
# fmt: on
assert np.allclose(
slotgemiddelden_dict_inclext["wl_model_fit"].values, wl_model_fit_expected
)
@@ -109,8 +113,8 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
slotgemiddelden_dict_inclext["LW_model_fit"].values, lw_model_fit_expected
)
assert np.allclose(
slotgemiddelden_dict_inclext["tidalrange_model_fit"].values,
range_model_fit_expected
slotgemiddelden_dict_inclext["tidalrange_model_fit"].values,
range_model_fit_expected,
)


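The `# fmt: off` / `# fmt: on` pairs added around the hand-aligned expected arrays tell Black to leave the enclosed lines untouched, so the column-aligned literals survive an autoformatting pass. A tiny illustration (not taken from the repository):

```python
import numpy as np

# Black reformats code, but skips everything between the two markers.
# fmt: off
expected = np.array([0.07960731, 0.08612119, 0.0853051,
                     0.07010864, 0.10051922])
# fmt: on
```
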
45 changes: 31 additions & 14 deletions tests/test_tidalindicators.py
@@ -42,26 +42,39 @@ def test_calc_HWLWtidalrange(df_ext_12_2010):

@pytest.mark.unittest
def test_calc_HWLWtidalindicators(df_ext_12_2010_2014):
ext_stats_notimezone = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014.tz_localize(None))
ext_stats_notimezone = kw.calc_HWLWtidalindicators(
df_ext_12_2010_2014.tz_localize(None)
)
ext_stats = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014)
ext_stats_min100 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=1)
ext_stats_min099 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.99)
ext_stats_min098 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.98)

expected_keys = ['HW_mean', 'LW_mean',
'HW_mean_peryear', 'LW_mean_peryear',
'HW_monthmax_permonth', 'LW_monthmin_permonth',
'HW_monthmax_mean_peryear', 'LW_monthmax_mean_peryear',
'HW_monthmin_mean_peryear', 'LW_monthmin_mean_peryear']
ext_stats_min099 = kw.calc_HWLWtidalindicators(
df_ext_12_2010_2014, min_coverage=0.99
)
ext_stats_min098 = kw.calc_HWLWtidalindicators(
df_ext_12_2010_2014, min_coverage=0.98
)

expected_keys = [
"HW_mean",
"LW_mean",
"HW_mean_peryear",
"LW_mean_peryear",
"HW_monthmax_permonth",
"LW_monthmin_permonth",
"HW_monthmax_mean_peryear",
"LW_monthmax_mean_peryear",
"HW_monthmin_mean_peryear",
"LW_monthmin_mean_peryear",
]
for key in expected_keys:
assert key in ext_stats.keys()
assert (ext_stats[key] == ext_stats_notimezone[key]).all()

assert ext_stats_notimezone['HW_monthmax_permonth'].isnull().sum() == 0
assert ext_stats['HW_monthmax_permonth'].isnull().sum() == 0
assert ext_stats_min100['HW_monthmax_permonth'].isnull().sum() == 1
assert ext_stats_min099['HW_monthmax_permonth'].isnull().sum() == 1
assert ext_stats_min098['HW_monthmax_permonth'].isnull().sum() == 0
assert ext_stats_notimezone["HW_monthmax_permonth"].isnull().sum() == 0
assert ext_stats["HW_monthmax_permonth"].isnull().sum() == 0
assert ext_stats_min100["HW_monthmax_permonth"].isnull().sum() == 1
assert ext_stats_min099["HW_monthmax_permonth"].isnull().sum() == 1
assert ext_stats_min098["HW_monthmax_permonth"].isnull().sum() == 0


@pytest.mark.unittest
@@ -77,6 +90,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014):
wl_mean_peryear_expected = np.array(
[0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922]
)
# fmt: off
wl_mean_permonth_expected = np.array([
-0.00227151, 0.089313 , 0.04443996, -0.03440509, -0.00206317,
0.04431481, 0.03877688, 0.18267697, 0.13494907, 0.18367832,
@@ -90,6 +104,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014):
0.17321909, 0.23108102, 0.19502688, 0.06281138, 0.08588046,
-0.00553763, 0.03490278, 0.03113575, 0.03134954, 0.10553763,
0.16540771, 0.12535648, 0.20802195, 0.10014352, 0.25624552])
# fmt: on
assert np.allclose(wl_stats["wl_mean_peryear"].values, wl_mean_peryear_expected)
assert np.allclose(wl_stats["wl_mean_permonth"].values, wl_mean_permonth_expected)

@@ -221,6 +236,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014):
assert np.allclose(ext_stats["HW_mean_peryear"].values, hw_mean_peryear_expected)
assert np.allclose(ext_stats["LW_mean_peryear"].values, lw_mean_peryear_expected)

# fmt: off
hw_monthmax_permonth_expected = np.array([
1.94, 1.89, 1.86, 1.55, 1.74, 1.58, 1.54, 2.07, 2.11, 2.06, 1.9 ,
1.75, 1.69, 1.82, 1.49, 1.39, 1.4 , 1.71, 1.72, 1.66, 1.69, 1.59,
@@ -236,6 +252,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014):
-1.11, -1.65, -1.37, -1.11, -1.11, -1.05, -0.98, -1.07, -0.88,
-1.05, -1.15, -1.07, -1.32, -1.31, -1.21, -1.08, -1. , -1.03,
-1.07, -0.83, -0.98, -0.97, -0.99, -1.3 ])
# fmt: on
assert np.allclose(
ext_stats["HW_monthmax_permonth"].values, hw_monthmax_permonth_expected
)
13 changes: 9 additions & 4 deletions tests/test_utils.py
@@ -12,8 +12,10 @@
def test_raise_extremes_with_aggers_raise_12345df(df_ext):
with pytest.raises(ValueError) as e:
raise_extremes_with_aggers(df_ext)
assert ("df_ext should only contain extremes (HWLWcode 1/2), "
"but it also contains aggers (HWLWcode 3/4/5") in str(e.value)
assert (
"df_ext should only contain extremes (HWLWcode 1/2), "
"but it also contains aggers (HWLWcode 3/4/5"
) in str(e.value)


@pytest.mark.unittest
@@ -24,7 +26,10 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010):
@pytest.mark.unittest
def test_raise_extremes_with_aggers_emptydf():
import pandas as pd
time_index = pd.DatetimeIndex([], dtype='datetime64[ns, Etc/GMT-1]', name='time', freq=None)
df_ext = pd.DataFrame({"HWLWcode":[]},index=time_index)

time_index = pd.DatetimeIndex(
[], dtype="datetime64[ns, Etc/GMT-1]", name="time", freq=None
)
df_ext = pd.DataFrame({"HWLWcode": []}, index=time_index)
df_ext.attrs["station"] = "dummy"
raise_extremes_with_aggers(df_ext=df_ext)
