Skip to content

Commit

Permalink
neater handling of time in calc_havengetallen (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
veenstrajelmer authored Oct 23, 2024
1 parent e3d0e2a commit 2ed77ea
Showing 1 changed file with 29 additions and 45 deletions.
74 changes: 29 additions & 45 deletions kenmerkendewaarden/havengetallen.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,6 @@ def calc_havengetallen(

current_station = df_ext.attrs["station"]
logger.info(f"computing havengetallen for {current_station}")
# TODO: we added tz_localize on 29-5-2024 (https://github.com/Deltares-research/kenmerkendewaarden/issues/30)
# this means we pass a UTC+1 timeseries as if it were a UTC timeseries
# TODO: consider supporting timezones in hatyan.astrog.dT
if df_ext.index.tz is not None:
df_ext = df_ext.tz_localize(None)
df_ext = calc_HWLW_moonculm_combi(
data_pd_HWLW_12=df_ext, moonculm_offset=moonculm_offset
)
Expand All @@ -117,7 +112,7 @@ def get_moonculm_idxHWLWno(tstart, tstop):
# should be in UTC since that relates to the relation dateline/sun
data_pd_moonculm = astrog_culminations(tFirst=tstart, tLast=tstop)
data_pd_moonculm = data_pd_moonculm.tz_convert("UTC") # convert to UTC (is already)
data_pd_moonculm = data_pd_moonculm.tz_localize(None) # remove timezone
# data_pd_moonculm = data_pd_moonculm.tz_localize(None) # remove timezone
data_pd_moonculm["datetime"] = data_pd_moonculm.index
# dummy values for TA in hatyan.calc_HWLWnumbering()
data_pd_moonculm["values"] = data_pd_moonculm["type"]
Expand Down Expand Up @@ -150,45 +145,44 @@ def calc_HWLW_moonculm_combi(data_pd_HWLW_12: pd.DataFrame, moonculm_offset: int
Copy of the input dataframe enriched with several columns related to the moonculminations.
"""

moonculm_idxHWLWno = get_moonculm_idxHWLWno(
tstart=data_pd_HWLW_12.index.min() - dt.timedelta(days=3),
tstop=data_pd_HWLW_12.index.max(),
)
# correlate HWLW to moonculmination 2 days before.
moonculm_idxHWLWno.index = moonculm_idxHWLWno.index + moonculm_offset

data_pd_HWLW_idxHWLWno = calc_HWLWnumbering(data_pd_HWLW_12)
data_pd_HWLW_idxHWLWno["times"] = data_pd_HWLW_idxHWLWno.index
data_pd_HWLW_idxHWLWno = data_pd_HWLW_idxHWLWno.set_index("HWLWno", drop=False)

HW_bool = data_pd_HWLW_idxHWLWno["HWLWcode"] == 1
data_pd_HWLW_idxHWLWno.loc[HW_bool, "getijperiod"] = (
data_pd_HWLW_idxHWLWno.loc[HW_bool, "times"].iloc[1:].values
- data_pd_HWLW_idxHWLWno.loc[HW_bool, "times"].iloc[:-1]
) # this works properly since index is HWLW
# computing getijperiod like this works properly since index is HWLW
getijperiod = data_pd_HWLW_idxHWLWno.loc[HW_bool, "times"].diff()
data_pd_HWLW_idxHWLWno.loc[HW_bool, "getijperiod"] = getijperiod
# compute duurdaling
data_pd_HWLW_idxHWLWno.loc[HW_bool, "duurdaling"] = (
data_pd_HWLW_idxHWLWno.loc[~HW_bool, "times"]
- data_pd_HWLW_idxHWLWno.loc[HW_bool, "times"]
)
data_pd_HWLW_idxHWLWno["culm_time"] = moonculm_idxHWLWno[
"datetime"
] # couple HWLW to moonculminations two days earlier (this works since index is HWLWno)
data_pd_HWLW_idxHWLWno["culm_hr"] = (
data_pd_HWLW_idxHWLWno["culm_time"].dt.round("h").dt.hour
) % 12
data_pd_HWLW_idxHWLWno["HWLW_delay"] = (
data_pd_HWLW_idxHWLWno["times"] - data_pd_HWLW_idxHWLWno["culm_time"]
)
# couple HWLW to moonculminations two days earlier (this works since index is HWLWno)
tz_hwlw = data_pd_HWLW_12.index.tz
culm_time_utc = moonculm_idxHWLWno["datetime"]
culm_hr = culm_time_utc.dt.round("h").dt.hour % 12
data_pd_HWLW_idxHWLWno["culm_time"] = culm_time_utc.dt.tz_convert(tz_hwlw)
data_pd_HWLW_idxHWLWno["culm_hr"] = culm_hr
# compute time of hwlw after moonculmination
hwlw_delay = data_pd_HWLW_idxHWLWno["times"] - data_pd_HWLW_idxHWLWno["culm_time"]
data_pd_HWLW_idxHWLWno["HWLW_delay"] = hwlw_delay

# culm_addtime is a 2d and 2u20min correction, this shifts the x-axis of aardappelgrafiek
# HW is 2 days after culmination (so 4x25min difference between length of avg moonculm and length of 2 days)
# 1 hour (GMT to MET, alternatively we can also account for timezone differences elsewhere)
# TODO: alternatively we can use `df_ext.tz_convert()` instead of `df_ext.tz_localize()`
# not anymore: timezone correction from UTC to MET, we now handle it properly with timezones in dataframes
# 20 minutes (0 to 5 meridian)
# TODO: 20 minutes seems odd since moonculm is about tidal wave from ocean
culm_addtime = (
moonculm_offset * dt.timedelta(hours=12, minutes=25)
+ dt.timedelta(hours=1)
- dt.timedelta(minutes=20)
)
# TODO: culm_addtime=None provides the same gemgetijkromme now delay is not used for scaling anymore
Expand All @@ -203,33 +197,23 @@ def calc_HWLW_culmhr_summary(data_pd_HWLW):
data_pd_HW = data_pd_HWLW.loc[data_pd_HWLW["HWLWcode"] == 1]
data_pd_LW = data_pd_HWLW.loc[data_pd_HWLW["HWLWcode"] == 2]

hw_per_culmhr = data_pd_HW.groupby(data_pd_HW["culm_hr"])
lw_per_culmhr = data_pd_LW.groupby(data_pd_LW["culm_hr"])

HWLW_culmhr_summary = pd.DataFrame()
HWLW_culmhr_summary["HW_values_median"] = data_pd_HW.groupby(data_pd_HW["culm_hr"])[
"values"
].median()
HWLW_culmhr_summary["HW_delay_median"] = data_pd_HW.groupby(data_pd_HW["culm_hr"])[
"HWLW_delay"
].median()
HWLW_culmhr_summary["LW_values_median"] = data_pd_LW.groupby(data_pd_LW["culm_hr"])[
"values"
].median()
HWLW_culmhr_summary["LW_delay_median"] = data_pd_LW.groupby(data_pd_LW["culm_hr"])[
"HWLW_delay"
].median()
HWLW_culmhr_summary["HW_values_median"] = hw_per_culmhr["values"].median()
HWLW_culmhr_summary["HW_delay_median"] = hw_per_culmhr["HWLW_delay"].median()
HWLW_culmhr_summary["LW_values_median"] = lw_per_culmhr["values"].median()
HWLW_culmhr_summary["LW_delay_median"] = lw_per_culmhr["HWLW_delay"].median()
HWLW_culmhr_summary["tijverschil"] = (
HWLW_culmhr_summary["HW_values_median"]
- HWLW_culmhr_summary["LW_values_median"]
)
HWLW_culmhr_summary["getijperiod_median"] = data_pd_HW.groupby(
data_pd_HW["culm_hr"]
)["getijperiod"].median()
HWLW_culmhr_summary["duurdaling_median"] = data_pd_HW.groupby(
data_pd_HW["culm_hr"]
)["duurdaling"].median()

HWLW_culmhr_summary.loc["mean", :] = (
HWLW_culmhr_summary.mean()
) # add mean row to dataframe (not convenient to add immediately due to plotting with index 0-11)
HWLW_culmhr_summary["getijperiod_median"] = hw_per_culmhr["getijperiod"].median()
HWLW_culmhr_summary["duurdaling_median"] = hw_per_culmhr["duurdaling"].median()

# add mean row to dataframe (not convenient to add immediately due to plotting with index 0-11)
HWLW_culmhr_summary.loc["mean", :] = HWLW_culmhr_summary.mean()

# round all timedeltas to seconds to make outputformat nicer
for colname in HWLW_culmhr_summary.columns:
Expand Down

0 comments on commit 2ed77ea

Please sign in to comment.