diff --git a/examples/KWK_process.py b/examples/KWK_process.py
index 2888037..f2a1ce2 100644
--- a/examples/KWK_process.py
+++ b/examples/KWK_process.py
@@ -16,15 +16,9 @@
 # TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT']
 # overview in https://github.com/Deltares-research/kenmerkendewaarden/issues/101 and the linked wm-ws-dl issue
-tstart_dt = pd.Timestamp(2011,1,1, tz="UTC+01:00")
-tstop_dt = pd.Timestamp(2021,1,1, tz="UTC+01:00")
-if ((tstop_dt.year-tstart_dt.year)==10) & (tstop_dt.month==tstop_dt.day==tstart_dt.month==tstart_dt.day==1):
-    year_slotgem = tstop_dt.year
-else:
-    year_slotgem = 'invalid'
+year_slotgem = 2021
 print(f'year_slotgem: {year_slotgem}')
 
-# dir_base = r'p:\11208031-010-kenmerkende-waarden-k\work'
 dir_base = r'p:\11210325-005-kenmerkende-waarden\work'
 dir_meas = os.path.join(dir_base,'measurements_wl_18700101_20240101')
@@ -84,32 +78,31 @@
     # timeseries are used for slotgemiddelden, gemgetijkrommen (needs slotgem+havget)
     df_meas_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False,
                                        nap_correction=nap_correction, drop_duplicates=drop_duplicates)
-    if df_meas_all is not None:
-        #crop measurement data
-        df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))
-
     # extremes are used for slotgemiddelden, havengetallen, overschrijding
-    df_ext_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
+    df_ext_12345_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
                                       nap_correction=nap_correction, drop_duplicates=drop_duplicates)
-    if df_ext_all is not None:
-        # TODO: make calc_HWLW12345to12() faster: https://github.com/Deltares/hatyan/issues/311
-        df_ext_all_12 = hatyan.calc_HWLW12345to12(df_ext_all) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
-        #crop timeseries to 10y
-        df_ext_10y_12 = hatyan.crop_timeseries(df_ext_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False)
+    if df_meas_all is None or df_ext_12345_all is None:
+        raise ValueError(f"missing data for {current_station}")
+
+    # convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
+    # TODO: make calc_HWLW12345to12() faster: https://github.com/Deltares/hatyan/issues/311
+    df_ext_all = hatyan.calc_HWLW12345to12(df_ext_12345_all)
+    # crop measurement data to (excluding) year_slotgem
+    df_meas_todate = df_meas_all.loc[:str(year_slotgem-1)]
+    df_ext_todate = df_ext_all.loc[:str(year_slotgem-1)]
 
     #### TIDAL INDICATORS
-    if compute_indicators and df_meas_all is not None and df_ext_all is not None:
+    if compute_indicators:
         print(f'tidal indicators for {current_station}')
         # compute and plot tidal indicators
-        dict_wltidalindicators = kw.calc_wltidalindicators(df_meas=df_meas_all, min_coverage=min_coverage)
-        dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(df_ext=df_ext_all_12, min_coverage=min_coverage)
+        dict_wltidalindicators = kw.calc_wltidalindicators(df_meas=df_meas_todate, min_coverage=min_coverage)
+        dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(df_ext=df_ext_todate, min_coverage=min_coverage)
 
         # add hat/lat
-        df_meas_19y = df_meas_all.loc["2001":"2019"]
-        hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_19y)
+        hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_todate)
         dict_HWLWtidalindicators["hat"] = hat
         dict_HWLWtidalindicators["lat"] = lat
@@ -131,17 +124,17 @@
     #### SLOTGEMIDDELDEN
     # TODO: nodal cycle is not in same phase for all stations, this is not physically correct.
     # TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND)
-    if compute_slotgem and df_meas_all is not None and df_ext_all is not None:
+    if compute_slotgem:
         print(f'slotgemiddelden for {current_station}')
 
         # compute slotgemiddelden, exclude all values after tstop_dt (is year_slotgem)
         # including years with too little values and years before physical break
-        slotgemiddelden_all = kw.calc_slotgemiddelden(df_meas=df_meas_all.loc[:tstop_dt],
-                                                      df_ext=df_ext_all_12.loc[:tstop_dt],
+        slotgemiddelden_all = kw.calc_slotgemiddelden(df_meas=df_meas_todate,
+                                                      df_ext=df_ext_todate,
                                                       min_coverage=0, clip_physical_break=True)
         # only years with enough values and after potential physical break
-        slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=df_meas_all.loc[:tstop_dt],
-                                                        df_ext=df_ext_all_12.loc[:tstop_dt],
+        slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=df_meas_todate,
+                                                        df_ext=df_ext_todate,
                                                         min_coverage=min_coverage, clip_physical_break=True)
 
         # plot slotgemiddelden
@@ -180,9 +173,9 @@
 
     ### HAVENGETALLEN
-    if compute_havengetallen and df_ext_all is not None:
+    if compute_havengetallen:
         print(f'havengetallen for {current_station}')
-        df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_10y_12, return_df_ext=True)
+        df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_todate, return_df_ext=True)
 
         # plot hwlw per timeclass including median
         fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
@@ -200,18 +193,18 @@
 
     ##### GEMIDDELDE GETIJKROMMEN
-    if compute_gemgetij and df_meas_all is not None and df_ext_all is not None:
+    if compute_gemgetij:
         print(f'gemiddelde getijkrommen for {current_station}')
         pred_freq = "10s" # frequency influences the accuracy of havengetallen-scaling and is writing frequency of BOI timeseries
 
         # derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period
-        gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=None,
+        gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=None,
                                               freq=pred_freq, nb=0, nf=0,
                                               scale_extremes=False, scale_period=False)
-        gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=df_ext_10y_12,
+        gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=df_ext_todate,
                                                freq=pred_freq, nb=1, nf=1,
                                                scale_extremes=True, scale_period=False)
-        gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=df_ext_10y_12,
+        gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=df_ext_todate,
                                                    freq=pred_freq, nb=0, nf=4,
                                                    scale_extremes=True, scale_period=True)
@@ -280,15 +273,12 @@ def add_validation_dist(dist_dict, dist_type, station):
 
     freqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200,
                         1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000]
-    if compute_overschrijding and df_ext_all is not None:
+    if compute_overschrijding:
         print(f'overschrijdingsfrequenties for {current_station}')
 
-        # only include data up to year_slotgem
-        df_measext = df_ext_all_12.loc[:tstop_dt]
-
         # 1. Exceedance
         dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station)
-        dist_exc = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None,
+        dist_exc = kw.calc_overschrijding(df_ext=df_ext_todate, rule_type=None, rule_value=None,
                                           clip_physical_break=True, dist=dist_exc_hydra,
                                           interp_freqs=freqs_interested)
         add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
@@ -299,7 +289,7 @@ def add_validation_dist(dist_dict, dist_type, station):
         fig.savefig(os.path.join(dir_overschrijding, f'kw{year_slotgem}-exceedance-{current_station}.png'))
 
         # 2. Deceedance
-        dist_dec = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None,
+        dist_dec = kw.calc_overschrijding(df_ext=df_ext_todate, rule_type=None, rule_value=None,
                                           clip_physical_break=True, inverse=True,
                                           interp_freqs=freqs_interested)
         add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
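A side note on the `.loc[:str(year_slotgem-1)]` cropping introduced above: partial-string indexing on a pandas `DatetimeIndex` is end-inclusive, so slicing up to the year before `year_slotgem` keeps all of 2020 and drops the slotgemiddelde year itself. A minimal sketch with synthetic data (series and variable names are illustrative, not taken from the script):

```python
import numpy as np
import pandas as pd

# hypothetical 10-minute waterlevel series ending at 2021-01-01 00:00
index = pd.date_range("2019-01-01", "2021-01-01", freq="10min", tz="UTC+01:00")
df = pd.DataFrame({"values": np.zeros(len(index))}, index=index)

year_slotgem = 2021
# partial-string slicing is end-inclusive: str(year_slotgem-1) keeps everything
# up to and including 2020-12-31 23:50 and drops all 2021 samples
df_todate = df.loc[:str(year_slotgem - 1)]
assert df_todate.index[-1] == pd.Timestamp("2020-12-31 23:50", tz="UTC+01:00")
```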
diff --git a/kenmerkendewaarden/gemiddeldgetij.py b/kenmerkendewaarden/gemiddeldgetij.py
index ff19364..df95da4 100644
--- a/kenmerkendewaarden/gemiddeldgetij.py
+++ b/kenmerkendewaarden/gemiddeldgetij.py
@@ -11,7 +11,9 @@
 from kenmerkendewaarden.tidalindicators import (calc_HWLWtidalrange,
                                                 calc_getijcomponenten)
 from kenmerkendewaarden.havengetallen import calc_havengetallen
-from kenmerkendewaarden.utils import TimeSeries_TimedeltaFormatter_improved
+from kenmerkendewaarden.utils import (crop_timeseries_last_nyears,
+                                      TimeSeries_TimedeltaFormatter_improved,
+                                      )
 from matplotlib.ticker import MaxNLocator, MultipleLocator
@@ -45,9 +47,11 @@ def calc_gemiddeldgetij(
     Parameters
     ----------
     df_meas : pd.DataFrame
-        Timeseries of 10 years of waterlevel measurements.
+        Timeseries of waterlevel measurements. The last 10 years of this
+        timeseries are used to compute the getijkrommes.
     df_ext : pd.DataFrame, optional
-        Timeseries of 10 years of waterlevel extremes (1/2 only). The default is None.
+        Timeseries of waterlevel extremes (1/2 only). The last 10 years of this
+        timeseries are used to compute the getijkrommes. The default is None.
     min_coverage : float, optional
         The minimal required coverage of the df_ext timeseries. Passed on to `calc_havengetallen()`. The default is None.
     freq : str, optional
@@ -69,10 +73,10 @@
     dictionary with Dataframes with gemiddeld getij for mean, spring and neap tide.
 
     """
-    data_pd_meas_10y = df_meas
+    df_meas_10y = crop_timeseries_last_nyears(df=df_meas, nyears=10)
     tstop_dt = df_meas.index.max()
 
-    current_station = data_pd_meas_10y.attrs["station"]
+    current_station = df_meas_10y.attrs["station"]
 
     # TODO: deprecate debug argument+plot (maybe use max HW instead of max tidalrange?)
     # TODO: add correctie havengetallen HW/LW av/sp/np met slotgemiddelde uit PLSS/modelfit (HW/LW av)
@@ -90,7 +94,8 @@
         station_attrs = [df.attrs["station"] for df in [df_meas, df_ext]]
         assert all(x == station_attrs[0] for x in station_attrs)
 
-        df_havengetallen = calc_havengetallen(df_ext=df_ext, min_coverage=min_coverage)
+        df_ext_10y = crop_timeseries_last_nyears(df_ext, nyears=10)
+        df_havengetallen = calc_havengetallen(df_ext=df_ext_10y, min_coverage=min_coverage)
         HW_sp, LW_sp = df_havengetallen.loc[
             0, ["HW_values_median", "LW_values_median"]
         ]  # spring
@@ -106,13 +111,13 @@
         HW_np = LW_np = None
 
     # derive components via TA on measured waterlevels
-    comp_frommeasurements_avg, comp_av = get_gemgetij_components(data_pd_meas_10y)
+    comp_frommeasurements_avg, comp_av = get_gemgetij_components(df_meas_10y)
 
     times_pred_1mnth = pd.date_range(
         start=pd.Timestamp(tstop_dt.year, 1, 1, 0, 0) - pd.Timedelta(hours=12),
         end=pd.Timestamp(tstop_dt.year, 2, 1, 0, 0),
         freq=freq,
-        tz=data_pd_meas_10y.index.tz,
+        tz=df_meas_10y.index.tz,
     )  # start 12 hours in advance, to assure also corrected values on desired tstart
     comp_av.attrs["nodalfactors"] = (
         False  # nodalfactors=False to guarantee repetative signal
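Because `calc_gemiddeldgetij()` now crops to the last 10 years internally, callers no longer have to pre-crop their input. A usage sketch, under the assumption that `df_meas` and `df_ext` hold the full measurement and extremes series as loaded in the example script above:

```python
import kenmerkendewaarden as kw

# the last 10 years of both inputs are selected internally via
# crop_timeseries_last_nyears(); df_ext may be None when scale_extremes=False
gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas, df_ext=df_ext,
                                       freq="10s", nb=1, nf=1,
                                       scale_extremes=True, scale_period=False)
```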
diff --git a/kenmerkendewaarden/havengetallen.py b/kenmerkendewaarden/havengetallen.py
index c5927fa..f9b198d 100644
--- a/kenmerkendewaarden/havengetallen.py
+++ b/kenmerkendewaarden/havengetallen.py
@@ -17,6 +17,7 @@
 )
 from kenmerkendewaarden.utils import (
     raise_extremes_with_aggers,
+    crop_timeseries_last_nyears,
     TimeSeries_TimedeltaFormatter_improved,
 )
 from matplotlib.ticker import MultipleLocator
@@ -46,7 +47,8 @@ def calc_havengetallen(
     Parameters
     ----------
     df_ext : pd.DataFrame
-        DataFrame with extremes (highs and lows, no aggers).
+        DataFrame with extremes (highs and lows, no aggers). The last 10 years of this
+        timeseries are used to compute the havengetallen.
     return_df : bool
         Whether to return the enriched input dataframe. Default is False.
     min_coverage : float, optional
@@ -64,17 +66,18 @@
     df_havengetallen : pd.DataFrame
         DataFrame with havengetallen for all hour-classes. 0 corresponds to spring, 6 corresponds to neap, mean is mean.
-    df_ext : pd.DataFrame
+    df_ext_culm : pd.DataFrame
         An enriched copy of the input DataFrame including a 'culm_hr' column.
 
     """
     raise_extremes_with_aggers(df_ext)
+    df_ext_10y = crop_timeseries_last_nyears(df=df_ext, nyears=10)
 
     # check if coverage is high enough for havengetallen
     if min_coverage is not None:
         # TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year
         # TODO: if we supply 4 years of complete data instead of 10 years, no error is raised
-        data_pd_hw = df_ext.loc[df_ext["HWLWcode"] == 1]["values"]
+        data_pd_hw = df_ext_10y.loc[df_ext_10y["HWLWcode"] == 1]["values"]
         df_actual_counts_peryear = compute_actual_counts(data_pd_hw, freq="Y")
         df_expected_counts_peryear = compute_expected_counts(data_pd_hw, freq="Y")
         # floor expected counts to avoid rounding issues
@@ -94,13 +97,13 @@
             f"min_coverage={min_coverage}:\n{df_debug}"
         )
 
-    current_station = df_ext.attrs["station"]
+    current_station = df_ext_10y.attrs["station"]
     logger.info(f"computing havengetallen for {current_station}")
-    df_ext = calc_hwlw_moonculm_combi(df_ext=df_ext, moonculm_offset=moonculm_offset)
-    df_havengetallen = calc_HWLW_culmhr_summary(df_ext)  # TODO: maybe add tijverschil
+    df_ext_culm = calc_hwlw_moonculm_combi(df_ext=df_ext_10y, moonculm_offset=moonculm_offset)
+    df_havengetallen = calc_HWLW_culmhr_summary(df_ext_culm)  # TODO: maybe add tijverschil
     logger.info("computing havengetallen done")
     if return_df_ext:
-        return df_havengetallen, df_ext
+        return df_havengetallen, df_ext_culm
     else:
        return df_havengetallen
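Similarly, `calc_havengetallen()` now selects the last 10 years of the supplied extremes itself. A usage sketch, assuming `df_ext` is an extremes series (HWLWcode 1/2 only, no aggers) as produced in the example script with `hatyan.calc_HWLW12345to12()`:

```python
import kenmerkendewaarden as kw

# the last 10 years of df_ext are used internally; with return_df_ext=True the
# enriched copy (including the new 'culm_hr' column) is returned as well
df_havengetallen, df_ext_culm = kw.calc_havengetallen(df_ext=df_ext, return_df_ext=True)
```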
diff --git a/kenmerkendewaarden/tidalindicators.py b/kenmerkendewaarden/tidalindicators.py
index 04e77e4..e6cd204 100644
--- a/kenmerkendewaarden/tidalindicators.py
+++ b/kenmerkendewaarden/tidalindicators.py
@@ -9,7 +9,8 @@
 import matplotlib.pyplot as plt
 import hatyan
 import logging
-from kenmerkendewaarden.utils import raise_extremes_with_aggers
+from kenmerkendewaarden.utils import (raise_extremes_with_aggers,
+                                      crop_timeseries_last_nyears)
 
 
 __all__ = [
@@ -380,7 +381,7 @@
     return hat, lat
 
 
-def calc_hat_lat_frommeasurements(df_meas_19y: pd.DataFrame) -> tuple:
+def calc_hat_lat_frommeasurements(df_meas: pd.DataFrame) -> tuple:
     """
     Derive highest and lowest astronomical tide (HAT/LAT) from a measurement timeseries of 19 years.
     Tidal components are derived for each year of the measurement timeseries.
@@ -392,8 +393,9 @@
     Parameters
     ----------
-    df_meas_19y : pd.DataFrame
-        Measurements timeseries of 19 years.
+    df_meas : pd.DataFrame
+        Measurements timeseries. The last 19 years of this
+        timeseries are used to compute hat and lat.
 
     Returns
     -------
@@ -402,6 +404,7 @@
 
     """
+    df_meas_19y = crop_timeseries_last_nyears(df=df_meas, nyears=19)
     num_years = len(df_meas_19y.index.year.unique())
     if num_years != 19:
         raise ValueError(
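The HAT/LAT helper follows the same pattern, but is stricter: it crops to the last 19 years and raises if the result does not cover exactly 19 distinct years. A usage sketch, assuming `df_meas` is a sufficiently long measurement series as returned by `kw.read_measurements()`:

```python
import kenmerkendewaarden as kw

# crops to the last 19 years internally; raises a ValueError when the cropped
# series does not contain exactly 19 distinct years
hat, lat = kw.calc_hat_lat_frommeasurements(df_meas)
```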
""" raise_extremes_with_aggers(df_ext) + df_ext_10y = crop_timeseries_last_nyears(df=df_ext, nyears=10) # check if coverage is high enough for havengetallen if min_coverage is not None: # TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year # TODO: if we supply 4 years of complete data instead of 10 years, no error is raised - data_pd_hw = df_ext.loc[df_ext["HWLWcode"] == 1]["values"] + data_pd_hw = df_ext_10y.loc[df_ext_10y["HWLWcode"] == 1]["values"] df_actual_counts_peryear = compute_actual_counts(data_pd_hw, freq="Y") df_expected_counts_peryear = compute_expected_counts(data_pd_hw, freq="Y") # floor expected counts to avoid rounding issues @@ -94,13 +97,13 @@ def calc_havengetallen( f"min_coverage={min_coverage}:\n{df_debug}" ) - current_station = df_ext.attrs["station"] + current_station = df_ext_10y.attrs["station"] logger.info(f"computing havengetallen for {current_station}") - df_ext = calc_hwlw_moonculm_combi(df_ext=df_ext, moonculm_offset=moonculm_offset) - df_havengetallen = calc_HWLW_culmhr_summary(df_ext) # TODO: maybe add tijverschil + df_ext_culm = calc_hwlw_moonculm_combi(df_ext=df_ext_10y, moonculm_offset=moonculm_offset) + df_havengetallen = calc_HWLW_culmhr_summary(df_ext_culm) # TODO: maybe add tijverschil logger.info("computing havengetallen done") if return_df_ext: - return df_havengetallen, df_ext + return df_havengetallen, df_ext_culm else: return df_havengetallen diff --git a/kenmerkendewaarden/tidalindicators.py b/kenmerkendewaarden/tidalindicators.py index 04e77e4..e6cd204 100644 --- a/kenmerkendewaarden/tidalindicators.py +++ b/kenmerkendewaarden/tidalindicators.py @@ -9,7 +9,8 @@ import matplotlib.pyplot as plt import hatyan import logging -from kenmerkendewaarden.utils import raise_extremes_with_aggers +from kenmerkendewaarden.utils import (raise_extremes_with_aggers, + crop_timeseries_last_nyears) __all__ = [ @@ -380,7 +381,7 @@ def calc_hat_lat_fromcomponents(comp: pd.DataFrame) -> tuple: return hat, lat -def calc_hat_lat_frommeasurements(df_meas_19y: pd.DataFrame) -> tuple: +def calc_hat_lat_frommeasurements(df_meas: pd.DataFrame) -> tuple: """ Derive highest and lowest astronomical tide (HAT/LAT) from a measurement timeseries of 19 years. Tidal components are derived for each year of the measurement timeseries. @@ -392,8 +393,9 @@ def calc_hat_lat_frommeasurements(df_meas_19y: pd.DataFrame) -> tuple: Parameters ---------- - df_meas_19y : pd.DataFrame - Measurements timeseries of 19 years. + df_meas : pd.DataFrame + Measurements timeseries. The last 19 years of this + timeseries are used to compute hat and lat. 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 40b41de..be9a0f3 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -5,17 +5,21 @@
 @author: veenstra
 """
 import pytest
-from kenmerkendewaarden.utils import raise_extremes_with_aggers
+from kenmerkendewaarden.utils import (raise_extremes_with_aggers,
+                                      crop_timeseries_last_nyears)
+import pandas as pd
+import numpy as np
 
 
 @pytest.mark.unittest
 def test_raise_extremes_with_aggers_raise_12345df(df_ext):
     with pytest.raises(ValueError) as e:
         raise_extremes_with_aggers(df_ext)
-    assert (
+    expected_error = (
         "df_ext should only contain extremes (HWLWcode 1/2), "
         "but it also contains aggers (HWLWcode 3/4/5"
-    ) in str(e.value)
+    )
+    assert expected_error in str(e.value)
 
 
 @pytest.mark.unittest
@@ -23,6 +27,32 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010):
     raise_extremes_with_aggers(df_ext_12_2010)
 
 
+@pytest.mark.unittest
+def test_crop_timeseries_last_nyears(df_meas):
+    assert df_meas.index[0] == pd.Timestamp("1987-01-01 00:00:00+01:00")
+    assert df_meas.index[-1] == pd.Timestamp("2022-01-01 00:00:00+01:00")
+    assert len(df_meas) == 1840897
+
+    df_meas_10y = crop_timeseries_last_nyears(df_meas, nyears=10)
+    # assert number of years
+    num_years = len(df_meas_10y.index.year.unique())
+    assert num_years == 10
+    # assert start/end timestamps and length
+    assert df_meas_10y.index[0] == pd.Timestamp("2012-01-01 00:00:00+01:00")
+    assert df_meas_10y.index[-1] == pd.Timestamp("2021-12-31 23:50:00+01:00")
+    assert len(df_meas_10y) == 526032
+    # assert on years
+    actual_years = df_meas_10y.index.year.drop_duplicates().to_numpy()
+    expected_years = np.arange(2012, 2021+1)
+    assert (actual_years == expected_years).all()
+
+
+@pytest.mark.unittest
+def test_crop_timeseries_last_nyears_warning_tooshort(df_meas_2010_2014, caplog):
+    crop_timeseries_last_nyears(df_meas_2010_2014, nyears=10)
+    assert "requested 10 years but resulted in 5" in caplog.text
+
+
 @pytest.mark.unittest
 def test_raise_extremes_with_aggers_emptydf():
     import pandas as pd