diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01e9f5f39..0ecf93714 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,7 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - 
 - 
-- 
+- Fix behavior of SARIMAXModel if simple_differencing=True is set ([#837](https://github.com/tinkoff-ai/etna/pull/837))
 - 
 - 
 - 
diff --git a/etna/libs/pmdarima_utils/__init__.py b/etna/libs/pmdarima_utils/__init__.py
new file mode 100644
index 000000000..bff45169f
--- /dev/null
+++ b/etna/libs/pmdarima_utils/__init__.py
@@ -0,0 +1 @@
+from etna.libs.pmdarima_utils.arima import seasonal_prediction_with_confidence
diff --git a/etna/libs/pmdarima_utils/arima.py b/etna/libs/pmdarima_utils/arima.py
new file mode 100644
index 000000000..ae7bdb3be
--- /dev/null
+++ b/etna/libs/pmdarima_utils/arima.py
@@ -0,0 +1,153 @@
+"""
+MIT License
+
+Copyright (c) 2017 Taylor G Smith
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+# Note: Copied from pmdarima package (https://github.com/blue-yonder/tsfresh/blob/https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/arima/arima.py)
+
+import numpy as np
+import numpy.polynomial.polynomial as np_polynomial
+from sklearn.utils.validation import check_array
+from pmdarima.utils import diff
+from pmdarima.utils import diff_inv
+from pmdarima.utils import check_endog
+
+
+def ARMAtoMA(ar, ma, max_deg):
+    r"""
+    Convert ARMA coefficients to infinite MA coefficients.
+    Compute coefficients of MA model equivalent to given ARMA model.
+    MA coefficients are cut off at max_deg.
+    The same function as ARMAtoMA() in stats library of R
+    Parameters
+    ----------
+    ar : array-like, shape=(n_orders,)
+        The array of AR coefficients.
+    ma : array-like, shape=(n_orders,)
+        The array of MA coefficients.
+    max_deg : int
+        Coefficients are computed up to the order of max_deg.
+    Returns
+    -------
+    np.ndarray, shape=(max_deg,)
+        Equivalent MA coefficients.
+    Notes
+    -----
+    Here is the derivation. Suppose ARMA model is defined as
+    .. math::
+    x_t - ar_1*x_{t-1} - ar_2*x_{t-2} - ... - ar_p*x_{t-p}\\
+        = e_t + ma_1*e_{t-1} + ma_2*e_{t-2} + ... + ma_q*e_{t-q}
+    namely
+    .. math::
+    (1 - \sum_{i=1}^p[ar_i*B^i]) x_t = (1 + \sum_{i=1}^q[ma_i*B^i]) e_t
+    where :math:`B` is a backward operator.
+    Equivalent MA model is
+    .. math::
+        x_t = (1 - \sum_{i=1}^p[ar_i*B^i])^{-1}\\
+        * (1 + \sum_{i=1}^q[ma_i*B^i]) e_t\\
+        = (1 + \sum_{i=1}[ema_i*B^i]) e_t
+    where :math:``ema_i`` is a coefficient of equivalent MA model.
+    The :math:``ema_i`` satisfies
+    .. math::
+        (1 - \sum_{i=1}^p[ar_i*B^i]) * (1 + \sum_{i=1}[ema_i*B^i]) \\
+        = 1 + \sum_{i=1}^q[ma_i*B^i]
+    thus
+    .. math::
+        \sum_{i=1}[ema_i*B^i] = \sum_{i=1}^p[ar_i*B^i] \\
+        + \sum_{i=1}^p[ar_i*B^i] * \sum_{j=1}[ema_j*B^j] \\
+        + \Sum_{i=1}^q[ma_i*B^i]
+    therefore
+    .. math::
+        ema_i = ar_i (but 0 if i>p) \\
+        + \Sum_{j=1}^{min(i-1,p)}[ar_j*ema_{i-j}] + ma_i(but 0 if i>q) \\
+        = \sum_{j=1}{min(i,p)}[ar_j*ema_{i-j}(but 1 if j=i)] \\
+        + ma_i(but 0 if i>q)
+    """
+    p = len(ar)
+    q = len(ma)
+    ema = np.empty(max_deg)
+    for i in range(0, max_deg):
+        temp = ma[i] if i < q else 0.0
+        for j in range(0, min(i + 1, p)):
+            temp += ar[j] * (ema[i - j - 1] if i - j - 1 >= 0 else 1.0)
+        ema[i] = temp
+    return ema
+
+
+# Note: Originally copied from pmdarima package (https://github.com/blue-yonder/tsfresh/blob/https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/arima/arima.py)
+def seasonal_prediction_with_confidence(arima_res,
+                                        start,
+                                        end,
+                                        X,
+                                        alpha,
+                                        **kwargs):
+    """Compute the prediction for a SARIMAX and get a conf interval
+
+    Unfortunately, SARIMAX does not really provide a nice way to get the
+    confidence intervals out of the box, so we have to perform the
+    ``get_prediction`` code here and unpack the confidence intervals manually.
+    """
+    results = arima_res.get_prediction(
+        start=start,
+        end=end,
+        exog=X,
+        **kwargs)
+
+    f = results.predicted_mean
+    conf_int = results.conf_int(alpha=alpha)
+    if arima_res.specification['simple_differencing']:
+        # If simple_differencing == True, statsmodels.get_prediction returns
+        # mid and confidence intervals on differenced time series.
+        # We have to invert differencing the mid and confidence intervals
+        y_org = arima_res.model.orig_endog
+        d = arima_res.model.orig_k_diff
+        D = arima_res.model.orig_k_seasonal_diff
+        period = arima_res.model.seasonal_periods
+        # Forecast mid: undifferencing non-seasonal part
+        if d > 0:
+            y_sdiff = y_org if D == 0 else diff(y_org, period, D)
+            f_temp = np.append(y_sdiff[-d:], f)
+            f_temp = diff_inv(f_temp, 1, d)
+            f = f_temp[(2 * d):]
+        # Forecast mid: undifferencing seasonal part
+        if D > 0 and period > 1:
+            f_temp = np.append(y_org[-(D * period):], f)
+            f_temp = diff_inv(f_temp, period, D)
+            f = f_temp[(2 * D * period):]
+        # confidence interval
+        ar_poly = arima_res.polynomial_reduced_ar
+        poly_diff = np_polynomial.polypow(np.array([1., -1.]), d)
+        sdiff = np.zeros(period + 1)
+        sdiff[0] = 1.
+        sdiff[-1] = 1.
+        poly_sdiff = np_polynomial.polypow(sdiff, D)
+        ar = -np.polymul(ar_poly, np.polymul(poly_diff, poly_sdiff))[1:]
+        ma = arima_res.polynomial_reduced_ma[1:]
+        n_predMinus1 = end - start
+        ema = ARMAtoMA(ar, ma, n_predMinus1)
+        sigma2 = arima_res._params_variance[0]
+        var = np.cumsum(np.append(1., ema * ema)) * sigma2
+        q = results.dist.ppf(1. - alpha / 2, *results.dist_args)
+        conf_int[:, 0] = f - q * np.sqrt(var)
+        conf_int[:, 1] = f + q * np.sqrt(var)
+
+    return check_endog(f, dtype=None, copy=False), \
+        check_array(conf_int, copy=False, dtype=None)
diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py
index a26263317..de1d19b8f 100644
--- a/etna/models/sarimax.py
+++ b/etna/models/sarimax.py
@@ -9,8 +9,10 @@
 from statsmodels.tools.sm_exceptions import ValueWarning
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
+from etna.libs.pmdarima_utils import seasonal_prediction_with_confidence
 from etna.models.base import BaseAdapter
 from etna.models.base import PerSegmentPredictionIntervalModel
+from etna.models.utils import determine_num_steps
 
 warnings.filterwarnings(
     message="No frequency information was provided, so inferred frequency .* will be used",
@@ -164,6 +166,8 @@ def __init__(
         self._model: Optional[SARIMAX] = None
         self._result: Optional[SARIMAX] = None
         self.regressor_columns: Optional[List[str]] = None
+        self._freq = None
+        self._first_train_timestamp = None
 
     def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_SARIMAXAdapter":
         """
@@ -193,8 +197,8 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_SARIMAXAdapter":
 
         self._check_df(df)
 
-        targets = df["target"]
-        targets.index = df["timestamp"]
+        # make it a numpy array for forgetting about indices, it is necessary for _seasonal_prediction_with_confidence
+        targets = df["target"].values
 
         exog_train = self._select_regressors(df)
 
@@ -221,6 +225,14 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_SARIMAXAdapter":
             **self.kwargs,
         )
         self._result = self._model.fit()
+
+        freq = pd.infer_freq(df["timestamp"], warn=False)
+        if freq is None:
+            raise ValueError("Can't determine frequency of a given dataframe")
+        self._freq = freq
+
+        self._first_train_timestamp = df["timestamp"].min()
+
         return self
 
     def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequence[float]) -> pd.DataFrame:
@@ -256,30 +268,39 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen
             )
 
         exog_future = self._select_regressors(df)
+        start_timestamp = df["timestamp"].min()
+        end_timestamp = df["timestamp"].max()
+        # determine index of start_timestamp if counting from first timestamp of train
+        start_idx = determine_num_steps(
+            start_timestamp=self._first_train_timestamp, end_timestamp=start_timestamp, freq=self._freq  # type: ignore
+        )
+        # determine index of end_timestamp if counting from first timestamp of train
+        end_idx = determine_num_steps(
+            start_timestamp=self._first_train_timestamp, end_timestamp=end_timestamp, freq=self._freq  # type: ignore
+        )
+
         if prediction_interval:
-            forecast = self._result.get_prediction(
-                start=df["timestamp"].min(), end=df["timestamp"].max(), dynamic=False, exog=exog_future
+            forecast, _ = seasonal_prediction_with_confidence(
+                arima_res=self._result, start=start_idx, end=end_idx, X=exog_future, alpha=0.05
             )
-            y_pred = forecast.predicted_mean
-            y_pred.name = "mean"
-            y_pred = pd.DataFrame(y_pred)
+            y_pred = pd.DataFrame({"mean": forecast})
             for quantile in quantiles:
                 # set alpha in the way to get a desirable quantile
                 alpha = min(quantile * 2, (1 - quantile) * 2)
-                borders = forecast.conf_int(alpha=alpha)
+                _, borders = seasonal_prediction_with_confidence(
+                    arima_res=self._result, start=start_idx, end=end_idx, X=exog_future, alpha=alpha
+                )
                 if quantile < 1 / 2:
-                    series = borders["lower target"]
+                    series = borders[:, 0]
                 else:
-                    series = borders["upper target"]
+                    series = borders[:, 1]
                 y_pred[f"mean_{quantile:.4g}"] = series
         else:
-            forecast = self._result.get_prediction(
-                start=df["timestamp"].min(), end=df["timestamp"].max(), dynamic=False, exog=exog_future
+            forecast, _ = seasonal_prediction_with_confidence(
+                arima_res=self._result, start=start_idx, end=end_idx, X=exog_future, alpha=0.05
             )
-            y_pred = forecast.predicted_mean
-            y_pred.name = "mean"
-            y_pred = pd.DataFrame(y_pred)
-        y_pred = y_pred.reset_index(drop=True, inplace=False)
+            y_pred = pd.DataFrame({"mean": forecast})
+
         rename_dict = {
             column: column.replace("mean", "target") for column in y_pred.columns if column.startswith("mean")
         }
diff --git a/etna/models/tbats.py b/etna/models/tbats.py
index 42631245a..629e63728 100644
--- a/etna/models/tbats.py
+++ b/etna/models/tbats.py
@@ -11,7 +11,7 @@
 
 from etna.models.base import BaseAdapter
 from etna.models.base import PerSegmentPredictionIntervalModel
-from etna.models.utils import determine_num_steps_to_forecast
+from etna.models.utils import determine_num_steps
 
 
 class _TBATSAdapter(BaseAdapter):
@@ -43,8 +43,8 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab
                 "In-sample predictions aren't supported by current implementation."
             )
 
-        steps_to_forecast = determine_num_steps_to_forecast(
-            last_train_timestamp=self._last_train_timestamp, last_test_timestamp=df["timestamp"].max(), freq=self._freq
+        steps_to_forecast = determine_num_steps(
+            start_timestamp=self._last_train_timestamp, end_timestamp=df["timestamp"].max(), freq=self._freq
         )
         steps_to_skip = steps_to_forecast - df.shape[0]
 
diff --git a/etna/models/utils.py b/etna/models/utils.py
index 49cb5bb93..05dc0fde0 100644
--- a/etna/models/utils.py
+++ b/etna/models/utils.py
@@ -1,20 +1,15 @@
 import pandas as pd
 
 
-def determine_num_steps_to_forecast(
-    last_train_timestamp: pd.Timestamp, last_test_timestamp: pd.Timestamp, freq: str
-) -> int:
-    """Determine number of steps to make a forecast in future.
-
-    It is useful for out-sample forecast with gap if model predicts only on a certain number of steps
-    in autoregressive manner.
+def determine_num_steps(start_timestamp: pd.Timestamp, end_timestamp: pd.Timestamp, freq: str) -> int:
+    """Determine how many steps of ``freq`` should we make from ``start_timestamp`` to reach ``end_timestamp``.
 
     Parameters
     ----------
-    last_train_timestamp:
-        last timestamp in train data
-    last_test_timestamp:
-        last timestamp in test data, should be after ``last_train_timestamp``
+    start_timestamp:
+        timestamp to start counting from
+    end_timestamp:
+        timestamp to end counting, should be not earlier than ``start_timestamp``
     freq:
         pandas frequency string: `Offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
 
@@ -26,26 +21,30 @@ def determine_num_steps_to_forecast(
     Raises
     ------
     ValueError:
-        Value of last test timestamp is less or equal than last train timestamp
+        Value of end timestamp is less than start timestamp
     ValueError:
-        Last train timestamp isn't correct according to a given frequency
+        Start timestamp isn't correct according to a given frequency
     ValueError:
-        Last test timestamps isn't reachable with a given frequency
+        End timestamp isn't reachable with a given frequency
     """
-    if last_test_timestamp <= last_train_timestamp:
-        raise ValueError("Last train timestamp should be less than last test timestamp!")
+    if start_timestamp > end_timestamp:
+        raise ValueError("Start train timestamp should be less or equal than end timestamp!")
+
+    # check if start_timestamp is normalized
+    normalized_start_timestamp = pd.date_range(start=start_timestamp, periods=1, freq=freq)
+    if normalized_start_timestamp != start_timestamp:
+        raise ValueError(f"Start timestamp isn't correct according to given frequency: {freq}")
 
-    # check if last_train_timestamp is normalized
-    normalized_last_train_timestamp = pd.date_range(start=last_train_timestamp, periods=1, freq=freq)
-    if normalized_last_train_timestamp != last_train_timestamp:
-        raise ValueError(f"Last train timestamp isn't correct according to given frequency: {freq}")
+    # check a simple case
+    if start_timestamp == end_timestamp:
+        return 0
 
     # make linear probing, because for complex offsets there is a cycle in `pd.date_range`
     cur_value = 1
     while True:
-        timestamps = pd.date_range(start=last_train_timestamp, periods=cur_value + 1, freq=freq)
-        if timestamps[-1] == last_test_timestamp:
+        timestamps = pd.date_range(start=start_timestamp, periods=cur_value + 1, freq=freq)
+        if timestamps[-1] == end_timestamp:
             return cur_value
-        elif timestamps[-1] > last_test_timestamp:
-            raise ValueError(f"Last test timestamps isn't reachable with freq: {freq}")
+        elif timestamps[-1] > end_timestamp:
+            raise ValueError(f"End timestamp isn't reachable with freq: {freq}")
         cur_value += 1
diff --git a/tests/test_models/test_sarimax_model.py b/tests/test_models/test_sarimax_model.py
index 3a7d6c299..4a6e581c0 100644
--- a/tests/test_models/test_sarimax_model.py
+++ b/tests/test_models/test_sarimax_model.py
@@ -5,22 +5,23 @@
 from etna.pipeline import Pipeline
 
 
+def _check_prediction(ts, model, horizon):
+    model.fit(ts)
+    future_ts = ts.make_future(future_steps=horizon)
+    res = model.forecast(future_ts)
+    res = res.to_pandas(flatten=True)
+
+    assert not res.isnull().values.any()
+    assert len(res) == 14
+
+
 def test_sarimax_forecaster_run(example_tsds):
     """
     Given: I have dataframe with 2 segments
     When:
     Then: I get 7 periods per dataset as a forecast
     """
-
-    horizon = 7
-    model = SARIMAXModel()
-    model.fit(example_tsds)
-    future_ts = example_tsds.make_future(future_steps=horizon)
-    res = model.forecast(future_ts)
-    res = res.to_pandas(flatten=True)
-
-    assert not res.isnull().values.any()
-    assert len(res) == 14
+    _check_prediction(ts=example_tsds, model=SARIMAXModel(), horizon=7)
 
 
 def test_sarimax_save_regressors_on_fit(example_reg_tsds):
@@ -40,21 +41,22 @@ def test_sarimax_select_regressors_correctly(example_reg_tsds):
         assert (segment_regressors == segment_regressors_expected).all().all()
 
 
-def test_sarimax_forecaster_run_with_reg(example_reg_tsds):
+def test_sarimax_forecaster_run_with_simple_differencing(example_tsds):
     """
     Given: I have dataframe with 2 segments
     When:
     Then: I get 7 periods per dataset as a forecast
     """
-    horizon = 7
-    model = SARIMAXModel()
-    model.fit(example_reg_tsds)
-    future_ts = example_reg_tsds.make_future(future_steps=horizon)
-    res = model.forecast(future_ts)
-    res = res.to_pandas(flatten=True)
+    _check_prediction(ts=example_tsds, model=SARIMAXModel(simple_differencing=True), horizon=7)
 
-    assert not res.isnull().values.any()
-    assert len(res) == 14
+
+def test_sarimax_forecaster_run_with_reg(example_reg_tsds):
+    """
+    Given: I have dataframe with 2 segments
+    When:
+    Then: I get 7 periods per dataset as a forecast
+    """
+    _check_prediction(ts=example_reg_tsds, model=SARIMAXModel(), horizon=7)
 
 
 def test_sarimax_forececaster_run_with_reg_custom_order(example_reg_tsds):
@@ -63,15 +65,7 @@ def test_sarimax_forececaster_run_with_reg_custom_order(example_reg_tsds):
     When: Sarimax have non standard `order` param
     Then: I get 7 periods per dataset as a forecast
     """
-    horizon = 7
-    model = SARIMAXModel(order=(3, 1, 0))
-    model.fit(example_reg_tsds)
-    future_ts = example_reg_tsds.make_future(future_steps=horizon)
-    res = model.forecast(future_ts)
-    res = res.to_pandas(flatten=True)
-
-    assert not res.isnull().values.any()
-    assert len(res) == 14
+    _check_prediction(ts=example_reg_tsds, model=SARIMAXModel(order=(3, 1, 0)), horizon=7)
 
 
 def test_prediction_interval_run_insample(example_tsds):
diff --git a/tests/test_models/test_utils.py b/tests/test_models/test_utils.py
index d269a5b99..2a4bf7153 100644
--- a/tests/test_models/test_utils.py
+++ b/tests/test_models/test_utils.py
@@ -1,64 +1,56 @@
 import pandas as pd
 import pytest
 
-from etna.models.utils import determine_num_steps_to_forecast
+from etna.models.utils import determine_num_steps
 
 
 @pytest.mark.parametrize(
-    "last_train_timestamp, last_test_timestamp, freq, answer",
+    "start_timestamp, end_timestamp, freq, answer",
     [
         (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02"), "D", 1),
         (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-11"), "D", 10),
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-01"), "D", 0),
         (pd.Timestamp("2020-01-05"), pd.Timestamp("2020-01-19"), "W-SUN", 2),
         (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-15"), pd.offsets.Week(), 2),
         (pd.Timestamp("2020-01-31"), pd.Timestamp("2021-02-28"), "M", 13),
         (pd.Timestamp("2020-01-01"), pd.Timestamp("2021-06-01"), "MS", 17),
     ],
 )
-def test_determine_num_steps_to_forecast_ok(last_train_timestamp, last_test_timestamp, freq, answer):
-    result = determine_num_steps_to_forecast(
-        last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
-    )
+def test_determine_num_steps_ok(start_timestamp, end_timestamp, freq, answer):
+    result = determine_num_steps(start_timestamp=start_timestamp, end_timestamp=end_timestamp, freq=freq)
     assert result == answer
 
 
 @pytest.mark.parametrize(
-    "last_train_timestamp, last_test_timestamp, freq",
+    "start_timestamp, end_timestamp, freq",
     [
-        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-01"), "D"),
         (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-01-01"), "D"),
     ],
 )
-def test_determine_num_steps_to_forecast_fail_wrong_order(last_train_timestamp, last_test_timestamp, freq):
-    with pytest.raises(ValueError, match="Last train timestamp should be less than last test timestamp"):
-        _ = determine_num_steps_to_forecast(
-            last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
-        )
+def test_determine_num_steps_fail_wrong_order(start_timestamp, end_timestamp, freq):
+    with pytest.raises(ValueError, match="Start train timestamp should be less or equal than end timestamp"):
+        _ = determine_num_steps(start_timestamp=start_timestamp, end_timestamp=end_timestamp, freq=freq)
 
 
 @pytest.mark.parametrize(
-    "last_train_timestamp, last_test_timestamp, freq",
+    "start_timestamp, end_timestamp, freq",
     [
         (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "M"),
         (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "MS"),
     ],
 )
-def test_determine_num_steps_to_forecast_fail_wrong_start(last_train_timestamp, last_test_timestamp, freq):
-    with pytest.raises(ValueError, match="Last train timestamp isn't correct according to given frequency"):
-        _ = determine_num_steps_to_forecast(
-            last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
-        )
+def test_determine_num_steps_fail_wrong_start(start_timestamp, end_timestamp, freq):
+    with pytest.raises(ValueError, match="Start timestamp isn't correct according to given frequency"):
+        _ = determine_num_steps(start_timestamp=start_timestamp, end_timestamp=end_timestamp, freq=freq)
 
 
 @pytest.mark.parametrize(
-    "last_train_timestamp, last_test_timestamp, freq",
+    "start_timestamp, end_timestamp, freq",
     [
         (pd.Timestamp("2020-01-31"), pd.Timestamp("2020-06-05"), "M"),
         (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-06-05"), "MS"),
     ],
 )
-def test_determine_num_steps_to_forecast_fail_wrong_end(last_train_timestamp, last_test_timestamp, freq):
-    with pytest.raises(ValueError, match="Last test timestamps isn't reachable with freq"):
-        _ = determine_num_steps_to_forecast(
-            last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
-        )
+def test_determine_num_steps_fail_wrong_end(start_timestamp, end_timestamp, freq):
+    with pytest.raises(ValueError, match="End timestamp isn't reachable with freq"):
+        _ = determine_num_steps(start_timestamp=start_timestamp, end_timestamp=end_timestamp, freq=freq)