Skip to content

NEW: reset global seed before every test case #155

Merged
merged 4 commits into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Colorbar scaling in Correlation heatmap plotter ([#143](https://github.com/tinkoff-ai/etna-ts/pull/143))
- Add Correlation heatmap in EDA notebook ([#144](https://github.com/tinkoff-ai/etna-ts/pull/144))
- Add `__repr__` for Pipeline ([#151](https://github.com/tinkoff-ai/etna-ts/pull/151))
- Defined random state for every test case ([#155](https://github.com/tinkoff-ai/etna-ts/pull/155))

### Fixed
- Add more obvious Exception Error for forecasting with unfitted model ([#102](https://github.com/tinkoff-ai/etna-ts/pull/102))
Expand Down
8 changes: 5 additions & 3 deletions etna/datasets/tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ class TSDataset:
"""

idx = pd.IndexSlice
np.random.seed(0)

def __init__(self, df: pd.DataFrame, freq: str, df_exog: Optional[pd.DataFrame] = None):
"""Init TSDataset.
Expand Down Expand Up @@ -313,7 +312,7 @@ def regressors(self) -> List[str]:
"""
return self._regressors

def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[Sequence] = None):
def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[Sequence] = None, seed: int = 1):
"""Plot of random or chosen segments.

Parameters
Expand All @@ -324,6 +323,8 @@ def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[
feature to plot
segments:
segments to plot
seed:
seed for local random state
"""
if not segments:
segments = self.segments
Expand All @@ -332,7 +333,8 @@ def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[
rows_num = math.ceil(k / columns_num)
_, ax = plt.subplots(rows_num, columns_num, figsize=(20, 5 * rows_num), squeeze=False)
ax = ax.ravel()
for i, segment in enumerate(sorted(np.random.choice(segments, size=k, replace=False))):
rnd_state = np.random.RandomState(seed)
for i, segment in enumerate(sorted(rnd_state.choice(segments, size=k, replace=False))):
df_slice = self[:, segment, column]
ax[i].plot(df_slice.index, df_slice.values)
ax[i].set_title(segment)
Expand Down
27 changes: 20 additions & 7 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,21 @@
from etna.datasets.tsdataset import TSDataset


@pytest.fixture(autouse=True)
def random_seed():
    """Reset the global random generators to a fixed seed before each test case.

    The fixture is ``autouse``, so it runs for every test without being
    requested explicitly. It seeds, in this order, torch's global generator,
    the standard-library ``random`` module and numpy's legacy global
    generator, all with the same value.
    """
    # Imported lazily inside the fixture, as in the original implementation.
    import random

    import torch

    seed_value = 121
    for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
        seed_fn(seed_value)


@pytest.fixture()
def example_df():
def example_df(random_seed):
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2020-01-01", end="2020-02-01", freq="H")
df1["segment"] = "segment_1"
Expand All @@ -21,7 +34,7 @@ def example_df():


@pytest.fixture
def two_dfs_with_different_timestamps():
def two_dfs_with_different_timestamps(random_seed):
"""Generate two dataframes with the same segments and different timestamps"""

def generate_df(start_time):
Expand All @@ -44,7 +57,7 @@ def generate_df(start_time):


@pytest.fixture
def two_dfs_with_different_segments_sets():
def two_dfs_with_different_segments_sets(random_seed):
"""Generate two dataframes with the same timestamps and different segments"""

def generate_df(n_segments):
Expand All @@ -67,7 +80,7 @@ def generate_df(n_segments):


@pytest.fixture
def train_test_dfs():
def train_test_dfs(random_seed):
"""Generate two dataframes with the same segments and the same timestamps"""

def generate_df():
Expand Down Expand Up @@ -131,7 +144,7 @@ def outliers_df():


@pytest.fixture
def example_df_() -> pd.DataFrame:
def example_df_(random_seed) -> pd.DataFrame:
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
df1["segment"] = ["segment_1"] * periods
Expand All @@ -150,7 +163,7 @@ def example_df_() -> pd.DataFrame:


@pytest.fixture
def example_tsds() -> TSDataset:
def example_tsds(random_seed) -> TSDataset:
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
df1["segment"] = "segment_1"
Expand All @@ -168,7 +181,7 @@ def example_tsds() -> TSDataset:


@pytest.fixture
def example_reg_tsds() -> TSDataset:
def example_reg_tsds(random_seed) -> TSDataset:
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
df1["segment"] = "segment_1"
Expand Down
7 changes: 3 additions & 4 deletions tests/test_analysis/test_outliers/test_hist_outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,10 @@ def test_v_optimal_hist(series: np.array, bins_number: int, expected: np.array):
np.testing.assert_almost_equal(error, expected)


@pytest.mark.parametrize(
"series,k", ((np.random.random(100), 10), (np.random.random(100), 20), (np.random.random(10), 4))
)
def test_compute_f_format(series: np.array, k: int):
@pytest.mark.parametrize("series_len,k", ((100, 10), (100, 20), (10, 4)))
def test_compute_f_format(random_seed, series_len: int, k: int):
"""Check that computeF produce the correct size output."""
series = np.random.random(size=series_len)
p, pp = np.empty_like(series), np.empty_like(series)
p[0] = series[0]
pp[0] = series[0] ** 2
Expand Down
2 changes: 1 addition & 1 deletion tests/test_clustering/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


@pytest.fixture
def eucl_ts() -> TSDataset:
def eucl_ts(random_seed) -> TSDataset:
df = pd.DataFrame()
for i in range(1, 8):
date_range = pd.date_range("2020-01-01", "2020-05-01")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


@pytest.fixture()
def tsdf_with_exog() -> TSDataset:
def tsdf_with_exog(random_seed) -> TSDataset:
df_1 = pd.DataFrame.from_dict({"timestamp": pd.date_range("2021-02-01", "2021-07-01", freq="1d")})
df_2 = pd.DataFrame.from_dict({"timestamp": pd.date_range("2021-02-01", "2021-07-01", freq="1d")})
df_1["segment"] = "Moscow"
Expand Down
8 changes: 4 additions & 4 deletions tests/test_model_selection/test_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


@pytest.fixture
def imbalanced_tsdf() -> TSDataset:
def imbalanced_tsdf(random_seed) -> TSDataset:
"""Generate two series with big time range difference"""
df1 = pd.DataFrame({"timestamp": pd.date_range("2021-01-25", "2021-02-01", freq="D")})
df1["segment"] = "segment_1"
Expand All @@ -43,7 +43,7 @@ def imbalanced_tsdf() -> TSDataset:


@pytest.fixture()
def big_daily_example_tsdf() -> TSDataset:
def big_daily_example_tsdf(random_seed) -> TSDataset:
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2019-01-01", end="2020-04-01", freq="D")
df1["segment"] = "segment_1"
Expand All @@ -62,7 +62,7 @@ def big_daily_example_tsdf() -> TSDataset:


@pytest.fixture()
def example_tsdf() -> TSDataset:
def example_tsdf(random_seed) -> TSDataset:
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2020-01-01", end="2020-02-01", freq="H")
df1["segment"] = "segment_1"
Expand All @@ -81,7 +81,7 @@ def example_tsdf() -> TSDataset:


@pytest.fixture()
def big_example_tsdf() -> TSDataset:
def big_example_tsdf(random_seed) -> TSDataset:
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2020-01-01", end="2021-02-01", freq="D")
df1["segment"] = "segment_1"
Expand Down
8 changes: 0 additions & 8 deletions tests/test_models/nn/test_deepar.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import random

import numpy as np
import pytest
import torch
from pytorch_forecasting.data import GroupNormalizer

from etna.datasets.tsdataset import TSDataset
Expand Down Expand Up @@ -39,10 +35,6 @@ def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon):
When:
Then: I get {horizon} periods per dataset as a forecast and they "the same" as past
"""
SEED = 121 # noqa: N806
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

ts_start = sorted(set(weekly_period_df.timestamp))[-horizon]
train, test = (
Expand Down
9 changes: 0 additions & 9 deletions tests/test_models/nn/test_tft.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import random

import numpy as np
import pytest
import torch

from etna.datasets.tsdataset import TSDataset
from etna.metrics import MAE
Expand Down Expand Up @@ -41,11 +37,6 @@ def test_tft_model_run_weekly_overfit(weekly_period_df, horizon):
Then: I get {horizon} periods per dataset as a forecast and they "the same" as past
"""

SEED = 121 # noqa: N806
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

ts_start = sorted(set(weekly_period_df.timestamp))[-horizon]
train, test = (
weekly_period_df[lambda x: x.timestamp < ts_start],
Expand Down
6 changes: 2 additions & 4 deletions tests/test_models/test_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,16 @@ def linear_segments_by_parameters(alpha_values, intercept_values):


@pytest.fixture()
def linear_segments_ts_unique():
def linear_segments_ts_unique(random_seed):
"""Create TSDataset that represents 3 segments with unique linear dependency on lags in each."""
np.random.seed(42)
alpha_values = [np.random.rand() * 4 - 2 for _ in range(3)]
intercept_values = [np.random.rand() * 4 + 1 for _ in range(3)]
return linear_segments_by_parameters(alpha_values, intercept_values)


@pytest.fixture()
def linear_segments_ts_common():
def linear_segments_ts_common(random_seed):
"""Create TSDataset that represents 3 segments with common linear dependency on lags in each."""
np.random.seed(42)
alpha_values = [np.random.rand() * 4 - 2] * 3
intercept_values = [np.random.rand() * 4 + 1 for _ in range(3)]
return linear_segments_by_parameters(alpha_values, intercept_values)
Expand Down
32 changes: 0 additions & 32 deletions tests/test_models/test_sarimax_model.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
import numpy as np

from etna.datasets import TSDataset
from etna.metrics import MAE
from etna.models import SARIMAXModel
from etna.transforms import TheilSenTrendTransform


def test_sarimax_forecaster_run(example_tsds):
Expand Down Expand Up @@ -39,30 +34,3 @@ def test_sarimax_forecaster_run_with_reg(example_reg_tsds):

assert not res.isnull().values.any()
assert len(res) == 14


def test_compare_sarimax_vanilla_reg(example_reg_tsds):
    """Compare SARIMAX on the target-only data with SARIMAX on the detrended full dataset.

    A first model is fitted on a dataset built from the ``"target"`` column
    only. A second model is fitted on the full ``example_reg_tsds`` after
    applying ``TheilSenTrendTransform`` to its train part. Both forecasts are
    scored with ``MAE`` against the test part of the ``example_reg_tsds``
    split, and the test asserts that the first model's error is strictly
    lower for every segment.
    """
    n_steps = 24
    split_kwargs = dict(
        train_start=None, train_end="2020-01-31", test_start="2020-02-01", test_end="2020-02-24"
    )

    def _fit_and_forecast(train_ts):
        # Fit a default SARIMAX model and forecast ``n_steps`` periods ahead.
        sarimax = SARIMAXModel()
        sarimax.fit(train_ts)
        return sarimax.forecast(train_ts.make_future(future_steps=n_steps))

    def _segment_mae(y_true, y_pred):
        # Per-segment MAE values collected into a numpy array.
        return np.array(list(MAE()(y_true, y_pred).values()))

    # Model 1: dataset rebuilt from the target column only.
    target_only_ts = TSDataset(example_reg_tsds[:, :, "target"], freq="D")
    vanilla_train, _ = target_only_ts.train_test_split(**split_kwargs)
    vanilla_forecast = _fit_and_forecast(vanilla_train)

    # Model 2: full dataset, with the trend transform fitted on the train part.
    reg_train, reg_test = example_reg_tsds.train_test_split(**split_kwargs)
    reg_train.fit_transform([TheilSenTrendTransform(in_column="target")])
    reg_forecast = _fit_and_forecast(reg_train)

    # Both forecasts are evaluated against the same test split (the one taken
    # from the full dataset), exactly as the original statement order implied.
    vanilla_errors = _segment_mae(reg_test, vanilla_forecast)
    reg_errors = _segment_mae(reg_test, reg_forecast)

    assert np.all(vanilla_errors < reg_errors)
2 changes: 1 addition & 1 deletion tests/test_transforms/test_impute_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_wrong_init_two_segments(all_date_present_df_two_segments):


@pytest.fixture()
def df_with_missing_value_x_index(all_date_present_df: pd.DataFrame) -> Tuple[pd.DataFrame, int]:
def df_with_missing_value_x_index(random_seed, all_date_present_df: pd.DataFrame) -> Tuple[pd.DataFrame, int]:
"""Create pd.DataFrame that contains some target on given range of dates with one gap."""
# index cannot be first or last value,
# because Imputer should know starting and ending dates
Expand Down
4 changes: 2 additions & 2 deletions tests/test_transforms/test_log_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


@pytest.fixture
def non_positive_df_() -> pd.DataFrame:
def non_positive_df_(random_seed) -> pd.DataFrame:
"""Generate dataset with non-positive target."""
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
Expand All @@ -26,7 +26,7 @@ def non_positive_df_() -> pd.DataFrame:


@pytest.fixture
def positive_df_() -> pd.DataFrame:
def positive_df_(random_seed) -> pd.DataFrame:
"""Generate dataset with positive target."""
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
Expand Down