-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add FourierTransform * Update changelog * Add notes section to explain some details
- Loading branch information
1 parent
2fb5fc6
commit e495f57
Showing
4 changed files
with
273 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import math | ||
from typing import Optional | ||
from typing import Sequence | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
from etna.transforms.base import Transform | ||
|
||
|
||
class FourierTransform(Transform):
    """Adds fourier features to the dataset.

    Every requested harmonic ``mod`` becomes one extra column in each segment:
    odd mods produce a sine term and even mods a cosine term, both of order
    ``(mod + 1) // 2`` with respect to ``period``.
    """

    def __init__(
        self,
        period: float,
        order: Optional[int] = None,
        mods: Optional[Sequence[int]] = None,
        out_column: Optional[str] = None,
    ):
        """Create instance of FourierTransform.

        Parameters
        ----------
        period:
            the period of the seasonality to capture in frequency units of time series, it should be >= 2
        order:
            upper order of Fourier components to include, it should be >= 1 and <= ceil(period/2)
        mods:
            alternative and precise way of defining which harmonics will be used,
            for example `mods=[1, 3, 4]` means that sin of the first order
            and sin and cos of the second order will be used,
            mods should be >= 1 and < period
        out_column:
            if set, name of added column, the final name will be '{out_column}_{mod}',
            don't forget to add 'regressor_' prefix
            if don't set, name will be 'regressor_{repr}', repr will represent class that creates exactly this column

        Raises
        ------
        ValueError:
            if period < 2
        ValueError:
            if both or none of order, mods is set
        ValueError:
            if order is < 1 or > ceil(period/2)
        ValueError:
            if mods is empty
        ValueError:
            if at least one mod is < 1 or >= period

        Notes
        -----
        To understand how transform works we recommend: https://otexts.com/fpp2/useful-predictors.html#fourier-series

        * Parameter `period` is responsible for the seasonality we want to capture.
        * Parameters `order` and `mods` define which harmonics will be used.

        Parameter `order` is a more user-friendly version of `mods`.
        For example, `order=2` can be represented as `mods=[1, 2, 3, 4]` if `period` > 4,
        as `mods=[1, 2, 3]` if 3 < `period` <= 4 and as `mods=[1, 2]` if 2 < `period` <= 3,
        because only mods strictly smaller than `period` are kept.
        """
        if period < 2:
            raise ValueError("Period should be at least 2")
        self.period = period
        self.mods: Sequence[int]

        if order is not None and mods is None:
            if order < 1 or order > math.ceil(period / 2):
                raise ValueError("Order should be within [1, ceil(period/2)] range")
            # Each order contributes a sin (odd mod) and a cos (even mod); drop mods that reach the period.
            self.mods = [mod for mod in range(1, 2 * order + 1) if mod < period]
        elif mods is not None and order is None:
            # Materialise once: supports one-shot iterables and decouples the
            # transform from later mutation of the caller's sequence.
            mods = list(mods)
            if not mods:
                # Without this guard min() would raise an unrelated "empty sequence" message.
                raise ValueError("At least one mod should be set")
            if min(mods) < 1 or max(mods) >= period:
                raise ValueError("Every mod should be within [1, int(period)) range")
            self.mods = mods
        else:
            raise ValueError("There should be exactly one option set: order or mods")

        self.out_column = out_column

    def fit(self, df: pd.DataFrame) -> "FourierTransform":
        """Fit method does nothing and is kept for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.

        Returns
        -------
        result: FourierTransform
        """
        return self

    def _get_column_name(self, mod: int) -> str:
        """Return the name of the column that holds the harmonic ``mod``."""
        if self.out_column is None:
            # Name the column after a transform that would create exactly this single harmonic.
            return f"regressor_{FourierTransform(period=self.period, mods=[mod])!r}"
        return f"{self.out_column}_{mod}"

    @staticmethod
    def _construct_answer(df: pd.DataFrame, features: pd.DataFrame) -> pd.DataFrame:
        """Attach the same ``features`` columns to every segment of ``df``.

        Parameters
        ----------
        df:
            dataframe whose columns have a level named "segment"
        features:
            dataframe with the columns to add, joined to each segment on the index

        Returns
        -------
        result: pd.DataFrame
            dataframe with columns sorted and column levels named "segment" and "feature"
        """
        dataframes = []
        for seg in df.columns.get_level_values("segment").unique():
            tmp = df[seg].join(features)
            # Put the segment name back as the first level of the columns.
            _idx = tmp.columns.to_frame()
            _idx.insert(0, "segment", seg)
            tmp.columns = pd.MultiIndex.from_frame(_idx)
            dataframes.append(tmp)

        result = pd.concat(dataframes, axis=1).sort_index(axis=1)
        result.columns.names = ["segment", "feature"]
        return result

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add harmonics to the dataset.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        result: pd.Dataframe
            transformed dataframe
        """
        features = pd.DataFrame(index=df.index)
        # Elapsed time is counted in periods from the first row of the passed dataframe.
        elapsed = np.arange(features.shape[0]) / self.period

        for mod in self.mods:
            order = (mod + 1) // 2
            # Even mods are cosines, obtained from the sine by a quarter-period phase shift.
            phase = np.pi / 2 if mod % 2 == 0 else 0.0
            features[self._get_column_name(mod)] = np.sin(2 * np.pi * order * elapsed + phase)

        return self._construct_answer(df, features)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
|
||
from etna.datasets import TSDataset | ||
from etna.metrics import R2 | ||
from etna.models import LinearPerSegmentModel | ||
from etna.transforms import FourierTransform | ||
|
||
|
||
def add_seasonality(series: pd.Series, period: int, magnitude: float) -> pd.Series:
    """Return a new series equal to ``series`` plus a sine wave.

    The wave has the given ``period`` (in positions of the series) and
    amplitude ``magnitude``; the input series is left untouched.
    """
    positions = np.arange(series.shape[0])
    wave = np.sin(2 * np.pi * positions / period) * magnitude
    return series + wave
|
||
|
||
def get_one_df(period_1, period_2, magnitude_1, magnitude_2):
    """Build a daily dataframe with "timestamp" and a noisy seasonal "target".

    The target is the sum of a sine with frequency ``2 / period_1``, a cosine
    with frequency ``3 / period_2`` and gaussian noise with scale 0.05.
    """
    timestamp = pd.date_range(start="2020-01-01", end="2021-01-01", freq="D")
    n_points = timestamp.shape[0]
    steps = np.arange(n_points)

    sine_part = np.sin(2 * np.pi * steps * 2 / period_1) * magnitude_1
    cosine_part = np.cos(2 * np.pi * steps * 3 / period_2) * magnitude_2
    noise = np.random.normal(scale=0.05, size=n_points)

    return pd.DataFrame({"timestamp": timestamp, "target": sine_part + cosine_part + noise})
|
||
|
||
@pytest.fixture
def ts_trend_seasonal(random_seed) -> TSDataset:
    """Build a daily two-segment dataset whose targets share periods 7 and 30.4 but differ in magnitude."""
    segment_magnitudes = (
        ("segment_1", 1, 1 / 2),
        ("segment_2", 1 / 2, 1 / 5),
    )
    frames = []
    for segment_name, magnitude_1, magnitude_2 in segment_magnitudes:
        frame = get_one_df(period_1=7, period_2=30.4, magnitude_1=magnitude_1, magnitude_2=magnitude_2)
        frame["segment"] = segment_name
        frames.append(frame)

    classic_df = pd.concat(frames, ignore_index=True)
    return TSDataset(TSDataset.to_dataset(classic_df), freq="D")
|
||
|
||
@pytest.mark.parametrize("period", [-1, 0, 1, 1.5])
def test_fail_period(period):
    """Check that a period below 2 is rejected when the transform is created."""
    expected_message = "Period should be at least 2"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=period, order=1)
|
||
|
||
@pytest.mark.parametrize("order", [0, 5])
def test_fail_order(order):
    """Check that an order outside the allowed range is rejected when the transform is created."""
    expected_message = "Order should be within"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7, order=order)
|
||
|
||
@pytest.mark.parametrize("mods", [[0], [0, 1, 2, 3], [1, 2, 3, 7], [7]])
def test_fail_mods(mods):
    """Check that mods containing a value outside the allowed range are rejected."""
    expected_message = "Every mod should be within"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7, mods=mods)
|
||
|
||
def test_fail_set_none():
    """Check that creating the transform with neither order nor mods fails."""
    expected_message = "There should be exactly one option set"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7)
|
||
|
||
def test_fail_set_both():
    """Check that creating the transform with both order and mods fails."""
    expected_message = "There should be exactly one option set"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7, order=1, mods=[1, 2, 3])
|
||
|
||
@pytest.mark.parametrize(
    "period, order, num_columns", [(6, 2, 4), (7, 2, 4), (6, 3, 5), (7, 3, 6), (5.5, 2, 4), (5.5, 3, 5)]
)
def test_column_names(example_df, period, order, num_columns):
    """Check the number of generated columns and that each one can be rebuilt from its own name."""
    wide_df = TSDataset.to_dataset(example_df)
    full_transform = FourierTransform(period=period, order=order)
    full_result = full_transform.fit_transform(wide_df)
    feature_names = full_result.columns.get_level_values("feature").unique().drop("target")
    assert len(feature_names) == num_columns

    prefix_length = len("regressor_")
    for feature_name in feature_names:
        # Everything after the prefix is evaluated as an expression that builds a transform.
        single_transform = eval(feature_name[prefix_length:])
        single_result = single_transform.fit_transform(wide_df)
        single_names = single_result.columns.get_level_values("feature").unique().drop("target")
        assert len(single_names) == 1
        assert single_names[0] == feature_name
|
||
|
||
def test_column_names_out_column(example_df):
    """Check that the generated columns are named '{out_column}_{mod}' when `out_column` is set."""
    wide_df = TSDataset.to_dataset(example_df)
    expected_columns = {f"regressor_fourier_{i}" for i in range(1, 7)}

    result = FourierTransform(period=10, order=3, out_column="regressor_fourier").fit_transform(wide_df)
    feature_names = result.columns.get_level_values("feature").unique().drop("target")

    assert set(feature_names) == expected_columns
|
||
|
||
@pytest.mark.parametrize("period, mod", [(24, 1), (24, 2), (24, 9), (24, 20), (24, 23), (7.5, 3), (7.5, 4)])
def test_column_values(example_df, period, mod):
    """Test that transform generates correct values.

    The expected harmonic is rebuilt from the timestamps: even mods are
    compared against a cosine and odd mods against a sine of order
    ``(mod + 1) // 2``.
    """
    df = TSDataset.to_dataset(example_df)
    transform = FourierTransform(period=period, mods=[mod], out_column="regressor_fourier")
    transformed_df = transform.fit_transform(df)

    # The expected values do not depend on the segment, so build them once.
    timestamp = df.index
    # Keyword form avoids the deprecated upper-case "H" unit alias.
    # NOTE(review): assumes example_df has hourly frequency - confirm against the fixture.
    freq = pd.Timedelta(hours=1)
    elapsed = (timestamp - timestamp[0]) / (period * freq)
    order = (mod + 1) // 2
    harmonic = np.cos if mod % 2 == 0 else np.sin
    expected_values = harmonic(2 * np.pi * order * elapsed).values

    for segment in example_df["segment"].unique():
        transform_values = transformed_df.loc[:, pd.IndexSlice[segment, f"regressor_fourier_{mod}"]]
        assert np.allclose(transform_values, expected_values, atol=1e-12)
|
||
|
||
def test_forecast(ts_trend_seasonal):
    """Check that a linear model on fourier features forecasts the seasonal dataset with R2 above 0.95."""
    weekly_transform = FourierTransform(period=7, order=3)
    monthly_transform = FourierTransform(period=30.4, order=5)

    ts_train, ts_test = ts_trend_seasonal.train_test_split(test_size=10)
    ts_train.fit_transform(transforms=[weekly_transform, monthly_transform])

    model = LinearPerSegmentModel()
    model.fit(ts_train)
    ts_forecast = model.forecast(ts_train.make_future(10))

    r2 = R2("macro")(ts_test, ts_forecast)
    assert r2 > 0.95