Skip to content

Commit

Permalink
Add FourierTransform (#306)
Browse files Browse the repository at this point in the history
* Add FourierTransform

* Update changelog

* Add notes section to explain some details
  • Loading branch information
Mr-Geekman authored Nov 30, 2021
1 parent 2fb5fc6 commit e495f57
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Spell checking for source code and md files ([#303](https://github.com/tinkoff-ai/etna/pull/303))
- ResampleWithDistributionTransform ([#296](https://github.com/tinkoff-ai/etna/pull/296))
- Add function to duplicate exogenous data ([#305](https://github.com/tinkoff-ai/etna/pull/305))
- FourierTransform ([#306](https://github.com/tinkoff-ai/etna/pull/306))

### Changed
- Rename confidence interval to prediction interval, start working with quantiles instead of interval_width ([#285](https://github.com/tinkoff-ai/etna/pull/285))
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from etna.transforms.detrend import TheilSenTrendTransform
from etna.transforms.feature_importance import TreeFeatureSelectionTransform
from etna.transforms.filter import FilterFeaturesTransform
from etna.transforms.fourier import FourierTransform
from etna.transforms.gale_shapley import GaleShapleyFeatureSelectionTransform
from etna.transforms.imputation import TimeSeriesImputerTransform
from etna.transforms.lags import LagTransform
Expand Down
135 changes: 135 additions & 0 deletions etna/transforms/fourier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import math
from typing import Optional
from typing import Sequence

import numpy as np
import pandas as pd

from etna.transforms.base import Transform


class FourierTransform(Transform):
    """Adds fourier features to the dataset.

    Every requested harmonic becomes one extra column that is attached to each segment
    of the wide dataframe. ``fit`` keeps no state; all the work happens in ``transform``.
    """

    def __init__(
        self,
        period: float,
        order: Optional[int] = None,
        mods: Optional[Sequence[int]] = None,
        out_column: Optional[str] = None,
    ):
        """Create instance of FourierTransform.

        Parameters
        ----------
        period:
            the period of the seasonality to capture in frequency units of time series, it should be >= 2
        order:
            upper order of Fourier components to include, it should be >= 1 and <= ceil(period/2)
        mods:
            alternative and precise way of defining which harmonics will be used,
            for example `mods=[1, 3, 4]` means that sin of the first order
            and sin and cos of the second order will be used,
            mods should be non-empty and every mod should be >= 1 and < period
        out_column:
            if set, name of added column, the final name will be '{out_column}_{mod}',
            don't forget to add 'regressor_' prefix
            if don't set, name will be 'regressor_{repr}', repr will represent class that creates exactly this column

        Raises
        ------
        ValueError:
            if period < 2
        ValueError:
            if both or none of order, mods is set
        ValueError:
            if order is < 1 or > ceil(period/2)
        ValueError:
            if mods is empty
        ValueError:
            if at least one mod is < 1 or >= period

        Notes
        -----
        To understand how transform works we recommend: https://otexts.com/fpp2/useful-predictors.html#fourier-series

        * Parameter `period` is responsible for the seasonality we want to capture.
        * Parameters `order` and `mods` define which harmonics will be used.

        Parameter `order` is a more user-friendly version of `mods`.
        For example, `order=2` can be represented as `mods=[1, 2, 3, 4]` if `period` > 4 and
        as `mods=[1, 2, 3]` if 3 < `period` <= 4 (mods that are not strictly below `period` are dropped).
        """
        if period < 2:
            raise ValueError("Period should be at least 2")
        self.period = period
        self.mods: Sequence[int]

        if order is not None and mods is None:
            if order < 1 or order > math.ceil(period / 2):
                raise ValueError("Order should be within [1, ceil(period/2)] range")
            # order k contributes mods 2k-1 (sin) and 2k (cos); keep only mods strictly below the period
            self.mods = [mod for mod in range(1, 2 * order + 1) if mod < period]
        elif mods is not None and order is None:
            # without this guard min()/max() below fail with an unrelated "empty sequence" message
            if len(mods) == 0:
                raise ValueError("There should be at least one mod")
            if min(mods) < 1 or max(mods) >= period:
                raise ValueError("Every mod should be within [1, period) range")
            self.mods = mods
        else:
            raise ValueError("There should be exactly one option set: order or mods")

        self.out_column = out_column

    def fit(self, df: pd.DataFrame) -> "FourierTransform":
        """Fit method does nothing and is kept for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.

        Returns
        -------
        result: FourierTransform
        """
        return self

    def _get_column_name(self, mod: int) -> str:
        """Build the name of the column that holds the harmonic ``mod``.

        Without ``out_column`` the name is 'regressor_' followed by the repr of a
        single-mod transform with the same period; otherwise it is '{out_column}_{mod}'.
        """
        if self.out_column is None:
            return f"regressor_{repr(FourierTransform(period=self.period, mods=[mod]))}"
        else:
            return f"{self.out_column}_{mod}"

    @staticmethod
    def _construct_answer(df: pd.DataFrame, features: pd.DataFrame) -> pd.DataFrame:
        """Join ``features`` to every segment of ``df`` and rebuild the two-level columns.

        Parameters
        ----------
        df:
            dataframe whose columns have a level named "segment"
        features:
            dataframe with the generated columns, joined to each segment on the index

        Returns
        -------
        result: pd.DataFrame
            dataframe with columns sorted and named ("segment", "feature")
        """
        dataframes = []
        for seg in df.columns.get_level_values("segment").unique():
            tmp = df[seg].join(features)
            # put the segment name back as the first column level
            _idx = tmp.columns.to_frame()
            _idx.insert(0, "segment", seg)
            tmp.columns = pd.MultiIndex.from_frame(_idx)
            dataframes.append(tmp)

        result = pd.concat(dataframes, axis=1).sort_index(axis=1)
        result.columns.names = ["segment", "feature"]
        return result

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add harmonics to the dataset.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        result: pd.Dataframe
            transformed dataframe

        Notes
        -----
        The phase is counted in rows starting from the first row of ``df``, not from
        an absolute timestamp, so the values depend on where the passed frame begins.
        """
        features = pd.DataFrame(index=df.index)
        elapsed = np.arange(features.shape[0]) / self.period

        for mod in self.mods:
            order = (mod + 1) // 2
            is_cos = mod % 2 == 0

            # cos(x) == sin(x + pi/2): even mods get the quarter-period shift, odd mods stay plain sin
            features[self._get_column_name(mod)] = np.sin(2 * np.pi * order * elapsed + np.pi / 2 * is_cos)

        return self._construct_answer(df, features)
136 changes: 136 additions & 0 deletions tests/test_transforms/test_fourier_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import numpy as np
import pandas as pd
import pytest

from etna.datasets import TSDataset
from etna.metrics import R2
from etna.models import LinearPerSegmentModel
from etna.transforms import FourierTransform


def add_seasonality(series: pd.Series, period: int, magnitude: float) -> pd.Series:
    """Return a new series equal to ``series`` plus a sine wave.

    The wave is evaluated at positions 0..len(series)-1 with the given ``period``
    and scaled by ``magnitude``; the input series is left untouched.
    """
    positions = np.arange(len(series))
    wave = np.sin(2 * np.pi * positions / period) * magnitude
    return series + wave


def get_one_df(period_1, period_2, magnitude_1, magnitude_2):
    """Build a daily frame for 2020-01-01..2021-01-01 with a synthetic ``target``.

    The target is the sum of a second-order sine with ``period_1``, a third-order
    cosine with ``period_2`` (each scaled by its magnitude) and gaussian noise with
    scale 0.05 drawn from the global numpy random state.
    """
    timestamp = pd.date_range(start="2020-01-01", end="2021-01-01", freq="D")
    n_points = timestamp.shape[0]
    steps = np.arange(n_points)

    sin_part = np.sin(2 * np.pi * steps * 2 / period_1) * magnitude_1
    cos_part = np.cos(2 * np.pi * steps * 3 / period_2) * magnitude_2
    noise = np.random.normal(scale=0.05, size=n_points)

    return pd.DataFrame({"timestamp": timestamp, "target": sin_part + cos_part + noise})


@pytest.fixture
def ts_trend_seasonal(random_seed) -> TSDataset:
    """Daily two-segment dataset with weekly and ~monthly seasonality of different magnitudes.

    Depends on the ``random_seed`` fixture (presumably it fixes the numpy RNG used for the
    noise term; verify in conftest).
    """
    segment_magnitudes = (
        ("segment_1", 1, 1 / 2),
        ("segment_2", 1 / 2, 1 / 5),
    )
    frames = []
    for segment_name, magnitude_1, magnitude_2 in segment_magnitudes:
        segment_df = get_one_df(period_1=7, period_2=30.4, magnitude_1=magnitude_1, magnitude_2=magnitude_2)
        segment_df["segment"] = segment_name
        frames.append(segment_df)

    classic_df = pd.concat(frames, ignore_index=True)
    return TSDataset(TSDataset.to_dataset(classic_df), freq="D")


@pytest.mark.parametrize("period", [-1, 0, 1, 1.5])
def test_fail_period(period):
    """Periods below 2 must be rejected when the transform is constructed."""
    expected_message = "Period should be at least 2"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=period, order=1)


@pytest.mark.parametrize("order", [0, 5])
def test_fail_order(order):
    """Orders outside [1, ceil(period/2)] must be rejected for period=7."""
    expected_message = "Order should be within"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7, order=order)


@pytest.mark.parametrize("mods", [[0], [0, 1, 2, 3], [1, 2, 3, 7], [7]])
def test_fail_mods(mods):
    """Any mod below 1 or not below the period (7 here) must be rejected."""
    expected_message = "Every mod should be within"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7, mods=mods)


def test_fail_set_none():
    """Constructing the transform with neither order nor mods must fail."""
    expected_message = "There should be exactly one option set"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7)


def test_fail_set_both():
    """Constructing the transform with both order and mods must fail."""
    expected_message = "There should be exactly one option set"
    with pytest.raises(ValueError, match=expected_message):
        FourierTransform(period=7, order=1, mods=[1, 2, 3])


@pytest.mark.parametrize(
    "period, order, num_columns", [(6, 2, 4), (7, 2, 4), (6, 3, 5), (7, 3, 6), (5.5, 2, 4), (5.5, 3, 5)]
)
def test_column_names(example_df, period, order, num_columns):
    """Check the number of generated columns and that each default name round-trips.

    A default column name is 'regressor_' plus the repr of a transform; evaluating that
    repr and applying the resulting transform must yield exactly the same single column.
    """
    prefix = "regressor_"
    df = TSDataset.to_dataset(example_df)

    result = FourierTransform(period=period, order=order).fit_transform(df)
    generated = result.columns.get_level_values("feature").unique().drop("target")
    assert len(generated) == num_columns

    for name in generated:
        # eval is applied only to names produced by the transform itself within this test
        recreated_transform = eval(name[len(prefix) :])
        recreated_df = recreated_transform.fit_transform(df)
        recreated = recreated_df.columns.get_level_values("feature").unique().drop("target")
        assert len(recreated) == 1
        assert recreated[0] == name


def test_column_names_out_column(example_df):
    """With `out_column` set, columns must be named '{out_column}_{mod}' for mods 1..6."""
    wide_df = TSDataset.to_dataset(example_df)
    fourier = FourierTransform(period=10, order=3, out_column="regressor_fourier")

    result = fourier.fit_transform(wide_df)
    generated = result.columns.get_level_values("feature").unique().drop("target")

    assert set(generated) == {f"regressor_fourier_{i}" for i in range(1, 7)}


@pytest.mark.parametrize("period, mod", [(24, 1), (24, 2), (24, 9), (24, 20), (24, 23), (7.5, 3), (7.5, 4)])
def test_column_values(example_df, period, mod):
    """Test that transform generates correct values.

    Expected values are computed from the timestamps: odd mods are sines and even mods
    are cosines of order ``(mod + 1) // 2``.
    NOTE(review): the one-hour step assumes ``example_df`` has hourly frequency; confirm
    against the fixture definition.
    """
    df = TSDataset.to_dataset(example_df)
    transform = FourierTransform(period=period, mods=[mod], out_column="regressor_fourier")
    transformed_df = transform.fit_transform(df)

    # The expectation does not depend on the segment, so compute it once.
    timestamp = df.index
    # keyword form instead of the "1H" string: the upper-case unit alias is deprecated in recent pandas
    freq = pd.Timedelta(hours=1)
    elapsed = (timestamp - timestamp[0]) / (period * freq)
    order = (mod + 1) // 2
    if mod % 2 == 0:
        expected_values = np.cos(2 * np.pi * order * elapsed).values
    else:
        expected_values = np.sin(2 * np.pi * order * elapsed).values

    for segment in example_df["segment"].unique():
        transform_values = transformed_df.loc[:, pd.IndexSlice[segment, f"regressor_fourier_{mod}"]]
        assert np.allclose(transform_values, expected_values, atol=1e-12)


def test_forecast(ts_trend_seasonal):
    """Fourier features for both seasonalities should let a linear model reach macro R2 above 0.95."""
    horizon = 10
    weekly = FourierTransform(period=7, order=3)
    monthly = FourierTransform(period=30.4, order=5)

    ts_train, ts_test = ts_trend_seasonal.train_test_split(test_size=horizon)
    ts_train.fit_transform(transforms=[weekly, monthly])

    model = LinearPerSegmentModel()
    model.fit(ts_train)
    ts_forecast = model.forecast(ts_train.make_future(horizon))

    r2 = R2("macro")(ts_test, ts_forecast)
    assert r2 > 0.95

0 comments on commit e495f57

Please sign in to comment.