Skip to content

Add option day_number_in_year to DateFlagsTransform #552

Merged
merged 9 commits into from
Feb 22, 2022
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add plot_time_series_with_change_points function ([#534](https://github.com/tinkoff-ai/etna/pull/534))
-
- Add find_change_points function ([#521](https://github.com/tinkoff-ai/etna/pull/521))
-
- Add option day_number_in_year to DateFlagsTransform ([#552](https://github.com/tinkoff-ai/etna/pull/552))
- Add plot_residuals ([#539](https://github.com/tinkoff-ai/etna/pull/539))
-
- Create `PerSegmentBaseModel`, `PerSegmentPredictionIntervalModel` ([#537](https://github.com/tinkoff-ai/etna/pull/537))
Expand Down
29 changes: 27 additions & 2 deletions etna/transforms/timestamp/date_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(
self,
day_number_in_week: Optional[bool] = True,
day_number_in_month: Optional[bool] = True,
day_number_in_year: Optional[bool] = False,
week_number_in_month: Optional[bool] = False,
week_number_in_year: Optional[bool] = False,
month_number_in_year: Optional[bool] = False,
Expand All @@ -34,6 +35,8 @@ def __init__(
if True, add column with weekday info to feature dataframe in transform
day_number_in_month:
if True, add column with day info to feature dataframe in transform
day_number_in_year:
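if True, add column with the number of the day in the year to feature dataframe in transform; leap-year numbering is used for every year (values run from 1 to 366), so in a non-leap year days from March 1 onward are shifted by one and number 60 (February 29) is skipped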
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you rewrite it to make it clearer?

week_number_in_month:
if True, add column with week number (in month context) to feature dataframe in transform
week_number_in_year:
Expand Down Expand Up @@ -76,6 +79,7 @@ def __init__(
[
day_number_in_week,
day_number_in_month,
day_number_in_year,
week_number_in_month,
week_number_in_year,
month_number_in_year,
Expand All @@ -87,13 +91,14 @@ def __init__(
):
raise ValueError(
f"{type(self).__name__} feature does nothing with given init args configuration, "
f"at least one of day_number_in_week, day_number_in_month, week_number_in_month, "
f"at least one of day_number_in_week, day_number_in_month, day_number_in_year, week_number_in_month, "
f"week_number_in_year, month_number_in_year, year_number, is_weekend should be True or any of "
f"special_days_in_week, special_days_in_month should be not empty."
)

self.day_number_in_week = day_number_in_week
self.day_number_in_month = day_number_in_month
self.day_number_in_year = day_number_in_year
self.week_number_in_month = week_number_in_month
self.week_number_in_year = week_number_in_year
self.month_number_in_year = month_number_in_year
Expand All @@ -109,6 +114,7 @@ def __init__(
self._empty_parameters = dict(
day_number_in_week=False,
day_number_in_month=False,
day_number_in_year=False,
week_number_in_month=False,
week_number_in_year=False,
month_number_in_year=False,
Expand Down Expand Up @@ -156,6 +162,11 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
timestamp_series=timestamp_series
)

if self.day_number_in_year:
features[self._get_column_name("day_number_in_year")] = self._get_day_number_in_year(
timestamp_series=timestamp_series
)

if self.week_number_in_month:
features[self._get_column_name("week_number_in_month")] = self._get_week_number_in_month(
timestamp_series=timestamp_series
Expand Down Expand Up @@ -228,11 +239,25 @@ def _get_day_number_in_month(timestamp_series: pd.Series) -> np.ndarray:
"""Generate an array with the number of the day in the month."""
return timestamp_series.apply(lambda x: x.day).values

@staticmethod
def _get_day_number_in_year(timestamp_series: pd.Series) -> np.ndarray:
"""Generate an array with the number of the day in a year, leap year numeration, starts with 1."""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Generate an array with the number of the day in a year, lear year numeration, starts with 1."""
"""Generate an array with the numbers of the days in a year, leap year numeration, starts with 1."""

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Get the right variant from the constructor docstring


def leap_year_number(dt: pd.Timestamp) -> int:
    """Return the 1-based day of the year using leap-year numbering.

    Every year is numbered as if it had a February 29: in a non-leap year
    the days from March 1 onward are shifted forward by one, so a given
    calendar date maps to the same number in leap and non-leap years.
    """
    # Only non-leap dates after February need the one-day shift.
    needs_shift = not (dt.is_leap_year or dt.month < 3)
    offset = 1 if needs_shift else 0
    return dt.dayofyear + offset

return timestamp_series.apply(leap_year_number).values

@staticmethod
def _get_week_number_in_month(timestamp_series: pd.Series) -> np.ndarray:
"""Generate an array with the week number in the month."""

def week_of_month(dt: pd.Timestamp) -> float:
def week_of_month(dt: pd.Timestamp) -> int:
"""Return week of month number.

How it works:
Expand Down
17 changes: 14 additions & 3 deletions tests/test_transforms/test_timestamp/test_dateflags_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
INIT_PARAMS_TEMPLATE = {
"day_number_in_week": False,
"day_number_in_month": False,
"day_number_in_year": False,
"week_number_in_year": False,
"week_number_in_month": False,
"month_number_in_year": False,
Expand Down Expand Up @@ -44,6 +45,9 @@ def dateflags_true_df() -> pd.DataFrame:
df = dataframes[i]
df[f"regressor_{out_column}_day_number_in_week"] = df["timestamp"].dt.weekday
df[f"regressor_{out_column}_day_number_in_month"] = df["timestamp"].dt.day
df[f"regressor_{out_column}_day_number_in_year"] = df["timestamp"].apply(
lambda dt: dt.dayofyear + 1 if not dt.is_leap_year and dt.month >= 3 else dt.dayofyear
)
df[f"regressor_{out_column}_week_number_in_year"] = df["timestamp"].dt.week
df[f"regressor_{out_column}_month_number_in_year"] = df["timestamp"].dt.month
df[f"regressor_{out_column}_year_number"] = df["timestamp"].dt.year
Expand Down Expand Up @@ -93,6 +97,7 @@ def test_invalid_arguments_configuration():
_ = DateFlagsTransform(
day_number_in_month=False,
day_number_in_week=False,
day_number_in_year=False,
week_number_in_month=False,
week_number_in_year=False,
month_number_in_year=False,
Expand All @@ -109,6 +114,7 @@ def test_repr():
transform = DateFlagsTransform(
day_number_in_week=True,
day_number_in_month=True,
day_number_in_year=False,
week_number_in_month=False,
week_number_in_year=False,
month_number_in_year=True,
Expand All @@ -119,9 +125,9 @@ def test_repr():
)
transform_repr = transform.__repr__()
true_repr = (
f"{transform_class_repr}(day_number_in_week = True, day_number_in_month = True, week_number_in_month = False, "
f"week_number_in_year = False, month_number_in_year = True, year_number = True, is_weekend = True, special_days_in_week = (1, 2), "
f"special_days_in_month = (12,), out_column = None, )"
f"{transform_class_repr}(day_number_in_week = True, day_number_in_month = True, day_number_in_year = False, "
f"week_number_in_month = False, week_number_in_year = False, month_number_in_year = True, year_number = True, "
f"is_weekend = True, special_days_in_week = (1, 2), special_days_in_month = (12,), out_column = None, )"
)
assert transform_repr == true_repr

Expand All @@ -131,6 +137,7 @@ def test_repr():
(
["day_number_in_week"],
["day_number_in_month"],
["day_number_in_year"],
["week_number_in_year"],
["week_number_in_month"],
["month_number_in_year"],
Expand All @@ -139,6 +146,7 @@ def test_repr():
[
"day_number_in_week",
"day_number_in_month",
"day_number_in_year",
"week_number_in_year",
"week_number_in_month",
"month_number_in_year",
Expand Down Expand Up @@ -173,6 +181,7 @@ def test_interface_correct_args_out_column(true_params: List[str], train_df: pd.
(
["day_number_in_week"],
["day_number_in_month"],
["day_number_in_year"],
["week_number_in_year"],
["week_number_in_month"],
["month_number_in_year"],
Expand All @@ -181,6 +190,7 @@ def test_interface_correct_args_out_column(true_params: List[str], train_df: pd.
[
"day_number_in_week",
"day_number_in_month",
"day_number_in_year",
"week_number_in_year",
"week_number_in_month",
"month_number_in_year",
Expand Down Expand Up @@ -230,6 +240,7 @@ def test_interface_correct_args_repr(true_params: List[str], train_df: pd.DataFr
(
{"day_number_in_week": True},
{"day_number_in_month": True},
{"day_number_in_year": True},
{"week_number_in_year": True},
{"week_number_in_month": True},
{"month_number_in_year": True},
Expand Down