Skip to content

Add option day_number_in_year to DateFlagsTransform #552

Merged
merged 9 commits into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add plot_time_series_with_change_points function ([#534](https://github.com/tinkoff-ai/etna/pull/534))
-
- Add find_change_points function ([#521](https://github.com/tinkoff-ai/etna/pull/521))
-
- Add option day_number_in_year to DateFlagsTransform ([#552](https://github.com/tinkoff-ai/etna/pull/552))
- Add plot_residuals ([#539](https://github.com/tinkoff-ai/etna/pull/539))
-
- Create `PerSegmentBaseModel`, `PerSegmentPredictionIntervalModel` ([#537](https://github.com/tinkoff-ai/etna/pull/537))
Expand Down
35 changes: 30 additions & 5 deletions etna/transforms/timestamp/date_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(
self,
day_number_in_week: Optional[bool] = True,
day_number_in_month: Optional[bool] = True,
day_number_in_year: Optional[bool] = False,
week_number_in_month: Optional[bool] = False,
week_number_in_year: Optional[bool] = False,
month_number_in_year: Optional[bool] = False,
Expand All @@ -34,6 +35,8 @@ def __init__(
if True, add column with weekday info to feature dataframe in transform
day_number_in_month:
if True, add column with day info to feature dataframe in transform
day_number_in_year:
if True, add column with number of day in a year with leap year numeration (values from 1 to 366)
week_number_in_month:
if True, add column with week number (in month context) to feature dataframe in transform
week_number_in_year:
Expand All @@ -52,8 +55,8 @@ def __init__(
with flag that shows given date is a special day
out_column:
base for the name of created columns;
if set the final name is '{out_column}_{feature_name}', don't forget to add 'regressor_' prefix;
if don't set, name will be 'regressor_{transform.__repr__()}',
if set the final name is '{out_column}_{feature_name}';
if don't set, name will be `transform.__repr__()`,
repr will be made for transform that creates exactly this column

Notes
Expand All @@ -76,6 +79,7 @@ def __init__(
[
day_number_in_week,
day_number_in_month,
day_number_in_year,
week_number_in_month,
week_number_in_year,
month_number_in_year,
Expand All @@ -87,13 +91,14 @@ def __init__(
):
raise ValueError(
f"{type(self).__name__} feature does nothing with given init args configuration, "
f"at least one of day_number_in_week, day_number_in_month, week_number_in_month, "
f"at least one of day_number_in_week, day_number_in_month, day_number_in_year, week_number_in_month, "
f"week_number_in_year, month_number_in_year, year_number, is_weekend should be True or any of "
f"special_days_in_week, special_days_in_month should be not empty."
)

self.day_number_in_week = day_number_in_week
self.day_number_in_month = day_number_in_month
self.day_number_in_year = day_number_in_year
self.week_number_in_month = week_number_in_month
self.week_number_in_year = week_number_in_year
self.month_number_in_year = month_number_in_year
Expand All @@ -109,6 +114,7 @@ def __init__(
self._empty_parameters = dict(
day_number_in_week=False,
day_number_in_month=False,
day_number_in_year=False,
week_number_in_month=False,
week_number_in_year=False,
month_number_in_year=False,
Expand All @@ -123,7 +129,7 @@ def _get_column_name(self, feature_name: str) -> str:
init_parameters = deepcopy(self._empty_parameters)
init_parameters[feature_name] = self.__dict__[feature_name]
temp_transform = DateFlagsTransform(**init_parameters, out_column=self.out_column) # type: ignore
return f"regressor_{temp_transform.__repr__()}"
return temp_transform.__repr__()
else:
return f"{self.out_column}_{feature_name}"

Expand Down Expand Up @@ -156,6 +162,11 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
timestamp_series=timestamp_series
)

if self.day_number_in_year:
features[self._get_column_name("day_number_in_year")] = self._get_day_number_in_year(
timestamp_series=timestamp_series
)

if self.week_number_in_month:
features[self._get_column_name("week_number_in_month")] = self._get_week_number_in_month(
timestamp_series=timestamp_series
Expand Down Expand Up @@ -228,11 +239,25 @@ def _get_day_number_in_month(timestamp_series: pd.Series) -> np.ndarray:
"""Generate an array with the number of the day in the month."""
return timestamp_series.apply(lambda x: x.day).values

@staticmethod
def _get_day_number_in_year(timestamp_series: pd.Series) -> np.ndarray:
    """Generate an array with number of day in a year with leap year numeration (values from 1 to 366).

    Every date is numbered as if its year were a leap year, so the same calendar
    date always maps to the same number regardless of the year: in a non-leap
    year the number 60 (February 29) is skipped and March 1 is still 61.

    Parameters
    ----------
    timestamp_series:
        series of timestamps to compute the feature for

    Returns
    -------
    :
        array with the day number for each timestamp, in the order of the input
    """
    # No-op for datetime64 input; keeps object-dtype series of Timestamps working with ``.dt``.
    timestamps = pd.to_datetime(timestamp_series)
    day_of_year = timestamps.dt.dayofyear
    # In non-leap years every day from March 1 onwards is shifted by one
    # to leave room for the missing February 29.
    shifted = (~timestamps.dt.is_leap_year) & (timestamps.dt.month >= 3)
    return (day_of_year + shifted.astype(int)).values

@staticmethod
def _get_week_number_in_month(timestamp_series: pd.Series) -> np.ndarray:
"""Generate an array with the week number in the month."""

def week_of_month(dt: pd.Timestamp) -> float:
def week_of_month(dt: pd.Timestamp) -> int:
"""Return week of month number.

How it works:
Expand Down
Loading