Skip to content

Commit

Permalink
Fix TSDataset.info, TSDataset.describe methods (#519)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-Geekman authored Feb 9, 2022
1 parent 8267bad commit e3ea0ad
Show file tree
Hide file tree
Showing 4 changed files with 903 additions and 110 deletions.
21 changes: 13 additions & 8 deletions etna/datasets/tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,7 @@ def _gather_common_data(self) -> Dict[str, Any]:
"num_segments": len(self.segments),
"num_exogs": self.df.columns.get_level_values("feature").difference(["target"]).nunique(),
"num_regressors": len(self.regressors),
"num_known_future": len(self.known_future),
"freq": self.freq,
}

Expand Down Expand Up @@ -930,6 +931,7 @@ def describe(self, segments: Optional[Sequence[str]] = None) -> pd.DataFrame:
* num_segments: total number of segments, common for all segments
* num_exogs: number of exogenous features, common for all segments
* num_regressors: number of exogenous features that are regressors, common for all segments
* num_known_future: number of regressors that are known since creation, common for all segments
* freq: frequency of the series, common for all segments
Parameters
Expand Down Expand Up @@ -960,12 +962,12 @@ def describe(self, segments: Optional[Sequence[str]] = None) -> pd.DataFrame:
... )
>>> df_exog = pd.concat([df_regressors_1, df_regressors_2], ignore_index=True)
>>> df_exog_ts_format = TSDataset.to_dataset(df_exog)
>>> ts = TSDataset(df_ts_format, df_exog=df_exog_ts_format, freq="D")
>>> ts.describe() # doctest: +SKIP
start_timestamp end_timestamp length num_missing num_segments num_exogs num_regressors freq
>>> ts = TSDataset(df_ts_format, df_exog=df_exog_ts_format, freq="D", known_future="all")
>>> ts.describe()
start_timestamp end_timestamp length num_missing num_segments num_exogs num_regressors num_known_future freq
segments
segment_0 2021-06-01 2021-06-30 30 0 2 1 1 D
segment_1 2021-06-01 2021-06-30 30 0 2 1 1 D
segment_0 2021-06-01 2021-06-30 30 0 2 1 1 1 D
segment_1 2021-06-01 2021-06-30 30 0 2 1 1 1 D
"""
if segments is None:
segments = self.segments
Expand All @@ -980,6 +982,7 @@ def describe(self, segments: Optional[Sequence[str]] = None) -> pd.DataFrame:
segments_dict["num_segments"] = [common_dict["num_segments"]] * len(segments)
segments_dict["num_exogs"] = [common_dict["num_exogs"]] * len(segments)
segments_dict["num_regressors"] = [common_dict["num_regressors"]] * len(segments)
segments_dict["num_known_future"] = [common_dict["num_known_future"]] * len(segments)
segments_dict["freq"] = [common_dict["freq"]] * len(segments)

result_df = pd.DataFrame(segments_dict, index=segments)
Expand All @@ -991,6 +994,7 @@ def describe(self, segments: Optional[Sequence[str]] = None) -> pd.DataFrame:
"num_segments",
"num_exogs",
"num_regressors",
"num_known_future",
"freq",
]
result_df = result_df[columns_order]
Expand All @@ -1006,6 +1010,7 @@ def info(self, segments: Optional[Sequence[str]] = None) -> None:
* num_segments: total number of segments
* num_exogs: number of exogenous features
* num_regressors: number of exogenous features that are regressors
* num_known_future: number of regressors that are known since creation
* freq: frequency of the dataset
Information about individual segments:
Expand All @@ -1014,7 +1019,6 @@ def info(self, segments: Optional[Sequence[str]] = None) -> None:
* length: length according to start_timestamp and end_timestamp
* num_missing: number of missing values between start_timestamp and end_timestamp
Parameters
----------
segments:
Expand All @@ -1037,12 +1041,13 @@ def info(self, segments: Optional[Sequence[str]] = None) -> None:
... )
>>> df_exog = pd.concat([df_regressors_1, df_regressors_2], ignore_index=True)
>>> df_exog_ts_format = TSDataset.to_dataset(df_exog)
>>> ts = TSDataset(df_ts_format, df_exog=df_exog_ts_format, freq="D")
>>> ts.info() # doctest: +SKIP
>>> ts = TSDataset(df_ts_format, df_exog=df_exog_ts_format, freq="D", known_future="all")
>>> ts.info()
<class 'etna.datasets.TSDataset'>
num_segments: 2
num_exogs: 1
num_regressors: 1
num_known_future: 1
freq: D
start_timestamp end_timestamp length num_missing
segments
Expand Down
Loading

0 comments on commit e3ea0ad

Please sign in to comment.