Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "[WIP] CV model selection" #180

Merged
merged 1 commit into from
May 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
881 changes: 0 additions & 881 deletions examples/CV-drafts.ipynb

This file was deleted.

5 changes: 1 addition & 4 deletions gtime/model_selection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

from .horizon_shift import horizon_shift
from .splitters import FeatureSplitter
from .cv_pipeline import CVPipeline
from .cross_validation import time_series_split, blocking_time_series_split

__all__ = ["FeatureSplitter", "horizon_shift",
"CVPipeline",
"time_series_split", "blocking_time_series_split"]
__all__ = ["FeatureSplitter", "horizon_shift"]
2 changes: 0 additions & 2 deletions gtime/model_selection/cross_validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from typing import Union


def _time_series_split_on_index(time_series, n_samples, n_splits):
Expand Down
197 changes: 0 additions & 197 deletions gtime/model_selection/cv_pipeline.py

This file was deleted.

80 changes: 0 additions & 80 deletions gtime/model_selection/tests/test_cv_pipeline.py

This file was deleted.

46 changes: 5 additions & 41 deletions gtime/time_series_models/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from typing import List, Tuple, Union

import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import check_is_fitted

from gtime.compose import FeatureCreation
from gtime.model_selection import horizon_shift, FeatureSplitter
from gtime.metrics import rmse


class TimeSeriesForecastingModel(BaseEstimator, RegressorMixin):
""" Base class for a generic time series forecasting model.
Expand Down Expand Up @@ -88,12 +87,12 @@ def fit(self, X: pd.DataFrame, y: pd.DataFrame = None, only_model: bool = False)
self
"""
if not only_model:
X_train, y_train, X_test, y_test = self._compute_train_test_matrices(X, y)
X_train, y_train, X_test = self._compute_train_test_matrices(X, y)
elif not self.cache_features:
raise AttributeError("cache_feature must be True to fit only model")
else:
check_is_fitted(self) # only_model works if the model is already fitted
X_train, y_train, X_test, y_test = self.X_train_, self.y_train_, self.X_test_, self.y_test_
X_train, y_train, X_test = self.X_train_, self.y_train_, self.X_test_

self.model_ = self._fit_model(X_train, y_train)
self.X_test_ = X_test
Expand Down Expand Up @@ -127,15 +126,14 @@ def set_params(self, **params):

def _compute_train_test_matrices(
self, X: pd.DataFrame, y: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
X, y = self._create_X_y_feature_matrices(X, y)
X_train, y_train, X_test, y_test = self._split_train_test(X, y)
if self.cache_features:
self.X_train_ = X_train
self.y_train_ = y_train
self.X_test_ = X_test
self.y_test_ = y_test
return X_train, y_train, X_test, y_test
return X_train, y_train, X_test

def _create_X_y_feature_matrices(
self, X, y=None
Expand All @@ -159,37 +157,3 @@ def _reset(self):
]
for attribute in attributes:
delattr(self, attribute)

def score(self, X=None, y=None, metrics=None):
    """Score the fitted model on its training data and on test data.

    Parameters
    ----------
    X : optional
        Input forwarded to ``self.predict``. When ``None``,
        ``self.predict()`` is called without arguments.
    y : optional
        Ground truth compared with the test predictions. When ``None``,
        the cached ``self.y_test_`` is used.
    metrics : dict, optional
        Mapping ``name -> callable(y_true, y_pred)``. Defaults to
        ``{'rmse': rmse}``.

    Returns
    -------
    pd.DataFrame
        One row per metric, with columns ``'Train score'`` and
        ``'Test score'``.

    Raises
    ------
    AttributeError
        If the cached matrices needed for scoring are not present on the
        instance.
    """
    check_is_fitted(self)

    # The train/test matrices are stored on the instance only when
    # ``cache_features`` is enabled; fail with an explicit message instead
    # of a bare attribute lookup error further down.
    needed = ['X_train_', 'y_train_']
    if y is None:
        needed.append('y_test_')
    missing = [name for name in needed if not hasattr(self, name)]
    if missing:
        raise AttributeError(
            "cache_features must be True to compute scores; "
            "missing attributes: " + ", ".join(missing)
        )

    if X is None:
        y_pred_test = self.predict()
    else:
        y_pred_test = self.predict(X)
    y_pred_train = self.model_.predict(self.X_train_)

    y_test = self.y_test_ if y is None else y

    if metrics is None:
        metrics = {'rmse': rmse}

    score = pd.DataFrame(columns=metrics.keys(), index=['Train score', 'Test score'])
    score.loc[['Train score'], :] = self._score(
        self.y_train_, y_pred_train, metrics=metrics, type='Train score'
    )
    score.loc[['Test score'], :] = self._score(
        y_test, y_pred_test, metrics=metrics, type='Test score'
    )
    # Transpose so that metrics are rows and train/test are columns.
    return score.T

def _score(self, y, y_pred, metrics=None, type='Test'):
    """Evaluate every metric on ``y`` vs ``y_pred``, averaged over columns.

    For each column of ``y`` the rows where the target is missing are
    dropped from both the target and the prediction before the metric is
    called. Columns with no observed target are skipped. The per-column
    values are then averaged.

    Parameters
    ----------
    y : pd.DataFrame
        Ground truth; may contain NaN values.
    y_pred : pd.DataFrame
        Predictions, indexed by the same column names as ``y``.
    metrics : dict
        Mapping ``name -> callable(y_true, y_pred)``. The default is
        ``None``, but the body calls ``metrics.keys()``, so a mapping must
        be supplied.
    type : str
        Label used as the single row index of the result. (The name
        shadows the builtin but is part of the existing signature.)

    Returns
    -------
    pd.DataFrame
        A one-row frame indexed by ``type`` with one column per metric.
        A metric is NaN when no column of ``y`` has an observed value.
    """
    score = pd.DataFrame(columns=list(metrics.keys()), index=[type])
    for name, metric in metrics.items():
        per_column = []
        for col in y.columns:
            observed = y[col].notna()
            if observed.any():
                per_column.append(metric(y[col][observed], y_pred[col][observed]))
        # Avoid np.mean([]), which warns about an empty mean; make the
        # "nothing to score" result an explicit NaN instead.
        score[name] = np.mean(per_column) if per_column else np.nan
    return score

def set_model(self, model):
    """Replace the estimator stored in ``self.model``.

    Parameters
    ----------
    model
        Object assigned to the ``model`` attribute. Only the attribute is
        rebound here; nothing else on the instance is touched.
    """
    self.model = model