Also score support and ranking 🚀
dunnkers committed Jun 7, 2021
1 parent 07b62b7 commit 9ee15a5
Showing 3 changed files with 185 additions and 32 deletions.
6 changes: 4 additions & 2 deletions fseval/pipelines/_experiment.py
@@ -4,10 +4,9 @@
from typing import List

import pandas as pd
from humanfriendly import format_timespan

from fseval.pipeline.estimator import Estimator
from fseval.types import AbstractEstimator, TerminalColor
from humanfriendly import format_timespan


@dataclass
@@ -31,6 +30,9 @@ def _logger(self, estimator):
return lambda text: getLogger(type(estimator).__name__).info(text)

def _step_text(self, step_name, step_number, estimator):
"""Provides a console logging string for logging during an experiment phase,
like in `fit` or `score`. Adds coloring and fit times to stdout."""

# step text variables
step = step_number + 1
n_steps = len(self.estimators)
111 changes: 88 additions & 23 deletions fseval/pipelines/rank_and_validate/_ranking_validator.py
@@ -3,10 +3,9 @@

import numpy as np
import pandas as pd
from omegaconf import MISSING
from sklearn.metrics import log_loss, r2_score

from fseval.types import IncompatibilityError
from omegaconf import MISSING
from sklearn.metrics import accuracy_score, log_loss, r2_score

from .._experiment import Experiment
from ._config import RankAndValidatePipeline
@@ -46,39 +45,105 @@ def fit(self, X, y):
super(RankingValidator, self).fit(X, y)
self.storage_provider.save_pickle(filename, self.ranker.estimator)

def score(self, X, y):
"""Scores a feature ranker, if a ground-truth on the desired dataset
feature importances is available. If this is the case, the estimated normalized
feature importances are compared to the desired ones using two metrics:
log loss and the R^2 score. Whilst the log loss converts the ground-truth
desired feature rankings to a binary value, 0/1, the R^2 score always works."""

score = {
"fit_time": self.ranker.fit_time_,
"bootstrap_state": self.bootstrap_state,
}
def _scores_to_ranking(self, scores):
"""Converts a scoring vector to a ranking vector, or standardizes an existing
feature ranking vector. e.g.:
```
[0.8, 0.1, 0.9, 0.0]
```
is converted to
```
[3, 2, 4, 1]
```
"""

_, inverse, counts = np.unique(scores, return_inverse=True, return_counts=True)
ranking_inverse = np.zeros_like(counts)
ranking_inverse[1:] = counts[:-1].cumsum()
ranking = ranking_inverse[inverse] + 1
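# note: tied scores receive the same (lowest) rank, e.g. [0.5, 0.5, 0.1] maps to [2, 2, 1].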

return ranking

def _score_with_feature_importances(self, score):
"""Scores this feature ranker with the available dataset ground-truth relevant
features, which are to be known apriori. Supports three types of feature rankings:
- a real-valued feature importance vector
- a boolean-valued feature support vector
- an integer-valued feature ranking vector."""

X_importances = self.dataset.feature_importances
if X_importances is not None and self.ranker.estimates_feature_importances:
assert np.ndim(X_importances) == 1, "instance-based not supported yet."

# predicted feature importances: normalized ranker scores.
### Feature importances
if self.ranker.estimates_feature_importances:
# predicted feature importances, normalized.
y_pred = np.asarray(self.ranker.feature_importances_)
y_pred = y_pred / sum(y_pred)

# r2 score
y_true = X_importances
score["r2_score"] = r2_score(y_true, y_pred)
score["importance.r2_score"] = r2_score(y_true, y_pred)

# log loss
y_true = X_importances > 0
score["log_loss"] = log_loss(y_true, y_pred, labels=[0, 1])
score["importance.log_loss"] = log_loss(y_true, y_pred, labels=[0, 1])

### Feature support
if self.ranker.estimates_feature_support:
# predicted feature support
y_pred = np.asarray(self.ranker.feature_support_, dtype=bool)

# accuracy
y_true = X_importances > 0
score["support.accuracy"] = accuracy_score(y_true, y_pred)

### Feature ranking
# grab the ranking through either (1) `feature_ranking_` or (2) `feature_importances_`
ranking = None
if self.ranker.estimates_feature_ranking:
ranking = self.ranker.feature_ranking_
elif self.ranker.estimates_feature_importances:
ranking = self.ranker.feature_importances_

# compute ranking r2 score
if ranking is not None:
# predicted feature ranking, re-ordered and normalized.
y_pred = self._scores_to_ranking(ranking)
y_pred = y_pred / sum(y_pred)

# convert ground-truth to a ranking as well.
y_true = self._scores_to_ranking(X_importances)
y_true = y_true / sum(y_true)
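# both rankings now sum to 1, so the r2 score compares them on a common scale.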

# in the r2 score, only consider **relevant** features, not irrelevant ones. in
# this way, when `X_importances = [0, 2, 4, 0, 0]` we do not get misleadingly
# high scores just because the ranking also orders the irrelevant features correctly.
sample_weight = np.ones_like(X_importances)
sample_weight[X_importances == 0] = 0.0
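# e.g. for `X_importances = [0, 2, 4, 0, 0]`, sample_weight becomes [0, 1, 1, 0, 0].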

# r2 score
score["ranking.r2_score"] = r2_score(
y_true, y_pred, sample_weight=sample_weight
)

def score(self, X, y):
"""Scores a feature ranker, if a ground-truth on the desired dataset
feature importances is available. If this is the case, the estimated normalized
feature importances are compared to the desired ones using two metrics:
log loss and the R^2 score. Whilst the log loss converts the ground-truth
desired feature rankings to a binary value, 0/1, the R^2 score always works."""

score = {
"fit_time": self.ranker.fit_time_,
"bootstrap_state": self.bootstrap_state,
}

if X_importances is not None and self.ranker.estimates_feature_support:
...
if self.dataset.feature_importances is not None:
assert (
np.ndim(self.dataset.feature_importances) == 1
), "instance-based not supported yet."

if X_importances is not None and self.ranker.estimates_feature_ranking:
...
self._score_with_feature_importances(score)

# put in a dataframe so it can easily be merged with other pipeline scores
scores = pd.DataFrame([score])
100 changes: 93 additions & 7 deletions tests/integration/pipelines/test_rank_and_validate.py
@@ -7,32 +7,96 @@
from fseval.pipelines._callback_collection import CallbackCollection
from fseval.pipelines.rank_and_validate import RankAndValidateConfig
from fseval.storage_providers.mock import MockStorageProvider
from fseval.types import AbstractStorageProvider, Callback, IncompatibilityError, Task
from fseval.types import (
AbstractEstimator,
AbstractStorageProvider,
Callback,
IncompatibilityError,
Task,
)
from hydra.utils import instantiate
from omegaconf import OmegaConf
from sklearn.model_selection import ShuffleSplit


class MockRanker(AbstractEstimator):
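"""A minimal stub estimator returning deterministic pseudo-random values for its
score, feature importances, support and ranking, so the pipeline's scoring paths
can be exercised without training a real model."""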
def __init__(self, random_state=None):
self.random_state = random_state

def _get_random_state(self):
return np.random.RandomState(self.random_state)

def fit(self, X, y):
n, p = np.asarray(X).shape
self.n_features = p

def transform(self, X, y):
...

def fit_transform(self, X, y):
...

def score(self, X, y):
return self._get_random_state().rand()

@property
def feature_importances_(self):
return self._get_random_state().rand(self.n_features)

@property
def support_(self):
return self._get_random_state().rand(self.n_features)

@property
def ranking_(self):
return self._get_random_state().rand(self.n_features)


@pytest.fixture
def pipeline_cfg():
estimator = dict(_target_="sklearn.tree.DecisionTreeClassifier", random_state=0)
def classifier():
estimator = dict(
_target_="tests.integration.pipelines.test_rank_and_validate.MockRanker",
random_state=0,
)
classifier: EstimatorConfig = EstimatorConfig(estimator=estimator)

resample: ResampleConfig = ResampleConfig(name="shuffle")
return classifier


@pytest.fixture
def ranker(classifier):
ranker: TaskedEstimatorConfig = TaskedEstimatorConfig(
name="Decision Tree",
task=Task.classification,
classifier=classifier,
is_multioutput_dataset=False,
estimates_feature_importances=True,
estimates_feature_support=True,
estimates_feature_ranking=True,
)
return ranker


@pytest.fixture
def validator(classifier):
validator: TaskedEstimatorConfig = TaskedEstimatorConfig(
name="Decision Tree",
task=Task.classification,
classifier=classifier,
is_multioutput_dataset=False,
estimates_target=True,
)
return validator


@pytest.fixture
def resample():
resample: ResampleConfig = ResampleConfig(name="shuffle")
return resample


@pytest.fixture
def pipeline_cfg(classifier, ranker, validator, resample):
n_bootstraps: int = 2

config = RankAndValidateConfig(
@@ -103,7 +167,7 @@ def test_without_ranker_gt(
score = pipeline.score(X_test, y_test)

assert score["best"]["validator"]["fit_time"] > 0
assert score["best"]["validator"]["score"] == 1.0
assert score["best"]["validator"]["score"] >= 0.0


def test_with_ranker_gt(pipeline_cfg, callbacks, dataset_with_gt, cv, storage_provider):
@@ -117,8 +181,30 @@ def test_with_ranker_gt(pipeline_cfg, callbacks, dataset_with_gt, cv, storage_pr
assert score["best"]["validator"]["fit_time"] > 0
assert score["best"]["ranker"]["fit_time"] > 0

assert score["best"]["ranker"]["r2_score"] <= 1.0
assert score["best"]["validator"]["score"] == 1.0
assert score["best"]["ranker"]["importance.r2_score"] <= 1.0
assert score["best"]["ranker"]["importance.log_loss"] >= 0
assert score["best"]["ranker"]["support.accuracy"] >= 0.0
assert score["best"]["ranker"]["support.accuracy"] <= 1.0
assert score["best"]["ranker"]["ranking.r2_score"] <= 1.0
assert score["best"]["validator"]["score"] >= 0.0


def test_with_ranker_gt_no_importances_substitution(
pipeline_cfg, callbacks, dataset_with_gt, cv, storage_provider
):
"""When no `feature_ranking` available, `feature_importances` should substitute
for the ranking."""

dataset = dataset_with_gt
pipeline_cfg.ranker.estimates_feature_ranking = False

pipeline = instantiate(pipeline_cfg, callbacks, dataset, cv, storage_provider)
X_train, X_test, y_train, y_test = cv.train_test_split(dataset.X, dataset.y)
pipeline.fit(X_train, y_train)
score = pipeline.score(X_test, y_test)

assert score["best"]["ranker"]["ranking.r2_score"] <= 1.0
assert score["best"]["validator"]["score"] >= 0.0


def test_validator_incompatibility_check(
