
Commit

Ready to deploy!
dunnkers committed Jan 25, 2022
1 parent 8615e65 commit 433180c
Showing 19 changed files with 347 additions and 195 deletions.
6 changes: 5 additions & 1 deletion examples/quick-start-yaml/conf/my_config.yaml
@@ -4,6 +4,10 @@ defaults:
  - override dataset: synthetic
  - override validator: knn
  - override /callbacks:
      - to_sql
      - to_csv

n_bootstraps: 1

callbacks:
  to_csv:
    dir: /Users/dunnkers/Downloads/results_dir
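The `to_sql` and `to_csv` callbacks enabled above take any remaining settings either from this config or from a Hydra override at launch time. A minimal sketch of launching the quick-start benchmark with a SQLite results database (it mirrors the override used by tests/examples/test_quick_start.py further down; the temporary path is illustrative):

import subprocess
import tempfile
from pathlib import Path

# Write results to a throwaway SQLite database via the to_sql callback.
db_file = Path(tempfile.mkdtemp()) / "results.sqlite"
subprocess.run(
    [
        "python",
        "benchmark.py",
        "--multirun",
        "ranker=glob(*)",
        f"+callbacks.to_sql.url='sqlite:///{db_file}'",
    ],
    cwd="./examples/quick-start-yaml/",
    check=True,
)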
3 changes: 1 addition & 2 deletions fseval/callbacks/to_wandb.py
@@ -6,12 +6,11 @@
from typing import Dict, Optional, cast

import pandas as pd
from omegaconf import DictConfig, OmegaConf

import wandb
from fseval.config.callbacks.to_wandb import ToWandbCallback
from fseval.types import Callback
from fseval.utils.dict_utils import dict_flatten, dict_merge
from omegaconf import DictConfig, OmegaConf


@dataclass
5 changes: 3 additions & 2 deletions fseval/conf/callbacks/to_wandb.yaml
@@ -1,3 +1,4 @@
wandb:
to_wandb:
  _target_: fseval.callbacks.to_wandb.WandbCallback
  job_type: ${hydra:job.name}
  wandb_init_kwargs:
    job_type: ${hydra:job.name}
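The rename to `to_wandb` and the new `wandb_init_kwargs` block suggest that everything under `wandb_init_kwargs` is forwarded to `wandb.init()`. A rough sketch of that assumption (not the actual WandbCallback source; the literal value stands in for the resolved ${hydra:job.name}):

import wandb

# Assumption: keys under `wandb_init_kwargs` map 1:1 onto wandb.init() arguments.
wandb_init_kwargs = {"job_type": "benchmark"}
run = wandb.init(**wandb_init_kwargs)
run.finish()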
3 changes: 1 addition & 2 deletions fseval/pipelines/_callback_collection.py
@@ -1,9 +1,8 @@
from typing import Dict, Optional

import pandas as pd
from omegaconf import DictConfig

from fseval.types import Callback
from omegaconf import DictConfig


class CallbackCollection(Callback):
54 changes: 36 additions & 18 deletions tests/examples/test_quick_start.py
@@ -1,4 +1,9 @@
import subprocess
import tempfile
from pathlib import Path

import pandas as pd
import pytest


def install_fseval():
@@ -8,33 +13,46 @@ def install_fseval():
    )


def run_quick_start(cwd):
    install_fseval()
    stdout = subprocess.run(
def run_quick_start(cwd: str, db_url: str):
    subprocess.run(
        [
            "python",
            "benchmark.py",
            "--multirun",
            "ranker=glob(*)",
            f"+callbacks.to_sql.url='{db_url}'",
        ],
        # create database in temporary directory. then check results.
        # "+callbacks.to_sql.url='sqlite:////Users/dunnkers/Downloads/results.sqlite'"
        stdout=subprocess.PIPE,
        cwd=cwd,
    )

    return stdout


def test_quick_start_yaml():
    stdout = run_quick_start("./examples/quick-start-yaml/")

    print("done")
    # TODO check output


def test_basic_structured_config_example():
    stdout = run_quick_start("./examples/quick-start-structured-configs/")
@pytest.mark.parametrize(
    "cwd",
    ["./examples/quick-start-yaml/", "./examples/quick-start-structured-configs/"],
)
def test_quick_start(cwd: str):
    install_fseval()

    print("done")
    # TODO check output
    # run pipeline
    db_dir = tempfile.mkdtemp()
    db_file = Path(db_dir) / "results.sqlite"
    db_url = f"sqlite:///{db_file}"
    run_quick_start(cwd=cwd, db_url=db_url)

    # validate scores are in database
    validation_scores = pd.read_sql("validation_scores", con=db_url)
    assert len(validation_scores) == 40
    assert "index" in validation_scores.columns
    assert "n_features_to_select" in validation_scores.columns
    assert "fit_time" in validation_scores.columns
    assert "score" in validation_scores.columns
    assert "bootstrap_state" in validation_scores.columns

    # validate experiment config
    experiments = pd.read_sql("experiments", con=db_url)
    assert len(experiments) == 2
    assert experiments.iloc[0].ranker in ["ANOVA F-value", "Mutual Info"]
    assert experiments.iloc[1].ranker in ["ANOVA F-value", "Mutual Info"]
    assert experiments.iloc[0]["dataset/n"] == 10000
    assert experiments.iloc[0]["dataset/p"] == 20
106 changes: 54 additions & 52 deletions tests/integration/pipelines/test_rank_and_validate.py
@@ -3,11 +3,6 @@
import numpy as np
import pandas as pd
import pytest
from hydra.core.config_store import ConfigStore
from hydra.utils import instantiate
from omegaconf import DictConfig, open_dict
from sklearn.base import BaseEstimator

from fseval.config import (
    CrossValidatorConfig,
    DatasetConfig,
@@ -18,23 +13,40 @@
from fseval.pipeline.dataset import Dataset, DatasetLoader
from fseval.types import AbstractAdapter, IncompatibilityError, Task
from fseval.utils.hydra_utils import get_config
from hydra.core.config_store import ConfigStore
from hydra.utils import instantiate
from omegaconf import DictConfig, open_dict
from sklearn.base import BaseEstimator

cs = ConfigStore.instance()


class RandomEstimator(BaseEstimator):
    def __init__(self, random_state=None):
    def __init__(self, random_state=None, multi_dim=False):
        self.random_state = random_state
        self.multi_dim = multi_dim

    def _get_random_state(self):
        return np.random.RandomState(self.random_state)

    def fit(self, X, y):
        n, p = np.asarray(X).shape
        self.n_features = p
        self.feature_importances_ = self._get_random_state().rand(self.n_features)
        self.support_ = self._get_random_state().rand(self.n_features)
        self.ranking_ = self._get_random_state().rand(self.n_features)

        # random generator
        random_state: np.random.RandomState = self._get_random_state()

        # feature support. is always 1-dimensional.
        self.support_ = random_state.rand(self.n_features) > 0.5

        # 1-dimensional
        if not self.multi_dim:
            self.feature_importances_ = random_state.rand(self.n_features)
            self.ranking_ = random_state.rand(self.n_features)
        # multi-dimensional
        else:
            self.feature_importances_ = random_state.rand(self.n_features, 3)
            self.ranking_ = random_state.rand(self.n_features, 3)

    def score(self, X, y, **kwargs) -> Union[Dict, pd.DataFrame, np.generic, None]:
        return self._get_random_state().rand()
@@ -64,6 +76,14 @@ def get_data(self) -> Tuple[List, List]:
)
cs.store(name="random_ranker", node=ranker, group="ranker")

ranker_multi_dim: EstimatorConfig = ranker
ranker_multi_dim.estimator = {
"_target_": "tests.integration.pipelines.test_rank_and_validate.RandomEstimator",
"random_state": 0,
"multi_dim": True,
}
cs.store(name="random_ranker_multi_dim", node=ranker, group="ranker")

validator: EstimatorConfig = EstimatorConfig(
name="Random Validator",
task=Task.classification,
@@ -122,9 +142,7 @@ def cfg() -> PipelineConfig:
    return cfg


def test_without_ranker_gt(cfg: PipelineConfig):
"""Test execution without dataset ground-truth."""

def run_pipeline___test_version(cfg: PipelineConfig):
# callback target. requires disabling omegaconf struct.
with open_dict(cast(DictConfig, cfg)):
cfg.callbacks[
Expand All @@ -147,31 +165,34 @@ def test_without_ranker_gt(cfg: PipelineConfig):
pipeline.score(X_test, y_test, feature_importances=dataset.feature_importances)


def test_without_ranker_gt(cfg: PipelineConfig):
"""Test execution without dataset ground-truth."""

run_pipeline___test_version(cfg)


def test_with_multi_dim_ranker():
cfg: PipelineConfig = get_config(
config_module="tests.integration.pipelines.conf",
config_name="my_test_config",
overrides=[
"dataset=some_dataset",
"cv=simple_shuffle_split",
"validator=random_validator",
"ranker=random_ranker_multi_dim",
"resample=default_resampling",
],
)

run_pipeline___test_version(cfg)


def test_with_ranker_gt(cfg: PipelineConfig):
"""Test execution with dataset ground-truth: a feature importances vector attached;
i.e. the relevance per feature, known apriori."""
cfg.dataset.feature_importances = {"X[:, :]": 1.0} # uniform

    # callback target. requires disabling omegaconf struct.
    with open_dict(cast(DictConfig, cfg)):
        cfg.callbacks[
            "_target_"
        ] = "fseval.pipelines._callback_collection.CallbackCollection"

    # load dataset
    dataset_loader: DatasetLoader = instantiate(cfg.dataset)
    dataset: Dataset = dataset_loader.load()
    cfg.dataset.n = dataset.n
    cfg.dataset.p = dataset.p
    cfg.dataset.multioutput = dataset.multioutput

    # fit pipeline
    pipeline = instantiate(cfg)
    X_train, X_test, y_train, y_test = pipeline.cv.train_test_split(
        dataset.X, dataset.y
    )
    pipeline.fit(X_train, y_train)
    pipeline.score(X_test, y_test, feature_importances=dataset.feature_importances)
    run_pipeline___test_version(cfg)


def test_with_ranker_gt_no_importances_substitution(cfg: PipelineConfig):
@@ -181,26 +202,7 @@ def test_with_ranker_gt_no_importances_substitution(cfg: PipelineConfig):
    cfg.dataset.feature_importances = {"X[:, :]": 1.0}  # uniform
    cfg.ranker.estimates_feature_ranking = False

    # callback target. requires disabling omegaconf struct.
    with open_dict(cast(DictConfig, cfg)):
        cfg.callbacks[
            "_target_"
        ] = "fseval.pipelines._callback_collection.CallbackCollection"

    # load dataset
    dataset_loader: DatasetLoader = instantiate(cfg.dataset)
    dataset: Dataset = dataset_loader.load()
    cfg.dataset.n = dataset.n
    cfg.dataset.p = dataset.p
    cfg.dataset.multioutput = dataset.multioutput

    # fit pipeline
    pipeline = instantiate(cfg)
    X_train, X_test, y_train, y_test = pipeline.cv.train_test_split(
        dataset.X, dataset.y
    )
    pipeline.fit(X_train, y_train)
    pipeline.score(X_test, y_test, feature_importances=dataset.feature_importances)
    run_pipeline___test_version(cfg)


def test_validator_incompatibility_check(cfg: PipelineConfig):
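For reference, the new `multi_dim` flag makes `RandomEstimator` emit `(n_features, 3)`-shaped importances and rankings while `support_` stays one-dimensional; a small illustration of that behaviour, using the class defined in the diff above:

import numpy as np

from tests.integration.pipelines.test_rank_and_validate import RandomEstimator

X = np.random.rand(10, 5)
y = np.random.randint(2, size=10)

est = RandomEstimator(random_state=0, multi_dim=True)
est.fit(X, y)

assert est.support_.shape == (5,)                # boolean support, always 1-d
assert est.feature_importances_.shape == (5, 3)  # multi-dimensional importances
assert est.ranking_.shape == (5, 3)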
