Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into 426-Use-registryfill-to-update-defaults-in-cfg-from-defaults-in-function-signatures
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBernstorff committed Nov 15, 2023
2 parents 8c1b9b9 + 4f2faed commit a54d08c
Show file tree
Hide file tree
Showing 15 changed files with 39 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

def test_boostrap_estimates():
input_df = str_to_df(
"""id,y,y_hat,y_pred,
"""id,y,y_hat_prob,y_hat,
1,1,1.0,1,
1,1,1.0,1,
1,1,1.0,1,
Expand All @@ -26,7 +26,7 @@ def test_boostrap_estimates():
n_bootstraps=1,
ci_width=0.95,
input_1=input_df["y"],
input_2=input_df["y_hat"],
input_2=input_df["y_hat_prob"],
)

assert auroc_df_with_ci["ci"] == (1.0, 1.0)
Expand All @@ -36,7 +36,7 @@ def test_boostrap_estimates():
n_bootstraps=5,
ci_width=0.95,
input_1=input_df["y"],
input_2=input_df["y_pred"],
input_2=input_df["y_hat"],
)

assert sensitivity_df_with_ci["ci"] == (1.0, 1.0)
12 changes: 6 additions & 6 deletions psycop/common/model_training_v2/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
TerminalLogger,
)
from psycop.common.model_training_v2.trainer.base_dataloader import BaselineDataLoader
from psycop.common.model_training_v2.trainer.cross_validator import (
from psycop.common.model_training_v2.trainer.cross_validator_trainer import (
CrossValidatorTrainer,
)
from psycop.common.model_training_v2.trainer.preprocessing.pipeline import (
Expand All @@ -30,12 +30,12 @@
from psycop.common.model_training_v2.trainer.split_trainer import (
SplitTrainer,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_classification import (
BinaryClassification,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_classification_pipeline import (
BinaryClassificationPipeline,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_classification_task import (
BinaryClassificationTask,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_metrics import (
BinaryAUROC,
)
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_v2_train_model_pipeline(tmpdir: Path):
preprocessing_pipeline=BaselinePreprocessingPipeline(
AgeFilter(min_age=4, max_age=99, age_col_name="pred_age"),
),
task=BinaryClassification(
task=BinaryClassificationTask(
pred_time_uuid_col_name="pred_time_uuid",
task_pipe=BinaryClassificationPipeline(
sklearn_pipe=Pipeline([logistic_regression_step()]),
Expand Down Expand Up @@ -117,7 +117,7 @@ def test_v2_crossval_model_pipeline(tmpdir: Path):
preprocessing_pipeline=BaselinePreprocessingPipeline(
AgeFilter(min_age=4, max_age=99, age_col_name="pred_age"),
),
task=BinaryClassification(
task=BinaryClassificationTask(
pred_time_uuid_col_name="pred_time_uuid",
task_pipe=BinaryClassificationPipeline(
sklearn_pipe=Pipeline([logistic_regression_step()]),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from psycop.common.model_training_v2.trainer.preprocessing.pipeline import (
PreprocessingPipeline,
)
from psycop.common.model_training_v2.trainer.task.base_metric import BaseMetric
from psycop.common.model_training_v2.trainer.task.base_metric import BaselineMetric
from psycop.common.model_training_v2.trainer.task.base_task import BaselineTask


Expand All @@ -24,7 +24,7 @@ def __init__(
outcome_col_name: str,
preprocessing_pipeline: PreprocessingPipeline,
task: BaselineTask,
metric: BaseMetric,
metric: BaselineMetric,
logger: BaselineLogger,
n_splits: int = 5,
group_col_name: str = "dw_ek_borger",
Expand Down Expand Up @@ -97,12 +97,12 @@ def train(self) -> TrainingResult:

training_data_preprocessed.loc[
val_idxs,
"oof_y_hat_probs",
"oof_y_hat_prob",
] = oof_y_hat_prob.to_list() # type: ignore

main_metric = self.metric.calculate(
y=training_data_preprocessed[self.outcome_col_name],
y_hat_prob=training_data_preprocessed["oof_y_hat_probs"],
y_hat_prob=training_data_preprocessed["oof_y_hat_prob"],
)
self.logger.log_metric(main_metric)

Expand Down
4 changes: 2 additions & 2 deletions psycop/common/model_training_v2/trainer/split_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from psycop.common.model_training_v2.trainer.preprocessing.pipeline import (
PreprocessingPipeline,
)
from psycop.common.model_training_v2.trainer.task.base_metric import BaseMetric
from psycop.common.model_training_v2.trainer.task.base_metric import BaselineMetric
from psycop.common.model_training_v2.trainer.task.base_task import (
BaselineTask,
)
Expand All @@ -35,7 +35,7 @@ def __init__(
validation_outcome_col_name: str,
preprocessing_pipeline: PreprocessingPipeline,
task: BaselineTask,
metric: BaseMetric,
metric: BaselineMetric,
logger: BaselineLogger,
):
self.training_data = training_data.load()
Expand Down
4 changes: 2 additions & 2 deletions psycop/common/model_training_v2/trainer/task/base_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ class CalculatedMetric:
value: float


PredProbaSeries = pd.Series # name should be "y_hat_probs", series of floats
PredProbaSeries = pd.Series # name should be "y_hat_prob", series of floats


@runtime_checkable
class BaseMetric(Protocol):
class BaselineMetric(Protocol):
def calculate(
self,
y: pd.Series, # type: ignore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ def predict_proba(self, x: pd.DataFrame) -> PredProbaSeries:
"""Returns the predicted probabilities of the `1`
class"""
pred_probs = self.pipe.predict_proba(x)[:, 1]
return pd.Series(pred_probs, name="y_hat_probs")
return pd.Series(pred_probs, name="y_hat_prob")
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def polarsframe_to_series(polarsframe: PolarsFrame) -> pl.Series:


@BaselineRegistry.tasks.register("binary_classification")
class BinaryClassification(BaselineTask):
class BinaryClassificationTask(BaselineTask):
def __init__(
self,
task_pipe: BinaryClassificationPipeline,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import TYPE_CHECKING

from psycop.common.model_training_v2.trainer.task.base_metric import BaseMetric
from psycop.common.model_training_v2.trainer.task.base_metric import BaselineMetric

if TYPE_CHECKING:
import pandas as pd
Expand All @@ -13,7 +13,7 @@
)


class BinaryMetric(BaseMetric):
class BinaryMetric(BaselineMetric):
def calculate(
self,
y: pd.Series, # type: ignore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


@pytest.mark.parametrize(
("y_true", "y_pred", "expected"),
("y", "y_hat_prob", "expected"),
[
(
pd.Series([1, 1, 0, 0]),
Expand All @@ -30,7 +30,7 @@
),
],
)
def test_binary_auroc(y_true: pd.Series[int], y_pred: PredProbaSeries, expected: float):
def test_binary_auroc(y: pd.Series[int], y_hat_prob: PredProbaSeries, expected: float):
auroc = BinaryAUROC()
calculated_metric = auroc.calculate(y=y_true, y_hat_prob=y_pred)
calculated_metric = auroc.calculate(y=y, y_hat_prob=y_hat_prob)
assert calculated_metric.value == expected
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
from pandas.testing import assert_series_equal
from sklearn.pipeline import Pipeline

from psycop.common.model_training_v2.trainer.task.binary_classification.binary_classification import (
BinaryClassification,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_classification_pipeline import (
BinaryClassificationPipeline,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_classification_task import (
BinaryClassificationTask,
)
from psycop.common.model_training_v2.trainer.task.binary_classification.binary_metrics.binary_auroc import (
BinaryAUROC,
)
Expand Down Expand Up @@ -38,7 +38,7 @@ def test_binary_classification(
y: pd.DataFrame,
main_metric_expected: float,
):
binary_classification_problem = BinaryClassification(
binary_classification_problem = BinaryClassificationTask(
task_pipe=pipe,
pred_time_uuid_col_name="uuid",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ def test_binary_classification_pipeline(
pipeline = BinaryClassificationPipeline(sklearn_pipe=pipe)
pipeline.fit(x=x, y=y)

y_hat_probs = pipeline.predict_proba(x=x)
assert isinstance(y_hat_probs, pd.Series)
assert y_hat_probs.name == "y_hat_probs"
y_hat_prob = pipeline.predict_proba(x=x)
assert isinstance(y_hat_prob, pd.Series)
assert y_hat_prob.name == "y_hat_prob"
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
)


class MulticlassClassificationPipeline(Protocol):
class MultilabelClassificationPipeline(Protocol):
def __init__(self, steps: Sequence[ModelStep]) -> None:
...

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
from psycop.common.model_training_v2.trainer.task.base_task import (
BaselineTask,
)
from psycop.common.model_training_v2.trainer.task.multilabel_classification.multiclass_classification_pipeline import (
MulticlassClassificationPipeline,
from psycop.common.model_training_v2.trainer.task.multilabel_classification.multilabel_classification_pipeline import (
MultilabelClassificationPipeline,
)
from psycop.common.model_training_v2.trainer.task.multilabel_classification.multilabel_metrics.base import (
from psycop.common.model_training_v2.trainer.task.multilabel_classification.multilabel_metrics.base_multilabel_metric import (
MultilabelMetric,
)


class MultilabelClassification(BaselineTask):
class MultilabelClassificationTask(BaselineTask):
def __init__(
self,
pipe: MulticlassClassificationPipeline,
pipe: MultilabelClassificationPipeline,
main_metric: MultilabelMetric,
supplementary_metrics: Sequence[MultilabelMetric] | None = None,
):
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import pandas as pd

from psycop.common.model_training_v2.trainer.task.base_metric import (
BaseMetric,
BaselineMetric,
CalculatedMetric,
PredProbaSeries,
)


class MultilabelMetric(BaseMetric):
class MultilabelMetric(BaselineMetric):
def calculate(
self,
y: pd.Series, # type: ignore
Expand Down

0 comments on commit a54d08c

Please sign in to comment.