Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: make names of wrapper functions more generic #37

4 changes: 2 additions & 2 deletions docs/cycle/Basic Introduction to Functions and States.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"from autora.state.wrapper import theorist_from_estimator\n",
"from autora.state.wrapper import state_fn_from_estimator\n",
"\n",
"theorist = theorist_from_estimator(LinearRegression(fit_intercept=True))"
"theorist = state_fn_from_estimator(LinearRegression(fit_intercept=True))"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@
"### Defining The Theorist\n",
"\n",
"Now we define a theorist, which does a linear regression on the polynomial of degree 5. We define a regressor and a\n",
"method to return its feature names and coefficients, and then the theorist to handle it. Here, we use a different wrapper `theorist_from_estimator` that wraps the regressor and returns a function with the same functionality, but operating on `State` fields. In this case, we want to use the `State` field `experiment_data` and extend the `State` field `models`."
"method to return its feature names and coefficients, and then the theorist to handle it. Here, we use a different wrapper `state_fn_from_estimator` that wraps the regressor and returns a function with the same functionality, but operating on `State` fields. In this case, we want to use the `State` field `experiment_data` and extend the `State` field `models`."
]
},
{
Expand All @@ -278,13 +278,13 @@
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"from autora.state.wrapper import theorist_from_estimator\n",
"from autora.state.wrapper import state_fn_from_estimator\n",
"from sklearn.pipeline import make_pipeline as make_theorist_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"\n",
"# Completely standard scikit-learn pipeline regressor\n",
"regressor = make_theorist_pipeline(PolynomialFeatures(degree=5), LinearRegression())\n",
"theorist = theorist_from_estimator(regressor)\n",
"theorist = state_fn_from_estimator(regressor)\n",
"\n",
"def get_equation(r):\n",
" t = r.named_steps['polynomialfeatures'].get_feature_names_out()\n",
Expand Down
118 changes: 92 additions & 26 deletions src/autora/state/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@
"""
from __future__ import annotations

from typing import Callable, Iterable, TypeVar
from typing import Callable, TypeVar

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator

from autora.experimentalist.pipeline import Pipeline
from autora.state.delta import Delta, State, wrap_to_use_state
from autora.variable import VariableCollection

Expand All @@ -23,12 +21,33 @@
Executor = Callable[[State], State]


def theorist_from_estimator(estimator: BaseEstimator) -> Executor:
def state_fn_from_estimator(estimator: BaseEstimator) -> Executor:
"""
Convert a scikit-learn compatible estimator into a function on a `State` object.

Supports passing additional `**kwargs` which are used to update the estimator's params
before fitting.

Examples:
Initialize a function, `state_fn`, which operates on the state and runs a LinearRegression:
>>> from sklearn.linear_model import LinearRegression
>>> state_fn = state_fn_from_estimator(LinearRegression())

Define the state on which to operate (here an instance of the `StandardState`):
>>> from autora.state.bundled import StandardState
>>> from autora.variable import Variable, VariableCollection
>>> import pandas as pd
>>> s = StandardState(
... variables=VariableCollection(
... independent_variables=[Variable("x")],
... dependent_variables=[Variable("y")]),
... experiment_data=pd.DataFrame({"x": [1,2,3], "y":[3,6,9]})
... )

Run the function, which fits the model and adds the result to the `StandardState`
>>> state_fn(s).model.coef_
array([[3.]])

"""

@wrap_to_use_state
Expand All @@ -44,9 +63,41 @@ def theorist(
return theorist


def experiment_runner_from_x_to_y_function(f: Callable[[X], Y]) -> Executor:
def state_fn_from_x_to_y_fn_df(f: Callable[[X], Y]) -> Executor:
r"""Wrapper for experiment_runner of the form $f(x) \rightarrow y$, where `f` returns just the $y$
values"""
values, with inputs and outputs as a DataFrame or Series with correct column names.

Examples:
The conditions are some x-values in a StandardState object:
>>> from autora.state.bundled import StandardState
>>> s = StandardState(conditions=pd.DataFrame({"x": [1, 2, 3]}))

The function can be defined on a DataFrame (allowing the explicit inclusion of
metadata like column names).
>>> def x_to_y_fn(c: pd.DataFrame) -> pd.Series:
... result = pd.Series(2 * c["x"] + 1, name="y")
... return result

We apply the wrapped function to `s` and look at the returned experiment_data:
>>> state_fn_from_x_to_y_fn_df(x_to_y_fn)(s).experiment_data
x y
0 1 3
1 2 5
2 3 7

We can also define functions of several variables:
>>> def xs_to_y_fn(c: pd.DataFrame) -> pd.Series:
... result = pd.Series(c["x0"] + c["x1"], name="y")
... return result

With the relevant variables as conditions:
>>> t = StandardState(conditions=pd.DataFrame({"x0": [1, 2, 3], "x1": [10, 20, 30]}))
>>> state_fn_from_x_to_y_fn_df(xs_to_y_fn)(t).experiment_data
x0 x1 y
0 1 10 11
1 2 20 22
2 3 30 33
"""

@wrap_to_use_state
def experiment_runner(conditions: pd.DataFrame, **kwargs):
Expand All @@ -58,9 +109,42 @@ def experiment_runner(conditions: pd.DataFrame, **kwargs):
return experiment_runner


def experiment_runner_from_x_to_xy_function(f: Callable[[X], XY]) -> Executor:
def state_fn_from_x_to_xy_fn_df(f: Callable[[X], XY]) -> Executor:
r"""Wrapper for experiment_runner of the form $f(x) \rightarrow (x,y)$, where `f`
returns both $x$ and $y$ values in a complete dataframe."""
returns both $x$ and $y$ values in a complete dataframe.

Examples:
The conditions are some x-values in a StandardState object:
>>> from autora.state.bundled import StandardState
>>> s = StandardState(conditions=pd.DataFrame({"x": [1, 2, 3]}))

The function can be defined on a DataFrame, allowing the explicit inclusion of
metadata like column names.
>>> def x_to_xy_fn(c: pd.DataFrame) -> pd.DataFrame:
... result = c.assign(y=lambda df: 2 * df.x + 1)
... return result

We apply the wrapped function to `s` and look at the returned experiment_data:
>>> state_fn_from_x_to_xy_fn_df(x_to_xy_fn)(s).experiment_data
x y
0 1 3
1 2 5
2 3 7

We can also define functions of several variables:
>>> def xs_to_xy_fn(c: pd.DataFrame) -> pd.DataFrame:
... result = c.assign(y=c.x0 + c.x1)
... return result

With the relevant variables as conditions:
>>> t = StandardState(conditions=pd.DataFrame({"x0": [1, 2, 3], "x1": [10, 20, 30]}))
>>> state_fn_from_x_to_xy_fn_df(xs_to_xy_fn)(t).experiment_data
x0 x1 y
0 1 10 11
1 2 20 22
2 3 30 33

"""

@wrap_to_use_state
def experiment_runner(conditions: pd.DataFrame, **kwargs):
Expand All @@ -69,21 +153,3 @@ def experiment_runner(conditions: pd.DataFrame, **kwargs):
return Delta(experiment_data=experiment_data)

return experiment_runner


def experimentalist_from_pipeline(pipeline: Pipeline) -> Executor:
    r"""Wrap an experimentalist pipeline of the form $f() \rightarrow x$ as a state function.

    The wrapped function calls `pipeline(**params)` and returns the resulting
    conditions ($x$ values only) in a `Delta`.

    Args:
        pipeline: callable invoked as `pipeline(**params)` to produce the conditions.

    Returns:
        The inner function decorated with `wrap_to_use_state`.
        NOTE(review): presumably the decorator reads `params` from the `State` and
        applies the returned `Delta` to it -- confirm against `autora.state.delta`.

    Raises:
        NotImplementedError: (when the returned function is called) if the pipeline's
            output is neither a DataFrame, a numpy array nor any other iterable.
    """

    @wrap_to_use_state
    def experimentalist(params):
        conditions = pipeline(**params)
        if isinstance(conditions, (pd.DataFrame, np.ndarray)):
            # Array-like results are passed through untouched; `np.recarray` is a
            # subclass of `np.ndarray`, so it is covered by this check as well.
            return Delta(conditions=conditions)
        if isinstance(conditions, Iterable):
            # Materialize lazy iterables (e.g. generators) into an array.
            return Delta(conditions=np.array(list(conditions)))
        raise NotImplementedError(f"type `{type(conditions)}` is not supported")

    return experimentalist