AutoResearch · hollandjg · Aug 15, 2023 · Jul 18, 2023 · Jul 18, 2023 · Jul 18, 2023
diff --git a/docs/cycle/Basic Introduction to Functions and States.ipynb b/docs/cycle/Basic Introduction to Functions and States.ipynb
@@ -131,9 +131,9 @@
    "outputs": [],
    "source": [
     "from sklearn.linear_model import LinearRegression\n",
-    "from autora.state.wrapper import theorist_from_estimator\n",
+    "from autora.state.wrapper import state_fn_from_estimator\n",
     "\n",
-    "theorist = theorist_from_estimator(LinearRegression(fit_intercept=True))"
+    "theorist = state_fn_from_estimator(LinearRegression(fit_intercept=True))"
    ]
   },
   {

diff --git a/docs/cycle/Linear and Cyclical Workflows using Functions and States.ipynb b/docs/cycle/Linear and Cyclical Workflows using Functions and States.ipynb
@@ -268,7 +268,7 @@
     "### Defining The Theorist\n",
     "\n",
     "Now we define a theorist, which does a linear regression on the polynomial of degree 5. We define a regressor and a\n",
-    "method to return its feature names and coefficients, and then the theorist to handle it. Here, we use a different wrapper `theorist_from_estimator` that wraps the regressor and returns a function with the same functionality, but operating on `State` fields. In this case, we want to use the `State` field `experiment_data` and extend the `State` field `models`."
+    "method to return its feature names and coefficients, and then the theorist to handle it. Here, we use a different wrapper `state_fn_from_estimator` that wraps the regressor and returns a function with the same functionality, but operating on `State` fields. In this case, we want to use the `State` field `experiment_data` and extend the `State` field `models`."
    ]
   },
   {
@@ -278,13 +278,13 @@
    "outputs": [],
    "source": [
     "from sklearn.linear_model import LinearRegression\n",
-    "from autora.state.wrapper import theorist_from_estimator\n",
+    "from autora.state.wrapper import state_fn_from_estimator\n",
     "from sklearn.pipeline import make_pipeline as make_theorist_pipeline\n",
     "from sklearn.preprocessing import PolynomialFeatures\n",
     "\n",
     "# Completely standard scikit-learn pipeline regressor\n",
     "regressor = make_theorist_pipeline(PolynomialFeatures(degree=5), LinearRegression())\n",
-    "theorist = theorist_from_estimator(regressor)\n",
+    "theorist = state_fn_from_estimator(regressor)\n",
     "\n",
     "def get_equation(r):\n",
     "    t = r.named_steps['polynomialfeatures'].get_feature_names_out()\n",

diff --git a/src/autora/state/wrapper.py b/src/autora/state/wrapper.py
@@ -6,13 +6,11 @@
 """
 from __future__ import annotations
 
-from typing import Callable, Iterable, TypeVar
+from typing import Callable, TypeVar
 
-import numpy as np
 import pandas as pd
 from sklearn.base import BaseEstimator
 
-from autora.experimentalist.pipeline import Pipeline
 from autora.state.delta import Delta, State, wrap_to_use_state
 from autora.variable import VariableCollection
 
@@ -23,12 +21,33 @@
 Executor = Callable[[State], State]
 
 
-def theorist_from_estimator(estimator: BaseEstimator) -> Executor:
+def state_fn_from_estimator(estimator: BaseEstimator) -> Executor:
     """
     Convert a scikit-learn compatible estimator into a function on a `State` object.
 
     Supports passing additional `**kwargs` which are used to update the estimator's params
     before fitting.
+
+    Examples:
+        Initialize a function, `state_fn`, which operates on the state and runs a LinearRegression:
+        >>> from sklearn.linear_model import LinearRegression
+        >>> state_fn = state_fn_from_estimator(LinearRegression())
+
+        Define the state on which to operate (here an instance of the `StandardState`):
+        >>> from autora.state.bundled import StandardState
+        >>> from autora.variable import Variable, VariableCollection
+        >>> import pandas as pd
+        >>> s = StandardState(
+        ...     variables=VariableCollection(
+        ...         independent_variables=[Variable("x")],
+        ...         dependent_variables=[Variable("y")]),
+        ...     experiment_data=pd.DataFrame({"x": [1,2,3], "y":[3,6,9]})
+        ... )
+
+        Run the function, which fits the model and adds the result to the `StandardState`:
+        >>> state_fn(s).model.coef_
+        array([[3.]])
+
     """
 
     @wrap_to_use_state
@@ -44,9 +63,41 @@ def theorist(
     return theorist
 
 
-def experiment_runner_from_x_to_y_function(f: Callable[[X], Y]) -> Executor:
+def state_fn_from_x_to_y_fn_df(f: Callable[[X], Y]) -> Executor:
     """Wrapper for experiment_runner of the form $f(x) \rarrow y$, where `f` returns just the $y$
-    values"""
+    values, with inputs and outputs as a DataFrame or Series with correct column names.
+
+    Examples:
+        The conditions are some x-values in a StandardState object:
+        >>> from autora.state.bundled import StandardState
+        >>> s = StandardState(conditions=pd.DataFrame({"x": [1, 2, 3]}))
+
+        The function can be defined on a DataFrame (allowing the explicit inclusion of
+        metadata like column names).
+        >>> def x_to_y_fn(c: pd.DataFrame) -> pd.Series:
+        ...     result = pd.Series(2 * c["x"] + 1, name="y")
+        ...     return result
+
+        We apply the wrapped function to `s` and look at the returned experiment_data:
+        >>> state_fn_from_x_to_y_fn_df(x_to_y_fn)(s).experiment_data
+           x  y
+        0  1  3
+        1  2  5
+        2  3  7
+
+        We can also define functions of several variables:
+        >>> def xs_to_y_fn(c: pd.DataFrame) -> pd.Series:
+        ...     result = pd.Series(c["x0"] + c["x1"], name="y")
+        ...     return result
+
+        With the relevant variables as conditions:
+        >>> t = StandardState(conditions=pd.DataFrame({"x0": [1, 2, 3], "x1": [10, 20, 30]}))
+        >>> state_fn_from_x_to_y_fn_df(xs_to_y_fn)(t).experiment_data
+           x0  x1   y
+        0   1  10  11
+        1   2  20  22
+        2   3  30  33
+    """
 
     @wrap_to_use_state
     def experiment_runner(conditions: pd.DataFrame, **kwargs):
@@ -58,9 +109,42 @@ def experiment_runner(conditions: pd.DataFrame, **kwargs):
     return experiment_runner
 
 
-def experiment_runner_from_x_to_xy_function(f: Callable[[X], XY]) -> Executor:
+def state_fn_from_x_to_xy_fn_df(f: Callable[[X], XY]) -> Executor:
     """Wrapper for experiment_runner of the form $f(x) \rarrow (x,y)$, where `f`
-    returns both $x$ and $y$ values in a complete dataframe."""
+    returns both $x$ and $y$ values in a complete dataframe.
+
+    Examples:
+        The conditions are some x-values in a StandardState object:
+        >>> from autora.state.bundled import StandardState
+        >>> s = StandardState(conditions=pd.DataFrame({"x": [1, 2, 3]}))
+
+        The function can be defined on a DataFrame, allowing the explicit inclusion of
+        metadata like column names.
+        >>> def x_to_xy_fn(c: pd.DataFrame) -> pd.DataFrame:
+        ...     result = c.assign(y=lambda df: 2 * df.x + 1)
+        ...     return result
+
+        We apply the wrapped function to `s` and look at the returned experiment_data:
+        >>> state_fn_from_x_to_xy_fn_df(x_to_xy_fn)(s).experiment_data
+           x  y
+        0  1  3
+        1  2  5
+        2  3  7
+
+        We can also define functions of several variables:
+        >>> def xs_to_xy_fn(c: pd.DataFrame) -> pd.DataFrame:
+        ...     result = c.assign(y=c.x0 + c.x1)
+        ...     return result
+
+        With the relevant variables as conditions:
+        >>> t = StandardState(conditions=pd.DataFrame({"x0": [1, 2, 3], "x1": [10, 20, 30]}))
+        >>> state_fn_from_x_to_xy_fn_df(xs_to_xy_fn)(t).experiment_data
+           x0  x1   y
+        0   1  10  11
+        1   2  20  22
+        2   3  30  33
+
+    """
 
     @wrap_to_use_state
     def experiment_runner(conditions: pd.DataFrame, **kwargs):
@@ -69,21 +153,3 @@ def experiment_runner(conditions: pd.DataFrame, **kwargs):
         return Delta(experiment_data=experiment_data)
 
     return experiment_runner
-
-
-def experimentalist_from_pipeline(pipeline: Pipeline) -> Executor:
-    """Wrapper for experimentalists of the form $f() \rarrow x$, where `f`
-    returns both $x$ and $y$ values in a complete dataframe."""
-
-    @wrap_to_use_state
-    def experimentalist(params):
-        conditions = pipeline(**params)
-        if isinstance(conditions, (pd.DataFrame, np.ndarray, np.recarray)):
-            conditions_ = conditions
-        elif isinstance(conditions, Iterable):
-            conditions_ = np.array(list(conditions))
-        else:
-            raise NotImplementedError("type `%s` is not supported" % (type(conditions)))
-        return Delta(conditions=conditions_)
-
-    return experimentalist