From c10cf6991d5e4dadcd842beccdc86fa4b25e5f85 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 09:02:27 -0400 Subject: [PATCH 1/9] feat: add fundamental controller code in a base class --- autora/controller/base.py | 108 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 autora/controller/base.py diff --git a/autora/controller/base.py b/autora/controller/base.py new file mode 100644 index 000000000..cd24ecfcb --- /dev/null +++ b/autora/controller/base.py @@ -0,0 +1,108 @@ +""" The cycle controller for AER. """ +from __future__ import annotations + +import logging +from typing import Callable, Mapping, Optional, TypeVar, Union + +from autora.controller.protocol import ( + SupportsControllerState, + SupportsControllerStateHistory, +) + +_logger = logging.getLogger(__name__) + + +State = TypeVar( + "State", bound=Union[SupportsControllerState, SupportsControllerStateHistory] +) +ExecutorName = TypeVar("ExecutorName", bound=str) + + +class BaseController: + """ + Runs an experimentalist, theorist and experiment runner in a loop. + + Once initialized, the `controller` can be started by calling `next(controller)` or using the + `controller.run` method. + + Attributes: + state (CycleState or CycleStateHistory): an object which is updated during the cycle and + has the following properties: + + - `metadata` (VariableCollection) + - `params` (dict): a nested dictionary with parameters for the cycle parts. + `{ + "experimentalist": {}, + "theorist": {}, + "experiment_runner": {} + }` + - `conditions`: a list of ArrayLike objects representing all the IVs proposed by the + experimentalist + - `observations`: a list of ArrayLike objects representing all the IVs and DVs + returned by the experiment runner + - `theories`: a list of all the fitted theories (scikit-learn compatible estimators) + - `history`: (only when using CycleStateHistory) a sequential list of all the above. 
+ + executor_collection (FullCycleExecutorCollection, OnlineExecutorCollection): an + object with interfaces for running the theorist, experimentalist and + experiment_runner. This must be compatible with the `state`. + + planner (Callable): a function which takes the `state` as input and returns one of the + `executor_collection` methods. This must be compatible with both the `state` and + the `executor_collection`. + + monitor (Callable): a function which takes the controller as input and is called at + the end of each step. + + """ + + def __init__( + self, + state: State, + planner: Callable[[State], ExecutorName], + executor_collection: Mapping[ExecutorName, Callable[[State], State]], + monitor: Optional[Callable[[State], None]] = None, + ): + """ + Args: + state: a fully instantiated controller state object compatible with the planner, + executor_collection and monitor + planner: a function which maps from the state to the next ExecutorName + executor_collection: a mapping from the ExecutorName to a callable which can operate + on the state and return an updated state + monitor: a function which takes the state object as input + """ + + self.state = state + self.planner = planner + self.executor_collection = executor_collection + self.monitor = monitor + + def run(self, num_steps: int = 1): + """Execute the next step in the cycle.""" + for i in range(num_steps): + next(self) + return self + + def __next__(self): + + # Plan + next_function_name = self.planner(self.state, self.executor_collection) + + # Map + next_function = self.executor_collection[next_function_name] + + # Execute + result = next_function(self.state) + + # Update + self.state = result + + # Monitor + if self.monitor is not None: + self.monitor(self) + + return self + + def __iter__(self): + return self From 76db0c74a174a4efaa9917eef65f70bceda96a03 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 09:44:08 -0400 Subject: [PATCH 2/9] 
=?UTF-8?q?feat:=20add=20core=20Cycle=20code=20?= =?UTF-8?q?=E2=80=93=20basic=20Controller?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- autora/controller/__init__.py | 273 ++++++++++++++++++++++++++++++++++ autora/controller/base.py | 49 ++---- autora/controller/cycle.py | 235 +++++++++++++++++++++++++++++ autora/controller/executor.py | 34 ++++- 4 files changed, 549 insertions(+), 42 deletions(-) create mode 100644 autora/controller/cycle.py diff --git a/autora/controller/__init__.py b/autora/controller/__init__.py index e69de29bb..5479d73d9 100644 --- a/autora/controller/__init__.py +++ b/autora/controller/__init__.py @@ -0,0 +1,273 @@ +""" + +Functions and classes for running the complete AER cycle. + +# Basic Usage + +Aim: Use the Controller to recover a simple ground truth theory from noisy data. + +Examples: + + >>> def ground_truth(x): + ... return x + 1 + + The space of allowed x values is the integers between 0 and 10 inclusive, + and we record the allowed output values as well. + >>> from autora.variable import VariableCollection, Variable + >>> metadata_0 = VariableCollection( + ... independent_variables=[Variable(name="x1", allowed_values=range(11))], + ... dependent_variables=[Variable(name="y", value_range=(-20, 20))], + ... ) + + The experimentalist is used to propose experiments. + Since the space of values is so restricted, we can just sample them all each time. + >>> from autora.experimentalist.pipeline import make_pipeline + >>> example_experimentalist = make_pipeline( + ... [metadata_0.independent_variables[0].allowed_values]) + + When we run a synthetic experiment, we get a reproducible noisy result: + >>> import numpy as np + >>> def get_example_synthetic_experiment_runner(): + ... rng = np.random.default_rng(seed=180) + ... def runner(x): + ... return ground_truth(x) + rng.normal(0, 0.1, x.shape) + ... 
return runner + >>> example_synthetic_experiment_runner = get_example_synthetic_experiment_runner() + >>> example_synthetic_experiment_runner(np.array([1])) + array([2.04339546]) + + The theorist "tries" to work out the best theory. + We use a trivial scikit-learn regressor. + >>> from sklearn.linear_model import LinearRegression + >>> example_theorist = LinearRegression() + + We initialize the Controller with the metadata describing the domain of the theory, + the theorist, experimentalist and experiment runner, + as well as a monitor which will let us know which cycle we're currently on. + >>> cycle = Cycle( + ... metadata=metadata_0, + ... theorist=example_theorist, + ... experimentalist=example_experimentalist, + ... experiment_runner=example_synthetic_experiment_runner, + ... monitor=lambda state: print(f"Generated {len(state.theories)} theories"), + ... ) + >>> cycle # doctest: +ELLIPSIS + <...Cycle object at 0x...> + + We can run the cycle by calling the run method: + >>> cycle.run(num_cycles=3) # doctest: +ELLIPSIS + Generated 1 theories + Generated 2 theories + Generated 3 theories + <...Cycle object at 0x...> + + We can now interrogate the results. The first set of conditions which went into the + experiment runner were: + >>> cycle.data.conditions[0] + array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + + The observations include the conditions and the results: + >>> cycle.data.observations[0] + array([[ 0. , 0.92675345], + [ 1. , 1.89519928], + [ 2. , 3.08746571], + [ 3. , 3.93023943], + [ 4. , 4.95429102], + [ 5. , 6.04763988], + [ 6. , 7.20770574], + [ 7. , 7.85681519], + [ 8. , 9.05735823], + [ 9. , 10.18713406], + [10. , 10.88517906]]) + + In the third cycle (index = 2) the first and last values are different again: + >>> cycle.data.observations[2][[0,-1]] + array([[ 0. , 1.08559827], + [10. 
, 11.08179553]]) + + The best fit theory after the first cycle is: + >>> cycle.data.theories[0] + LinearRegression() + + >>> def report_linear_fit(m: LinearRegression, precision=4): + ... s = f"y = {np.round(m.coef_[0].item(), precision)} x " \\ + ... f"+ {np.round(m.intercept_.item(), 4)}" + ... return s + >>> report_linear_fit(cycle.data.theories[0]) + 'y = 1.0089 x + 0.9589' + + The best fit theory after all the cycles, including all the data, is: + >>> report_linear_fit(cycle.data.theories[-1]) + 'y = 0.9989 x + 1.0292' + + This is close to the ground truth theory of x -> (x + 1) + + We can also run the cycle with more control over the execution flow: + >>> next(cycle) # doctest: +ELLIPSIS + Generated 4 theories + <...Cycle object at 0x...> + + >>> next(cycle) # doctest: +ELLIPSIS + Generated 5 theories + <...Cycle object at 0x...> + + >>> next(cycle) # doctest: +ELLIPSIS + Generated 6 theories + <...Cycle object at 0x...> + + We can continue to run the cycle as long as we like, + with a simple arbitrary stopping condition like the number of theories generated: + >>> from itertools import takewhile + >>> _ = list(takewhile(lambda c: len(c.data.theories) < 9, cycle)) + Generated 7 theories + Generated 8 theories + Generated 9 theories + + ... or the precision (here we keep iterating while the difference between the gradients + of the second-last and last cycle is larger than 1x10^-3). + >>> _ = list( + ... takewhile( + ... lambda c: np.abs(c.data.theories[-1].coef_.item() - + ... c.data.theories[-2].coef_.item()) > 1e-3, + ... cycle + ... ) + ... ) + Generated 10 theories + Generated 11 theories + + ... or continue to run as long as we like: + >>> _ = cycle.run(num_cycles=100) # doctest: +ELLIPSIS + Generated 12 theories + ... + Generated 111 theories + +# Passing Static Parameters + +Aim: pass parameters to the cycle components, when they are needed. 
+ +Examples: + + Here we have an experimentalist which takes a parameter: + >>> uniform_random_rng = np.random.default_rng(180) + >>> def uniform_random_sampler(n): + ... return uniform_random_rng.uniform(low=0, high=11, size=n) + >>> example_experimentalist_with_parameters = make_pipeline([uniform_random_sampler]) + + The cycle can handle that using the `params` keyword: + >>> cycle_with_parameters = Cycle( + ... metadata=metadata_0, + ... theorist=example_theorist, + ... experimentalist=example_experimentalist_with_parameters, + ... experiment_runner=example_synthetic_experiment_runner, + ... params={"experimentalist": {"uniform_random_sampler": {"n": 7}}} + ... ) + >>> _ = cycle_with_parameters.run() + >>> cycle_with_parameters.data.conditions[-1].flatten() + array([6.33661987, 7.34916618, 6.08596494, 2.28566582, 1.9553974 , + 5.80023149, 3.27007909]) + + For the next cycle, if we wish, we can change the parameter value: + >>> cycle_with_parameters.params["experimentalist"]["uniform_random_sampler"]\\ + ... ["n"] = 2 + >>> _ = cycle_with_parameters.run() + >>> cycle_with_parameters.data.conditions[-1].flatten() + array([10.5838232 , 9.45666031]) + +# Accessing "State-dependent Properties" + +Some experimentalists, experiment runners and theorists require access to the values +created during the cycle execution, e.g. experimentalists which require access +to the current best theory or the observed data. These data update each cycle, and +so cannot easily be set using simple `params`. + +For this case, it is possible to use "state-dependent properties" in the `params` +dictionary. 
These are the following strings, which will be replaced during execution by +their respective current values: + +- `"%observations.ivs[-1]%"`: the last observed independent variables +- `"%observations.dvs[-1]%"`: the last observed dependent variables +- `"%observations.ivs%"`: all the observed independent variables, +concatenated into a single array +- `"%observations.dvs%"`: all the observed dependent variables, +concatenated into a single array +- `"%theories[-1]%"`: the last fitted theorist +- `"%theories%"`: all the fitted theorists + +Examples: + + In the following example, we use the `"observations.ivs"` cycle property for an + experimentalist which excludes those conditions which have + already been seen. + + >>> metadata_1 = VariableCollection( + ... independent_variables=[Variable(name="x1", allowed_values=range(10))], + ... dependent_variables=[Variable(name="y")], + ... ) + >>> random_sampler_rng = np.random.default_rng(seed=180) + >>> def custom_random_sampler(conditions, n): + ... sampled_conditions = random_sampler_rng.choice(conditions, size=n, replace=False) + ... return sampled_conditions + >>> def exclude_conditions(conditions, excluded_conditions): + ... remaining_conditions = list(set(conditions) - set(excluded_conditions.flatten())) + ... return remaining_conditions + >>> unobserved_data_experimentalist = make_pipeline([ + ... metadata_1.independent_variables[0].allowed_values, + ... exclude_conditions, + ... custom_random_sampler + ... ] + ... ) + >>> cycle_with_state_dep_properties = Cycle( + ... metadata=metadata_1, + ... theorist=example_theorist, + ... experimentalist=unobserved_data_experimentalist, + ... experiment_runner=example_synthetic_experiment_runner, + ... params={ + ... "experimentalist": { + ... "exclude_conditions": {"excluded_conditions": "%observations.ivs%"}, + ... "custom_random_sampler": {"n": 1} + ... } + ... } + ... ) + + Now we can run the cycler to generate conditions and run experiments. 
The first time round, + we have the full set of 10 possible conditions to select from, and we select "2" at random: + >>> _ = cycle_with_state_dep_properties.run() + >>> cycle_with_state_dep_properties.data.conditions[-1] + array([2]) + + We can continue to run the cycler, each time we add more to the list of "excluded" options: + >>> _ = cycle_with_state_dep_properties.run(num_cycles=5) + >>> cycle_with_state_dep_properties.data.conditions + [array([2]), array([6]), array([5]), array([7]), array([3]), array([4])] + + By using the monitor callback, we can investigate what's going on with the + state-dependent properties: + >>> cycle_with_state_dep_properties.monitor = lambda state: print( + ... np.row_stack(state.observations)[:,0] # just the independent variable values + ... ) + + The monitor evaluates at the end of each cycle + and shows that we've added a new observed IV each step + >>> _ = cycle_with_state_dep_properties.run() + [2. 6. 5. 7. 3. 4. 9.] + >>> _ = cycle_with_state_dep_properties.run() + [2. 6. 5. 7. 3. 4. 9. 0.] + + We deactivate the monitor by making it "None" again. + >>> cycle_with_state_dep_properties.monitor = None + + We can continue until we've sampled all of the options: + >>> _ = cycle_with_state_dep_properties.run(num_cycles=2) + >>> cycle_with_state_dep_properties.data.conditions # doctest: +NORMALIZE_WHITESPACE + [array([2]), array([6]), array([5]), array([7]), array([3]), \ + array([4]), array([9]), array([0]), array([8]), array([1])] + + If we try to evaluate it again, the experimentalist fails, as there aren't any more + conditions which are available: + >>> cycle_with_state_dep_properties.run() # doctest: +ELLIPSIS + Traceback (most recent call last): + ... 
+ ValueError: a cannot be empty unless no samples are taken + +""" +from .cycle import Cycle diff --git a/autora/controller/base.py b/autora/controller/base.py index cd24ecfcb..3c72196e8 100644 --- a/autora/controller/base.py +++ b/autora/controller/base.py @@ -2,19 +2,12 @@ from __future__ import annotations import logging -from typing import Callable, Mapping, Optional, TypeVar, Union - -from autora.controller.protocol import ( - SupportsControllerState, - SupportsControllerStateHistory, -) +from typing import Callable, Mapping, Optional, TypeVar _logger = logging.getLogger(__name__) -State = TypeVar( - "State", bound=Union[SupportsControllerState, SupportsControllerStateHistory] -) +State = TypeVar("State") ExecutorName = TypeVar("ExecutorName", bound=str) @@ -27,31 +20,15 @@ class BaseController: Attributes: state (CycleState or CycleStateHistory): an object which is updated during the cycle and - has the following properties: - - - `metadata` (VariableCollection) - - `params` (dict): a nested dictionary with parameters for the cycle parts. - `{ - "experimentalist": {}, - "theorist": {}, - "experiment_runner": {} - }` - - `conditions`: a list of ArrayLike objects representing all the IVs proposed by the - experimentalist - - `observations`: a list of ArrayLike objects representing all the IVs and DVs - returned by the experiment runner - - `theories`: a list of all the fitted theories (scikit-learn compatible estimators) - - `history`: (only when using CycleStateHistory) a sequential list of all the above. - - executor_collection (FullCycleExecutorCollection, OnlineExecutorCollection): an - object with interfaces for running the theorist, experimentalist and - experiment_runner. This must be compatible with the `state`. - - planner (Callable): a function which takes the `state` as input and returns one of the - `executor_collection` methods. This must be compatible with both the `state` and - the `executor_collection`. 
- - monitor (Callable): a function which takes the controller as input and is called at + is compatible with the `executor_collection`, `planner` and `monitor`. + + planner: a function which takes the `state` as input and returns one of the + `executor_collection` names. + + executor_collection: a mapping between names and functions which take the state as + input and return a state. + + monitor (Callable): a function which takes the state as input and is called at the end of each step. """ @@ -87,7 +64,7 @@ def run(self, num_steps: int = 1): def __next__(self): # Plan - next_function_name = self.planner(self.state, self.executor_collection) + next_function_name = self.planner(self.state) # Map next_function = self.executor_collection[next_function_name] @@ -100,7 +77,7 @@ def __next__(self): # Monitor if self.monitor is not None: - self.monitor(self) + self.monitor(self.state) return self diff --git a/autora/controller/cycle.py b/autora/controller/cycle.py new file mode 100644 index 000000000..53afc04c6 --- /dev/null +++ b/autora/controller/cycle.py @@ -0,0 +1,235 @@ +""" The cycle controller for AER. """ +from __future__ import annotations + +import logging +from typing import Callable, Dict, Optional + +from sklearn.base import BaseEstimator + +from autora.controller.base import BaseController +from autora.controller.executor import make_default_online_executor_collection +from autora.controller.planner import full_cycle_planner +from autora.controller.state import Snapshot +from autora.experimentalist.pipeline import Pipeline +from autora.variable import VariableCollection + +_logger = logging.getLogger(__name__) + + +class Cycle(BaseController): + """ + Runs an experimentalist, theorist and experiment runner in a loop. + + Once initialized, the `cycle` can be started by calling `next(cycle)` or using the + `cycle.run` method. 
Each step runs the full AER cycle + + Attributes: + state (CycleState or CycleStateHistory): an object which is updated during the cycle and + has the following properties: + + - `metadata` (VariableCollection) + - `params` (dict): a nested dictionary with parameters for the cycle parts. + `{ + "experimentalist": {}, + "theorist": {}, + "experiment_runner": {} + }` + - `conditions`: a list of ArrayLike objects representing all the IVs proposed by the + experimentalist + - `observations`: a list of ArrayLike objects representing all the IVs and DVs + returned by the experiment runner + - `theories`: a list of all the fitted theories (scikit-learn compatible estimators) + - `history`: (only when using CycleStateHistory) a sequential list of all the above. + + executor_collection (FullCycleExecutorCollection, OnlineExecutorCollection): an + object with interfaces for running the theorist, experimentalist and + experiment_runner. This must be compatible with the `state`. + + planner (Callable): a function which takes the `state` as input and returns one of the + `executor_collection` methods. This must be compatible with both the `state` and + the `executor_collection`. + + monitor (Callable): a function which takes the controller as input and is called at + the end of each step. + + """ + + def __init__( + self, + metadata: VariableCollection, + theorist: Optional[BaseEstimator] = None, + experimentalist: Optional[Pipeline] = None, + experiment_runner: Optional[Callable] = None, + params: Optional[Dict] = None, + monitor: Optional[Callable[[Snapshot], None]] = None, + ): + """ + Args: + metadata: a description of the dependent and independent variables + theorist: a scikit-learn-compatible estimator + experimentalist: an autora.experimentalist.Pipeline + experiment_runner: a function to map independent variables onto observed dependent + variables + monitor: a function which gets read-only access to the `data` attribute at the end of + each cycle. 
+ params: a nested dictionary with parameters to be passed to the parts of the cycle. + E.g. if the experimentalist had a step named "pool" which took an argument "n", + which you wanted to set to the value 30, then params would be set to this: + `{"experimentalist": {"pool": {"n": 30}}}` + """ + if params is None: + params = {} + state = Snapshot( + metadata=metadata, + conditions=[], + observations=[], + theories=[], + params=params, + ) + planner = full_cycle_planner + + self._experimentalist_pipeline = experimentalist + self._experiment_runner_callable = experiment_runner + self._theorist_estimator = theorist + + executor_collection = make_default_online_executor_collection( + experimentalist_pipeline=self._experimentalist_pipeline, + experiment_runner_callable=self._experiment_runner_callable, + theorist_estimator=self._theorist_estimator, + ) + + super().__init__( + state=state, + planner=planner, + executor_collection=executor_collection, + monitor=monitor, + ) + + def run(self, num_cycles: int = 1): + """Execute the next step in the cycle.""" + super().run(num_steps=num_cycles) + return self + + @property + def data(self): + """An alias for `.state`.""" + return self.state + + @property + def params(self): + """ + The parameters passed to the `theorist`, `experimentalist` and `experiment_runner`. + + Should be a nested dictionary like + ``` + {'experimentalist': {... params for experimentalist ...}, + 'experiment_runner': {... params for experiment_runner ...}, + 'theorist': {... params for theorist ...}} + ``` + + + Examples: + >>> from autora.controller.cycle import Cycle + >>> p = {"some": "params"} + >>> c = Cycle(metadata=None, theorist=None, experimentalist=None, + ... 
experiment_runner=None, params=p) + >>> c.params + {'some': 'params'} + + >>> c.params = {"new": "value"} + >>> c.params + {'new': 'value'} + """ + return self.state.params + + @params.setter + def params(self, value): + self.state = self.state.update(params=value) + + @property + def theorist(self): + """ + Generates new theories. + + Examples: + >>> from autora.controller.cycle import Cycle + >>> from sklearn.linear_model import LinearRegression, PoissonRegressor + >>> c = Cycle(metadata=None, theorist=LinearRegression(), experimentalist=None, + ... experiment_runner=None) + >>> c.theorist + LinearRegression() + + >>> c.theorist = PoissonRegressor() + >>> c.theorist + PoissonRegressor() + + """ + return self._theorist_estimator + + @theorist.setter + def theorist(self, value): + self._theorist_estimator = value + self.executor_collection = self._updated_executor_collection() + + @property + def experimentalist(self): + """ + Generates new experimental conditions. + + Examples: + >>> from autora.controller.cycle import Cycle + >>> from autora.experimentalist.pipeline import Pipeline + >>> c = Cycle(metadata=None, theorist=None, experiment_runner=None, + ... experimentalist=Pipeline([("pool", [11,12,13])])) + >>> c.experimentalist + Pipeline(steps=[('pool', [11, 12, 13])], params={}) + + >>> c.experimentalist = Pipeline([('pool', [21,22,23])]) + >>> c.experimentalist + Pipeline(steps=[('pool', [21, 22, 23])], params={}) + + """ + return self._experimentalist_pipeline + + @experimentalist.setter + def experimentalist(self, value): + self._experimentalist_pipeline = value + self.executor_collection = self._updated_executor_collection() + + @property + def experiment_runner(self): + """ + Generates new observations. + + Examples: + >>> from autora.controller.cycle import Cycle + >>> def plus_one(x): return x + 1 + >>> c = Cycle(metadata=None, theorist=None, experimentalist=None, + ... 
experiment_runner=plus_one) + >>> c.experiment_runner # doctest: +ELLIPSIS + <function plus_one at 0x...> + >>> c.experiment_runner(1) + 2 + + >>> def plus_two(x): return x + 2 + >>> c.experiment_runner = plus_two + >>> c.experiment_runner # doctest: +ELLIPSIS + <function plus_two at 0x...> + >>> c.experiment_runner(1) + 3 + + """ + return self._experiment_runner_callable + + @experiment_runner.setter + def experiment_runner(self, value): + self._experiment_runner_callable = value + self.executor_collection = self._updated_executor_collection() + + def _updated_executor_collection(self): + executor_collection = make_default_online_executor_collection( + experimentalist_pipeline=self._experimentalist_pipeline, + experiment_runner_callable=self._experiment_runner_callable, + theorist_estimator=self._theorist_estimator, + ) + return executor_collection diff --git a/autora/controller/executor.py b/autora/controller/executor.py index 7d57b7ced..26f1da837 100644 --- a/autora/controller/executor.py +++ b/autora/controller/executor.py @@ -5,9 +5,10 @@ from __future__ import annotations import copy +import logging from functools import partial from types import MappingProxyType -from typing import Callable, Iterable, Literal, Tuple, Union +from typing import Callable, Iterable, Literal, Optional, Tuple, Union import numpy as np from sklearn.base import BaseEstimator @@ -16,6 +17,8 @@ from autora.controller.state import resolve_state_params from autora.experimentalist.pipeline import Pipeline +_logger = logging.getLogger(__name__) + def experimentalist_wrapper( state: SupportsControllerState, pipeline: Pipeline @@ -80,9 +83,15 @@ def full_cycle_wrapper( return theorist_result +def no_op(state): + """An Executor which has no effect on the state.""" + _logger.warning("You called a `no_op` Executor. 
Returning the state unchanged.") + return state + + def make_online_executor( kind: Literal["experimentalist", "experiment_runner", "theorist"], - core: Union[Pipeline, Callable, BaseEstimator], + core: Optional[Union[Pipeline, Callable, BaseEstimator]] = None, ): """ @@ -94,7 +103,9 @@ def make_online_executor( Returns: a curried function which will run the kind of AER step requested """ - if kind == "experimentalist": + if core is None: + curried_function = no_op + elif kind == "experimentalist": assert isinstance(core, Pipeline) curried_function = partial(experimentalist_wrapper, pipeline=core) elif kind == "experiment_runner": @@ -166,9 +177,9 @@ def make_online_executor_collection( def make_default_online_executor_collection( - experimentalist_pipeline: Pipeline, - experiment_runner_callable: Callable, - theorist_estimator: BaseEstimator, + experimentalist_pipeline: Optional[Pipeline] = None, + experiment_runner_callable: Optional[Callable] = None, + theorist_estimator: Optional[BaseEstimator] = None, ): """ Make the default AER executor collection. 
@@ -183,6 +194,17 @@ def make_default_online_executor_collection( Examples: + + If we make the empty executor collection, all the executors are no-ops: + >>> make_default_online_executor_collection() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + mappingproxy({'experimentalist': , + 'experiment_runner': , + 'theorist': , + 'full_cycle': functools.partial(, + experimentalist_pipeline=None, + experiment_runner_callable=None, + theorist_estimator=None)}) + >>> from autora.experimentalist.pipeline import Pipeline >>> from sklearn.linear_model import LinearRegression >>> experimentalist_pipeline_ = Pipeline([('p', (1, 2))]) From 6d22b1d252b1690c0f259e2921052245e0dd01ed Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 09:48:18 -0400 Subject: [PATCH 3/9] docs: add cycle docstring --- autora/controller/cycle.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/autora/controller/cycle.py b/autora/controller/cycle.py index 53afc04c6..16f04c827 100644 --- a/autora/controller/cycle.py +++ b/autora/controller/cycle.py @@ -18,10 +18,11 @@ class Cycle(BaseController): """ - Runs an experimentalist, theorist and experiment runner in a loop. + Runs an experimentalist, experiment runner, and theorist in a loop. Once initialized, the `cycle` can be started by calling `next(cycle)` or using the - `cycle.run` method. Each step runs the full AER cycle + `cycle.run` method. Each iteration runs the full AER cycle, starting with the + experimentalist and ending with the theorist. 
Attributes: state (CycleState or CycleStateHistory): an object which is updated during the cycle and From fbe516cb3f5688bf43f6410c78ffd05adbdfc906 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 10:43:07 -0400 Subject: [PATCH 4/9] feat: add ability to switch out planner on Controller class --- autora/controller/__init__.py | 125 +++++++++++++++++++++++++++++ autora/controller/controller.py | 104 ++++++++++++++++++++++++ autora/controller/cycle.py | 29 ------- autora/controller/protocol.py | 4 +- autora/controller/state/history.py | 21 +++-- 5 files changed, 245 insertions(+), 38 deletions(-) create mode 100644 autora/controller/controller.py diff --git a/autora/controller/__init__.py b/autora/controller/__init__.py index 5479d73d9..750fd2446 100644 --- a/autora/controller/__init__.py +++ b/autora/controller/__init__.py @@ -269,5 +269,130 @@ ... ValueError: a cannot be empty unless no samples are taken + +# Using Alternative Executors and Planners + +By switching out the `executor_collection` and/or the `planner`, we can specify a +different way of running the cycle. + +## Easier Seeding with a Smarter Planner + +Examples: + + In this example, we use the `Controller` which allows much more control over execution + order. It considers the last available result and picks the matching next step. This means + that seeding is relatively simple. + >>> from autora.controller import Controller + >>> def monitor(state): + ... print(f"MONITOR: Generated new {state.history[-1].kind}") + >>> cycle_with_last_result_planner = Controller( + ... monitor=monitor, + ... metadata=metadata_0, + ... theorist=example_theorist, + ... experimentalist=example_experimentalist, + ... experiment_runner=example_synthetic_experiment_runner, + ... 
) + + When we run this cycle starting with no data, we generate an experimental condition first: + >>> _ = list(takewhile(lambda c: len(c.state.theories) < 2, cycle_with_last_result_planner)) + MONITOR: Generated new CONDITION + MONITOR: Generated new OBSERVATION + MONITOR: Generated new THEORY + MONITOR: Generated new CONDITION + MONITOR: Generated new OBSERVATION + MONITOR: Generated new THEORY + + However, if we seed the same cycle with observations, then its first Executor will be the + theorist: + >>> controller_with_seed_observation = Controller( + ... monitor=monitor, + ... metadata=metadata_0, + ... theorist=example_theorist, + ... experimentalist=example_experimentalist, + ... experiment_runner=example_synthetic_experiment_runner, + ... ) + >>> seed_observation = example_synthetic_experiment_runner(np.linspace(0,5,10)) + >>> controller_with_seed_observation.seed(observations=[seed_observation]) + + >>> _ = next(controller_with_seed_observation) + MONITOR: Generated new THEORY + +## Arbitrary Execution Order (Toy Example) + +In some cases, we need to change the order of execution of different steps completely. This might be + useful in cases when different experimentalists or theorists are needed at different times in + the cycle, e.g. for initial seeding, or if the _order_ of execution is the subject of the + experiment. + +Examples: + + In this example, we use a planner which suggests a different random operation at each + step, demonstrating arbitrary execution order. We do this by passing the planner to the + `Controller` constructor. + + This might be useful in cases when different experimentalists or theorists are needed at + different times in the cycle, e.g. for initial seeding. + >>> from autora.controller.planner import random_operation_planner + >>> def monitor(state): + ... print(f"MONITOR: Generated new {state.history[-1].kind}") + >>> controller_with_random_planner = Controller( + ... planner=random_operation_planner, + ...
monitor=monitor, + ... metadata=metadata_0, + ... theorist=example_theorist, + ... experimentalist=example_experimentalist, + ... experiment_runner=example_synthetic_experiment_runner, + ... ) + + The `random_operation_planner` depends on the python random number generator, so we seed + it first: + >>> from random import seed + >>> seed(42) + + We also want to watch the logging messages from the cycle: + >>> import logging + >>> import sys + >>> logging.basicConfig(format='%(levelname)s: %(message)s', stream=sys.stdout, + ... level=logging.INFO) + + Now we can evaluate the cycle and watch its behaviour: + >>> def step(controller_): + ... try: + ... _ = next(controller_) + ... except ValueError as e: + ... print(f"FAILED: with {e=}") + + The first step, the theorist is selected as the random Executor, and it fails because it + depends on there being observations to theorize against: + >>> step(controller_with_random_planner) # i = 0 + FAILED: with e=ValueError('need at least one array to concatenate') + + The second step, a new condition is generated. + >>> step(controller_with_random_planner) # i = 1 + MONITOR: Generated new CONDITION + + ... which is repeated on the third step as well: + >>> step(controller_with_random_planner) # i = 2 + MONITOR: Generated new CONDITION + + On the fourth step, we generate another error when trying to run the theorist: + >>> step(controller_with_random_planner) # i = 3 + FAILED: with e=ValueError('need at least one array to concatenate') + + On the fifth step, we generate a first real observation, so that the next time we try to run + a theorist we are successful: + >>> step(controller_with_random_planner) # i = 4 + MONITOR: Generated new OBSERVATION + + By the ninth iteration, there are observations which the theorist can use, and it succeeds. 
+ >>> _ = list(takewhile(lambda c: len(c.state.theories) < 1, controller_with_random_planner)) + MONITOR: Generated new CONDITION + MONITOR: Generated new CONDITION + MONITOR: Generated new CONDITION + MONITOR: Generated new THEORY + + + """ +from .controller import Controller from .cycle import Cycle diff --git a/autora/controller/controller.py b/autora/controller/controller.py new file mode 100644 index 000000000..5ba2d8427 --- /dev/null +++ b/autora/controller/controller.py @@ -0,0 +1,104 @@ +""" The cycle controller for AER. """ +from __future__ import annotations + +import logging +from typing import Callable, Dict, Optional + +from sklearn.base import BaseEstimator + +from autora.controller.base import BaseController, ExecutorName +from autora.controller.executor import make_online_executor_collection +from autora.controller.planner import last_result_kind_planner +from autora.controller.state import History +from autora.experimentalist.pipeline import Pipeline +from autora.variable import VariableCollection + +_logger = logging.getLogger(__name__) + + +class Controller(BaseController): + """ + Runs an experimentalist, experiment runner, and theorist in order. + + Once initialized, the `controller` can be started by calling `next(controller)` or using the + `controller.run` method. Each iteration runs the next logical step based on the last + result: + - if the last result doesn't exist or is a theory, run the experimentalist and add an + experimental condition as a new result, + - if the last result is an experimental condition, run the experiment runner and add an + observation as a new result, + - if the last result is an observation, run the theorist and add a theory as a new result.
+ + """ + + def __init__( + self, + metadata: Optional[VariableCollection], + theorist: Optional[BaseEstimator] = None, + experimentalist: Optional[Pipeline] = None, + experiment_runner: Optional[Callable] = None, + params: Optional[Dict] = None, + monitor: Optional[Callable[[History], None]] = None, + planner: Callable[[History], ExecutorName] = last_result_kind_planner, + ): + """ + Args: + metadata: a description of the dependent and independent variables + theorist: a scikit-learn-compatible estimator + experimentalist: an autora.experimentalist.Pipeline + experiment_runner: a function to map independent variables onto observed dependent + variables + monitor: a function which gets read-only access to the `data` attribute at the end of + each cycle. + params: a nested dictionary with parameters to be passed to the parts of the cycle. + E.g. if the experimentalist had a step named "pool" which took an argument "n", + which you wanted to set to the value 30, then params would be set to this: + `{"experimentalist": {"pool": {"n": 30}}}` + planner: a function which maps from the state to the next ExecutorName. The default + is to map from the last result in the state's history to the next logical step. 
+ """ + + if params is None: + params = {} + state = History( + metadata=metadata, + conditions=[], + observations=[], + theories=[], + params=params, + ) + + self._experimentalist_pipeline = experimentalist + self._experiment_runner_callable = experiment_runner + self._theorist_estimator = theorist + + executor_collection = make_online_executor_collection( + [ + ( + "experimentalist", + "experimentalist", + self._experimentalist_pipeline, + ), + ( + "experiment_runner", + "experiment_runner", + self._experiment_runner_callable, + ), + ( + "theorist", + "theorist", + self._theorist_estimator, + ), + ] + ) + + super().__init__( + state=state, + planner=planner, + executor_collection=executor_collection, + monitor=monitor, + ) + + def seed(self, **kwargs): + for key, value in kwargs.items(): + self.state = self.state.update(**{key: value}) diff --git a/autora/controller/cycle.py b/autora/controller/cycle.py index 16f04c827..9805c4757 100644 --- a/autora/controller/cycle.py +++ b/autora/controller/cycle.py @@ -24,35 +24,6 @@ class Cycle(BaseController): `cycle.run` method. Each iteration runs the full AER cycle, starting with the experimentalist and ending with the theorist. - Attributes: - state (CycleState or CycleStateHistory): an object which is updated during the cycle and - has the following properties: - - - `metadata` (VariableCollection) - - `params` (dict): a nested dictionary with parameters for the cycle parts. - `{ - "experimentalist": {}, - "theorist": {}, - "experiment_runner": {} - }` - - `conditions`: a list of ArrayLike objects representing all the IVs proposed by the - experimentalist - - `observations`: a list of ArrayLike objects representing all the IVs and DVs - returned by the experiment runner - - `theories`: a list of all the fitted theories (scikit-learn compatible estimators) - - `history`: (only when using CycleStateHistory) a sequential list of all the above. 
- - executor_collection (FullCycleExecutorCollection, OnlineExecutorCollection): an - object with interfaces for running the theorist, experimentalist and - experiment_runner. This must be compatible with the `state`. - - planner (Callable): a function which takes the `state` as input and returns one of the - `executor_collection` methods. This must be compatible with both the `state` and - the `executor_collection`. - - monitor (Callable): a function which takes the controller as input and is called at - the end of each step. - """ def __init__( diff --git a/autora/controller/protocol.py b/autora/controller/protocol.py index 806738547..a086a0b46 100644 --- a/autora/controller/protocol.py +++ b/autora/controller/protocol.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Any, Dict, Mapping, Optional, Protocol, Sequence, TypeVar, Union +from typing import Any, Dict, Mapping, Optional, Protocol, Sequence, Set, TypeVar, Union from numpy.typing import ArrayLike from sklearn.base import BaseEstimator @@ -99,7 +99,7 @@ def theories(self) -> Sequence[BaseEstimator]: class SupportsControllerStateHistory(SupportsControllerStateProperties, Protocol): """Represents controller state as a linear sequence of entries.""" - def filter_by(self: State, **kwargs) -> State: + def filter_by(self: State, kind: Optional[Set[Union[str, ResultKind]]]) -> State: ... 
@property diff --git a/autora/controller/state/history.py b/autora/controller/state/history.py index 92623af51..8386e8746 100644 --- a/autora/controller/state/history.py +++ b/autora/controller/state/history.py @@ -7,12 +7,16 @@ from numpy.typing import ArrayLike from sklearn.base import BaseEstimator -from autora.controller.protocol import ResultKind, SupportsDataKind +from autora.controller.protocol import ( + ResultKind, + SupportsControllerStateHistory, + SupportsDataKind, +) from autora.controller.state.snapshot import Snapshot from autora.variable import VariableCollection -class History: +class History(SupportsControllerStateHistory): """ An immutable object for tracking the state and history of an AER cycle. """ @@ -359,7 +363,7 @@ def history(self) -> List[Result]: """ return self._history - def filter_by(self, kind: Set[Union[str, ResultKind]]) -> History: + def filter_by(self, kind: Optional[Set[Union[str, ResultKind]]]) -> History: """ Return a copy of the object with only data belonging to the specified kinds. 
@@ -377,10 +381,13 @@ def filter_by(self, kind: Set[Union[str, ResultKind]]) -> History: Result(data='o2', kind=ResultKind.OBSERVATION)]) """ - kind_ = {ResultKind(s) for s in kind} - filtered_history = _filter_history(self._history, kind_) - new_object = History(history=filtered_history) - return new_object + if kind is None: + return self + else: + kind_ = {ResultKind(s) for s in kind} + filtered_history = _filter_history(self._history, kind_) + new_object = History(history=filtered_history) + return new_object @dataclass(frozen=True) From 0e69aa16602b7016b27df02d75f7e0ecd414d313 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 10:44:33 -0400 Subject: [PATCH 5/9] feat: add protocol inheritance on Snapshot --- autora/controller/state/snapshot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autora/controller/state/snapshot.py b/autora/controller/state/snapshot.py index 51e75c96d..366a41b80 100644 --- a/autora/controller/state/snapshot.py +++ b/autora/controller/state/snapshot.py @@ -5,11 +5,12 @@ from numpy.typing import ArrayLike from sklearn.base import BaseEstimator +from autora.controller.protocol import SupportsControllerStateFields from autora.variable import VariableCollection @dataclass(frozen=True) -class Snapshot: +class Snapshot(SupportsControllerStateFields): """An object passed between and updated by processing steps in the Controller.""" # Single values From c9b43a01ad742c589133fe74df855b98c28d0bd0 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 13:07:39 -0400 Subject: [PATCH 6/9] docs: update example of using base Controller for full control over the cycle --- autora/controller/__init__.py | 159 ++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/autora/controller/__init__.py b/autora/controller/__init__.py index 750fd2446..07b2688b3 100644 --- a/autora/controller/__init__.py +++ b/autora/controller/__init__.py @@ -391,7 +391,166 @@ MONITOR: 
Generated new CONDITION MONITOR: Generated new THEORY +## Arbitrary Executors and Planners +In some cases, we need to go beyond adding different orders of planning the three +`experimentalist`, `experiment_runner` and `theorist` and build more complex cycles with +different Executors for different states. + +For instance, there might be a situation where in the +first iteration, the controller needs to gather observations over a uniform sample of the domain, +but in subsequent samples we use a different active experimentalist. + +In these cases, we need full control over (and have full responsibility for) the planners and +executors. + +Examples: + The theory we'll try to discover is: + >>> def ground_truth(x, m=3.5, c=1): + ... return m * x + c + >>> rng = np.random.default_rng(seed=180) + >>> def experiment_runner(x): + ... return ground_truth(x) + rng.normal(0, 0.1) + >>> metadata_2 = VariableCollection( + ... independent_variables=[Variable(name="x1", value_range=(-10, 10))], + ... dependent_variables=[Variable(name="y", value_range=(-100, 100))], + ... ) + + We now define a planner which chooses a different experimentalist when supplied with no data + versus some data. + >>> from autora.controller.protocol import ResultKind + >>> def seeding_planner(state): + ... # First, we have to filter the history by the kinds of objects we care about. + ... # If other objects were added later – parameters, or metadata updates – we don't want + ... # them to affect the order. + ... filtered_history = state.filter_by( + ... kind={ResultKind.CONDITION, ResultKind.OBSERVATION, ResultKind.THEORY} + ... ).history + ... + ... # In case there aren't any results, we need to have a fallback type – None + ... try: + ... last_result_kind = filtered_history[-1].kind + ... except IndexError: + ... last_result_kind = None + ... + ... # We map the result kind (or None) to the next step we care about + ... executor_name = { + ... 
None: "seed_experimentalist", # specify a special seeding experimentalist + ... ResultKind.THEORY: "main_experimentalist", # the usual experimentalist + ... ResultKind.CONDITION: "experiment_runner", + ... ResultKind.OBSERVATION: "theorist", + ... }[last_result_kind] + ... + ... return executor_name + + Now we can see what would happen with a particular state. If there are no results, then we get + the seed experimentalist: + >>> from autora.controller.state import History + >>> seeding_planner(History()) + 'seed_experimentalist' + + ... whereas if we have a theory to work on, we get the main experimentalist: + >>> seeding_planner(History(theories=['a theory'])) + 'main_experimentalist' + + If we had a condition last, we choose the experiment runner next: + >>> seeding_planner(History(conditions=['a condition'])) + 'experiment_runner' + + If we had an observation last, we choose the theorist next: + >>> seeding_planner(History(observations=['an observation'])) + 'theorist' + + Now we need to define an executor collection to handle the actual execution steps. + >>> from autora.experimentalist.pipeline import make_pipeline + >>> from autora.experimentalist.sampler.random import random_sampler + >>> from functools import partial + + We can run the seed pipeline with no data: + >>> experimentalist_which_needs_no_data = make_pipeline([ + ... np.linspace(*metadata_2.independent_variables[0].value_range, 1_000), + ... partial(random_sampler, n=10)] + ... ) + >>> np.array(experimentalist_which_needs_no_data()) + array([ 6.71671672, -0.73073073, -5.05505506, 6.13613614, 0.03003003, + 4.59459459, 2.79279279, 5.43543544, -1.65165165, 8.0980981 ]) + + + ... whereas we need some model for this sampler: + >>> from autora.experimentalist.sampler.model_disagreement import model_disagreement_sampler + >>> experimentalist_which_needs_a_theory = make_pipeline([ + ... np.linspace(*metadata_2.independent_variables[0].value_range, 1_000), + ...
partial(model_disagreement_sampler, num_samples=10)]) + >>> experimentalist_which_needs_a_theory() + Traceback (most recent call last): + ... + TypeError: model_disagreement_sampler() missing 1 required positional argument: 'models' + + We'll have to provide the models during the cycle run. + + We need a reasonable theorist for this situation. For this problem, a linear regressor will + suffice. + >>> t = LinearRegression() + + Let's test the theorist for the ideal case – lots of data: + >>> X = np.linspace(*metadata_2.independent_variables[0].value_range, 1_000).reshape(-1, 1) + >>> tfitted = t.fit(X, experiment_runner(X)) + >>> f"m = {tfitted.coef_[0][0]:.2f}, c = {tfitted.intercept_[0]:.2f}" + 'm = 3.50, c = 1.04' + + This seems to work fine. + + Now we can define the executor component. We'll use a factory method to generate the + collection: + >>> from autora.controller.executor import make_online_executor_collection + >>> executor_collection = make_online_executor_collection([ + ... ("seed_experimentalist", "experimentalist", experimentalist_which_needs_no_data), + ... ("main_experimentalist", "experimentalist", experimentalist_which_needs_a_theory), + ... ("theorist", "theorist", LinearRegression()), + ... ("experiment_runner", "experiment_runner", experiment_runner), + ... ]) + + We need some special parameters to handle the main experimentalist, so we specify those: + >>> params = {"main_experimentalist": {"models": "%theories%"}} + + We now instantiate the controller: + >>> from autora.controller.base import BaseController + >>> from autora.controller.state import History + >>> c = BaseController( + ... state=History(metadata=metadata_2, params=params), + ... planner=seeding_planner, + ... executor_collection=executor_collection + ... 
) + >>> c # doctest: +ELLIPSIS + <...BaseController object at 0x...> + + On the first step, we generate a condition (as we expected): + >>> next(c).state.history[-1] # doctest: +NORMALIZE_WHITESPACE + Result(data=array([ 9.4994995 , -8.17817818, -1.19119119, 8.6986987 , 7.45745746, + -6.93693694, 8.05805806, -1.45145145, -5.97597598, 1.57157157]), + kind=ResultKind.CONDITION) + + On the second step, we generate some new observations: + >>> next(c).state.history[-1] + Result(data=array([[ 9.4994995 , 34.1750017 ], + [ -8.17817818, -27.69687017], + [ -1.19119119, -3.24241572], + [ 8.6986987 , 31.3721989 ], + [ 7.45745746, 27.02785455], + [ -6.93693694, -23.35252583], + [ 8.05805806, 29.12995666], + [ -1.45145145, -4.15332663], + [ -5.97597598, -19.98916246], + [ 1.57157157, 6.42725395]]), kind=ResultKind.OBSERVATION) + + + On the third step, we generate a new theory: + >>> next(c).state.history[-1] + Result(data=LinearRegression(), kind=ResultKind.THEORY) + + On the fourth step, we switch to using the main experimentalist and generate some new + experimental data that way + >>> next(c).state.history[-1] """ from .controller import Controller From 6b499502641c99c93e81dee69e6284a7ad9d57ee Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 16:24:19 -0400 Subject: [PATCH 7/9] docs: update example of using base Controller for full control over the cycle --- autora/controller/__init__.py | 116 +++++++++++++++++----------------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/autora/controller/__init__.py b/autora/controller/__init__.py index 07b2688b3..53645bc5b 100644 --- a/autora/controller/__init__.py +++ b/autora/controller/__init__.py @@ -397,9 +397,11 @@ `experimentalist`, `experiment_runner` and `theorist` and build more complex cycles with different Executors for different states. 
-For instance, there might be a situation where in the -first iteration, the controller needs to gather observations over a uniform sample of the domain, -but in subsequent samples we use a different active experimentalist. +For instance, there might be a situation where at the start, the main "active" experimentalist +can't be run as it needs one or more theories as input. +Once there are at least two theories, then the active experimentalist _can_ be run. +One method to handle this is to run a "seed" experimentalist until the main experimentalist can +be used. In these cases, we need full control over (and have full responsibility for) the planners and executors. @@ -419,38 +421,31 @@ We now define a planner which chooses a different experimentalist when supplied with no data versus some data. >>> from autora.controller.protocol import ResultKind + >>> from autora.controller.planner import last_result_kind_planner >>> def seeding_planner(state): - ... # First, we have to filter the history by the kinds of objects we care about. - ... # If other objects were added later – parameters, or metadata updates – we don't want - ... # them to affect the order. - ... filtered_history = state.filter_by( - ... kind={ResultKind.CONDITION, ResultKind.OBSERVATION, ResultKind.THEORY} - ... ).history - ... - ... # In case there aren't any results, we need to have a fallback type – None - ... try: - ... last_result_kind = filtered_history[-1].kind - ... except IndexError: - ... last_result_kind = None - ... - ... # We map the result kind (or None) to the next step we care about - ... executor_name = { - ... None: "seed_experimentalist", # specify a special seeding experimentalist - ... ResultKind.THEORY: "main_experimentalist", # the usual experimentalist - ... ResultKind.CONDITION: "experiment_runner", - ... ResultKind.OBSERVATION: "theorist", - ... }[last_result_kind] - ... - ... return executor_name - - Now we can see what would happen with a particular state. 
If there are no results, then we get - the seed experimentalist: + ... # We're going to reuse the "last_available_result" planner, and modify its output. + ... next_function = last_result_kind_planner(state) + ... if next_function == "experimentalist": + ... if len(state.theories) >= 2: + ... return "main_experimentalist" + ... else: + ... return "seed_experimentalist" + ... else: + ... return next_function + + Now we can see what would happen with a particular state. If there are no results, + then we get the seed experimentalist: >>> from autora.controller.state import History >>> seeding_planner(History()) 'seed_experimentalist' - ... whereas if we have a theory to work on, we get the main experimentalist: - >>> seeding_planner(History(theories=['a theory'])) + ... and we also get the seed experimentalist if the last result was a theory and there are less + than two theories: + >>> seeding_planner(History(theories=['a single theory'])) + 'seed_experimentalist' + + ... whereas if we have at least two theories to work on, we get the main experimentalist: + >>> seeding_planner(History(theories=['a theory', 'another theory'])) 'main_experimentalist' If we had a condition last, we choose the experiment runner next: @@ -462,7 +457,7 @@ 'theorist' Now we need to define an executor collection to handle the actual execution steps. - >>> from autora.experimentalist.pipeline import make_pipeline + >>> from autora.experimentalist.pipeline import make_pipeline, Pipeline >>> from autora.experimentalist.sampler.random import random_sampler >>> from functools import partial @@ -478,9 +473,9 @@ ... whereas we need some model for this sampler: >>> from autora.experimentalist.sampler.model_disagreement import model_disagreement_sampler - >>> experimentalist_which_needs_a_theory = make_pipeline([ - ... np.linspace(*metadata_2.independent_variables[0].value_range, 1_000), - ... 
partial(model_disagreement_sampler, num_samples=10)]) + >>> experimentalist_which_needs_a_theory = Pipeline([ + ... ('pool', np.linspace(*metadata_2.independent_variables[0].value_range, 1_000)), + ... ('sampler', partial(model_disagreement_sampler, num_samples=5)),]) >>> experimentalist_which_needs_a_theory() Traceback (most recent call last): ... @@ -511,7 +506,11 @@ ... ]) We need some special parameters to handle the main experimentalist, so we specify those: - >>> params = {"main_experimentalist": {"models": "%theories%"}} + >>> params = {"experimentalist": {"sampler": {"models": "%theories%"}}} + + Warning: the dictionary `{"sampler": {"models": "%theories%"}}` above is shared by + both the seed and main experimentalists. This behavior may change in future to allow separate + parameter dictionaries for each executor in the collection. We now instantiate the controller: >>> from autora.controller.base import BaseController @@ -524,33 +523,36 @@ >>> c # doctest: +ELLIPSIS <...BaseController object at 0x...> - On the first step, we generate a condition (as we expected): + >>> class PrintHandler(logging.Handler): + ... def emit(self, record): + ... 
print(self.format(record)) + + On the first step, we generate a condition sampled randomly across the whole domain (as we + expected): >>> next(c).state.history[-1] # doctest: +NORMALIZE_WHITESPACE Result(data=array([ 9.4994995 , -8.17817818, -1.19119119, 8.6986987 , 7.45745746, -6.93693694, 8.05805806, -1.45145145, -5.97597598, 1.57157157]), kind=ResultKind.CONDITION) - On the second step, we generate some new observations: - >>> next(c).state.history[-1] - Result(data=array([[ 9.4994995 , 34.1750017 ], - [ -8.17817818, -27.69687017], - [ -1.19119119, -3.24241572], - [ 8.6986987 , 31.3721989 ], - [ 7.45745746, 27.02785455], - [ -6.93693694, -23.35252583], - [ 8.05805806, 29.12995666], - [ -1.45145145, -4.15332663], - [ -5.97597598, -19.98916246], - [ 1.57157157, 6.42725395]]), kind=ResultKind.OBSERVATION) - - - On the third step, we generate a new theory: - >>> next(c).state.history[-1] - Result(data=LinearRegression(), kind=ResultKind.THEORY) - - On the fourth step, we switch to using the main experimentalist and generate some new - experimental data that way - >>> next(c).state.history[-1] + After three more steps, we generate a new condition, which again is sampled across the whole + domain. Here we iterate the controller until we've got two sets of conditions: + >>> _ = list(takewhile(lambda c: len(c.state.conditions) < 2, c)) + >>> c.state.history[-1] # doctest: +NORMALIZE_WHITESPACE + Result(data=array([ 1.57157157, -3.93393393, -0.47047047, -4.47447447, 8.43843844, + 6.17617618, -3.49349349, -8.998999 , 4.93493493, 2.25225225]), + kind=ResultKind.CONDITION) + + Once we have two theories: + >>> _ = list(takewhile(lambda c: len(c.state.theories) < 2, c)) + >>> c.state.theories + [LinearRegression(), LinearRegression()] + + ... 
when we run the next step, we'll get the main experimentalist, which samples five points + from the extreme parts of the problem domain where the disagreement between the two theories + is the greatest: + >>> next(c).state.history[-1] # doctest: +NORMALIZE_WHITESPACE + Result(data=array([-10. , -9.97997998, -9.95995996, -9.93993994, -9.91991992]), + kind=ResultKind.CONDITION) """ from .controller import Controller From 926448a5c1677651a8acff7e26e27ef8dac51d76 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Wed, 22 Mar 2023 17:34:44 -0400 Subject: [PATCH 8/9] docs: remove extra space in docstring --- autora/controller/cycle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autora/controller/cycle.py b/autora/controller/cycle.py index 9805c4757..90221069b 100644 --- a/autora/controller/cycle.py +++ b/autora/controller/cycle.py @@ -99,7 +99,6 @@ def params(self): 'theorist': {... params for theorist ...}} ``` - Examples: >>> from autora.controller.cycle import Cycle >>> p = {"some": "params"} From 50ca132e4c5bd3b04087dfe7861e30cc9658fab4 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Thu, 23 Mar 2023 17:30:52 -0400 Subject: [PATCH 9/9] refactor: use new params in Controllers --- autora/controller/__init__.py | 6 +----- autora/controller/base.py | 3 ++- autora/controller/cycle.py | 6 +++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/autora/controller/__init__.py b/autora/controller/__init__.py index 53645bc5b..75ba05847 100644 --- a/autora/controller/__init__.py +++ b/autora/controller/__init__.py @@ -506,11 +506,7 @@ ... ]) We need some special parameters to handle the main experimentalist, so we specify those: - >>> params = {"experimentalist": {"sampler": {"models": "%theories%"}}} - - Warning: the dictionary `{"sampler": {"models": "%theories%"}}` above is shared by - both the seed and main experimentalists. This behavior may change in future to allow separate - parameter dictionaries for each executor in the collection. 
+ >>> params = {"main_experimentalist": {"sampler": {"models": "%theories%"}}} We now instantiate the controller: >>> from autora.controller.base import BaseController diff --git a/autora/controller/base.py b/autora/controller/base.py index 3c72196e8..fcb661698 100644 --- a/autora/controller/base.py +++ b/autora/controller/base.py @@ -68,9 +68,10 @@ def __next__(self): # Map next_function = self.executor_collection[next_function_name] + next_params = self.state.params.get(next_function_name, {}) # Execute - result = next_function(self.state) + result = next_function(self.state, params=next_params) # Update self.state = result diff --git a/autora/controller/cycle.py b/autora/controller/cycle.py index 90221069b..7b82a91f0 100644 --- a/autora/controller/cycle.py +++ b/autora/controller/cycle.py @@ -56,7 +56,7 @@ def __init__( conditions=[], observations=[], theories=[], - params=params, + params={"full_cycle": params}, ) planner = full_cycle_planner @@ -111,11 +111,11 @@ def params(self): >>> c.params {'new': 'value'} """ - return self.state.params + return self.state.params["full_cycle"] @params.setter def params(self, value): - self.state = self.state.update(params=value) + self.state = self.state.update(params={"full_cycle": value}) @property def theorist(self):