diff --git a/docs/cli/basic-usage/.gitignore b/docs/cli/basic-usage/.gitignore new file mode 100644 index 00000000..13974321 --- /dev/null +++ b/docs/cli/basic-usage/.gitignore @@ -0,0 +1 @@ +*.pkl diff --git a/docs/cli/basic-usage/README.md b/docs/cli/basic-usage/README.md new file mode 100644 index 00000000..0470d14b --- /dev/null +++ b/docs/cli/basic-usage/README.md @@ -0,0 +1,145 @@ +# Basic usage + +The command line interface allows us to load and save `States` and run arbitrary functions on them. + +## Setup + +To use the command line, we first define a package `example` containing the functions we want to run on the State: + +```python title="example/__init__.py" +--8<-- "https://raw.githubusercontent.com/AutoResearch/autora-core/main/docs/cli/basic-usage/example/__init__.py" +``` + + +```python title="example/lib.py" +--8<-- "https://raw.githubusercontent.com/AutoResearch/autora-core/main/docs/cli/basic-usage/example/lib.py" +``` + +We can run the pipeline of initialization, condition generation, experiment and theory building as follows. + +First we create an initial state file: + +```shell +python -m autora.workflow example.lib.initial_state --out-path initial.pkl +``` + +Next we run the condition generation: + + +```shell +python -m autora.workflow example.lib.experimentalist --in-path initial.pkl --out-path conditions.pkl +``` + +We run the experiment: + + +```shell +python -m autora.workflow example.lib.experiment_runner --in-path conditions.pkl --out-path experiment_data.pkl +``` + +And then the theorist: + + +```shell +python -m autora.workflow example.lib.theorist --in-path experiment_data.pkl --out-path model.pkl +``` + +We can interrogate the results by loading them into the current session. 
+ + +```python +#!/usr/bin/env python +from autora.workflow.__main__ import load_state +state = load_state("model.pkl") +print(state) +# state = +# StandardState( +# variables=VariableCollection( +# independent_variables=[ +# Variable(name='x', +# value_range=None, +# allowed_values=array([-10. , -9.98, -9.96, ..., 9.96, 9.98, 10. ]), +# units='', +# type=, +# variable_label='', +# rescale=1, +# is_covariate=False) +# ], +# dependent_variables=[ +# Variable(name='y', +# value_range=None, +# allowed_values=None, +# units='', +# type=, +# variable_label='', +# rescale=1, +# is_covariate=False) +# ], +# covariates=[] +# ), +# conditions= x +# 342 -3.16 +# 869 7.38 +# 732 4.64 +# 387 -2.26 +# 919 8.38 +# 949 8.98 +# 539 0.78 +# 563 1.26 +# 855 7.10 +# 772 5.44, +# experiment_data= x y +# 0 -3.16 1.257587 +# 1 7.38 153.259915 +# 2 4.64 54.291348 +# 3 -2.26 10.374509 +# 4 8.38 155.483778 +# 5 8.98 183.774472 +# 6 0.78 3.154024 +# 7 1.26 14.033608 +# 8 7.10 103.032008 +# 9 5.44 94.629911, +# models=[ +# GridSearchCV( +# estimator=Pipeline(steps=[ +# ('polynomialfeatures', PolynomialFeatures()), +# ('linearregression', LinearRegression())]), +# param_grid={'polynomialfeatures__degree': [0, 1, 2, 3, 4]}, +# scoring='r2' +# ) +# ] +# ) +``` + +`state` is the object printed above; once loaded, it can be treated like any other `State` object. +For instance, we can plot the results: + +```python title="example/plot.py" +--8<-- "https://raw.githubusercontent.com/AutoResearch/autora-core/main/docs/cli/basic-usage/example/plot.py" +``` + +```shell +python -m example.plot model.pkl +``` + + +![png](img/after-one-cycle.png) + +If we instead run the experiment for 4 cycles, we can get results closer to the ground truth. 
+ +```shell +set -x # echo each command + +python -m autora.workflow example.lib.initial_state --out-path "result.pkl" + +for i in {1..4} +do + python -m autora.workflow example.lib.experimentalist --in-path "result.pkl" --out-path "result.pkl" + python -m autora.workflow example.lib.experiment_runner --in-path "result.pkl" --out-path "result.pkl" + python -m autora.workflow example.lib.theorist --in-path "result.pkl" --out-path "result.pkl" +done + +python -m example.plot result.pkl +``` + +![png](img/after-four-cycles.png) diff --git a/docs/cli/basic-usage/Readme.ipynb b/docs/cli/basic-usage/Readme.ipynb deleted file mode 100644 index de71cfe0..00000000 --- a/docs/cli/basic-usage/Readme.ipynb +++ /dev/null @@ -1,643 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Basic usage\n", - "\n", - "The command line interface `python -m autora.workflow` allows us to load and save `States` and run arbitrary \n", - "functions on them." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "import numpy as np\r\n", - "import pandas as pd\r\n", - "from sklearn.linear_model import LinearRegression\r\n", - "\r\n", - "from autora.experimentalist.grid import grid_pool\r\n", - "from autora.state import StandardState, estimator_on_state, on_state\r\n", - "from autora.variable import Variable, VariableCollection\r\n", - "from sklearn.model_selection import GridSearchCV\r\n", - "from sklearn.pipeline import make_pipeline\r\n", - "from sklearn.preprocessing import PolynomialFeatures\r\n", - "\r\n", - "rng = np.random.default_rng()\r\n", - "\r\n", - "\r\n", - "def initial_state(_):\r\n", - " state = StandardState(\r\n", - " variables=VariableCollection(\r\n", - " independent_variables=[\r\n", - " Variable(name=\"x\", allowed_values=np.linspace(-10, +10, 1001))\r\n", - " ],\r\n", - " dependent_variables=[Variable(name=\"y\")],\r\n", - " 
covariates=[],\r\n", - " ),\r\n", - " conditions=None,\r\n", - " experiment_data=pd.DataFrame({\"x\": [], \"y\": []}),\r\n", - " models=[],\r\n", - " )\r\n", - " return state\r\n", - "\r\n", - "\r\n", - "@on_state(output=[\"conditions\"])\r\n", - "def experimentalist(variables):\r\n", - " conditions: pd.DataFrame = grid_pool(variables)\r\n", - " selected_conditions = conditions.sample(10, random_state=rng)\r\n", - " return selected_conditions\r\n", - "\r\n", - "\r\n", - "coefs = [2.0, 3.0, 1.0]\r\n", - "noise_std = 10.0\r\n", - "\r\n", - "\r\n", - "def ground_truth(x, coefs_=coefs):\r\n", - " return coefs_[0] * x**2.0 + coefs_[1] * x + coefs_[2]\r\n", - "\r\n", - "\r\n", - "@on_state(output=[\"experiment_data\"])\r\n", - "def experiment_runner(conditions, coefs_=coefs, noise_std_=noise_std, rng=rng):\r\n", - " experiment_data = conditions.assign(\r\n", - " y=(\r\n", - " ground_truth(conditions[\"x\"], coefs_=coefs_)\r\n", - " + rng.normal(0.0, noise_std_, size=conditions[\"x\"].shape)\r\n", - " )\r\n", - " )\r\n", - " return experiment_data\r\n", - "\r\n", - "\r\n", - "theorist = estimator_on_state(\r\n", - " GridSearchCV(\r\n", - " make_pipeline(PolynomialFeatures(), LinearRegression()),\r\n", - " param_grid={\"polynomialfeatures__degree\": [0, 1, 2, 3, 4]},\r\n", - " scoring=\"r2\",\r\n", - " )\r\n", - ")\r\n" - ] - } - ], - "source": [ - "%cat lib.py" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can run the pipeline of initialization, condition generation, experiment and theory building as follows.\n", - "\n", - "First we create an initial state file: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!python -m autora.workflow lib.initial_state --out-path initial.pkl" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next we run the condition generation:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "!python -m autora.workflow lib.experimentalist --in-path initial.pkl --out-path conditions.pkl" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We run the experiment:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!python -m autora.workflow lib.experiment_runner --in-path conditions.pkl --out-path experiment_data.pkl" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And then the theorist:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!python -m autora.workflow lib.theorist --in-path experiment_data.pkl --out-path model.pkl" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can interrogate the results by loading them into the current session." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "StandardState(variables=VariableCollection(independent_variables=[Variable(name='x', value_range=None, allowed_values=array([-10. , -9.98, -9.96, ..., 9.96, 9.98, 10. 
]), units='', type=, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions= x\n", - "342 -3.16\n", - "869 7.38\n", - "732 4.64\n", - "387 -2.26\n", - "919 8.38\n", - "949 8.98\n", - "539 0.78\n", - "563 1.26\n", - "855 7.10\n", - "772 5.44, experiment_data= x y\n", - "0 -3.16 1.257587\n", - "1 7.38 153.259915\n", - "2 4.64 54.291348\n", - "3 -2.26 10.374509\n", - "4 8.38 155.483778\n", - "5 8.98 183.774472\n", - "6 0.78 3.154024\n", - "7 1.26 14.033608\n", - "8 7.10 103.032008\n", - "9 5.44 94.629911, models=[GridSearchCV(estimator=Pipeline(steps=[('polynomialfeatures',\n", - " PolynomialFeatures()),\n", - " ('linearregression',\n", - " LinearRegression())]),\n", - " param_grid={'polynomialfeatures__degree': [0, 1, 2, 3, 4]},\n", - " scoring='r2')])" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from autora.workflow.__main__ import load_state\n", - "s = load_state(\"model.pkl\")\n", - "s" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have three new fields in the state. The first is the conditions, a sample of ten values from between -10 and 10." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
x
342-3.16
8697.38
7324.64
387-2.26
9198.38
9498.98
5390.78
5631.26
8557.10
7725.44
\n", - "
" - ], - "text/plain": [ - " x\n", - "342 -3.16\n", - "869 7.38\n", - "732 4.64\n", - "387 -2.26\n", - "919 8.38\n", - "949 8.98\n", - "539 0.78\n", - "563 1.26\n", - "855 7.10\n", - "772 5.44" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.conditions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have experiment data corresponding to those conditions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
0-3.161.257587
17.38153.259915
24.6454.291348
3-2.2610.374509
48.38155.483778
58.98183.774472
60.783.154024
71.2614.033608
87.10103.032008
95.4494.629911
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 -3.16 1.257587\n", - "1 7.38 153.259915\n", - "2 4.64 54.291348\n", - "3 -2.26 10.374509\n", - "4 8.38 155.483778\n", - "5 8.98 183.774472\n", - "6 0.78 3.154024\n", - "7 1.26 14.033608\n", - "8 7.10 103.032008\n", - "9 5.44 94.629911" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.experiment_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have a best-fit linear model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
GridSearchCV(estimator=Pipeline(steps=[('polynomialfeatures',\n",
-       "                                        PolynomialFeatures()),\n",
-       "                                       ('linearregression',\n",
-       "                                        LinearRegression())]),\n",
-       "             param_grid={'polynomialfeatures__degree': [0, 1, 2, 3, 4]},\n",
-       "             scoring='r2')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "GridSearchCV(estimator=Pipeline(steps=[('polynomialfeatures',\n", - " PolynomialFeatures()),\n", - " ('linearregression',\n", - " LinearRegression())]),\n", - " param_grid={'polynomialfeatures__degree': [0, 1, 2, 3, 4]},\n", - " scoring='r2')" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "... which we can plot:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jholla10/Developer/autora-workflow/.venv/lib/python3.8/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but PolynomialFeatures was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "\n", - "from lib import ground_truth, noise_std\n", - "\n", - "def plot_results(state):\n", - " x = np.linspace(-10, 10, 100).reshape((-1, 1))\n", - " plt.plot(x, ground_truth(x), label=\"ground_truth\", c=\"orange\")\n", - " plt.fill_between(x.flatten(), ground_truth(x).flatten() + noise_std, ground_truth(x).flatten() - noise_std,\n", - " alpha=0.3, color=\"orange\")\n", - " \n", - " xi, yi = state.experiment_data[\"x\"], state.experiment_data[\"y\"]\n", - " plt.scatter(xi, yi, label=f\"observations\")\n", - " \n", - " plt.plot(x, state.model.predict(x), label=\"model\")\n", - " \n", - " plt.legend()\n", - "\n", - "plot_results(s)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we run the experiment for another 3 cycles, we can get results closer to the ground truth. In this example, we overwrite the results file each time the CLI is called." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!cp \"model.pkl\" \"result.pkl\"\n", - "for i in range(3):\n", - " !python -m autora.workflow lib.experimentalist --in-path \"result.pkl\" --out-path \"result.pkl\"\n", - " !python -m autora.workflow lib.experiment_runner --in-path \"result.pkl\" --out-path \"result.pkl\"\n", - " !python -m autora.workflow lib.theorist --in-path \"result.pkl\" --out-path \"result.pkl\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jholla10/Developer/autora-workflow/.venv/lib/python3.8/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but PolynomialFeatures was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "state_after_three_more_cycles = load_state(\"result.pkl\")\n", - "plot_results(state_after_three_more_cycles)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can continue to run for as long as we like, e.g. another 10 cycles:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jholla10/Developer/autora-workflow/.venv/lib/python3.8/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but PolynomialFeatures was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "!cp \"result.pkl\" \"result-10.pkl\"\n", - "for i in range(10):\n", - " !python -m autora.workflow lib.experimentalist --in-path \"result-10.pkl\" --out-path \"result-10.pkl\"\n", - " !python -m autora.workflow lib.experiment_runner --in-path \"result-10.pkl\" --out-path \"result-10.pkl\"\n", - " !python -m autora.workflow lib.theorist --in-path \"result-10.pkl\" --out-path \"result-10.pkl\"\n", - "\n", - "state_after_ten_more_cycles = load_state(\"result-10.pkl\")\n", - "\n", - "plot_results(state_after_ten_more_cycles)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/docs/cli/basic-usage/example/__init__.py b/docs/cli/basic-usage/example/__init__.py new file mode 100644 index 00000000..9fe79be5 --- /dev/null +++ b/docs/cli/basic-usage/example/__init__.py @@ -0,0 +1 @@ +# This __init__.py file is required to allow `plot` to import from `.lib` diff --git a/docs/cli/basic-usage/lib.py b/docs/cli/basic-usage/example/lib.py similarity index 100% rename from docs/cli/basic-usage/lib.py rename to docs/cli/basic-usage/example/lib.py diff --git a/docs/cli/basic-usage/example/plot.py b/docs/cli/basic-usage/example/plot.py new file mode 100644 index 00000000..4ccb4f48 --- /dev/null +++ b/docs/cli/basic-usage/example/plot.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +import pathlib + +import numpy as np +import pandas as pd +import typer +from matplotlib import pyplot as plt +from sklearn.model_selection import GridSearchCV + +from autora.state import StandardState +from autora.workflow.__main__ import load_state + +from .lib 
import ground_truth, noise_std + + +def plot_results(state: StandardState): + x = np.linspace(-10, 10, 100).reshape((-1, 1)) + plt.plot(x, ground_truth(x), label="ground_truth", c="orange") + plt.fill_between( + x.flatten(), + ground_truth(x).flatten() + noise_std, + ground_truth(x).flatten() - noise_std, + alpha=0.3, + color="orange", + ) + + assert isinstance(state.experiment_data, pd.DataFrame) + xi, yi = state.experiment_data["x"], state.experiment_data["y"] + plt.scatter(xi, yi, label="observations") + + assert isinstance(state.models[-1], GridSearchCV) + plt.plot(x, state.models[-1].predict(x), label="model") + + plt.xlabel("x") + plt.ylabel("y") + + plt.legend() + plt.show() + + +def main(filename: pathlib.Path): + state = load_state(filename) + assert isinstance(state, StandardState) + plot_results(state) + + +if __name__ == "__main__": + typer.run(main) diff --git a/docs/cli/basic-usage/img/after-four-cycles.png b/docs/cli/basic-usage/img/after-four-cycles.png new file mode 100644 index 00000000..75d36d29 Binary files /dev/null and b/docs/cli/basic-usage/img/after-four-cycles.png differ diff --git a/docs/cli/basic-usage/img/after-one-cycle.png b/docs/cli/basic-usage/img/after-one-cycle.png new file mode 100644 index 00000000..b98dd87f Binary files /dev/null and b/docs/cli/basic-usage/img/after-one-cycle.png differ diff --git a/mkdocs.yml b/mkdocs.yml index e16c828a..5aee1786 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,7 +28,7 @@ nav: - Cycle: - Functional: 'cycle/Linear and Cyclical Workflows using Functions and States.ipynb' - "Command Line Interface": - - "Basic Usage": "cli/basic-usage/Readme.ipynb" + - "Basic Usage": "cli/basic-usage/README.md" - "Using Cylc": "cli/cylc-pip/README.md" - "Using Cylc and Slurm": "cli/cylc-slurm-pip/README.md"