From e93b00262f70fce502161990d78f972652236e83 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 10:35:49 -0400 Subject: [PATCH 01/33] feat: add dumps interface to yaml_ module --- src/autora/workflow/serializer/yaml_.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/autora/workflow/serializer/yaml_.py b/src/autora/workflow/serializer/yaml_.py index 569cf38e..0d47e47d 100644 --- a/src/autora/workflow/serializer/yaml_.py +++ b/src/autora/workflow/serializer/yaml_.py @@ -6,6 +6,11 @@ def dump(data, file): return +def dumps(data): + yaml.dumps(data, Dumper=yaml.Dumper) + return + + def load(file): result = yaml.load(file, Loader=yaml.Loader) return result From 69159ab2acb03ff79366c5ce20ee674a93a4d764 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 10:45:35 -0400 Subject: [PATCH 02/33] feat: add ability to choose between dill, pickle and yaml loaders --- src/autora/workflow/__main__.py | 77 ++++++++++++++++++++++++++++----- tests/test_load_dump_state.py | 15 ++++--- 2 files changed, 75 insertions(+), 17 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index c04c8506..eabef6d3 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -1,9 +1,10 @@ import importlib import logging import pathlib -from typing import Optional, Union +from collections import namedtuple +from enum import Enum +from typing import Callable, Dict, Literal, Optional, Tuple, Union -import dill import typer from typing_extensions import Annotated @@ -12,6 +13,24 @@ _logger = logging.getLogger(__name__) +SerializerDef = namedtuple( + "SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] +) + + +serializer_dict: Dict[str, SerializerDef] = dict( + pickle=SerializerDef("pickle", "load", "dump", "dumps", "b"), + yaml=SerializerDef("autora.workflow.serializer.yaml_", "load", "dump", "dumps", ""), + dill=SerializerDef("dill", "load", "dump", "dumps", "b"), +) + + +class _Serializer(str, Enum): + dill = "dill" + pickle = "pickle" + yaml = "yaml" + + def main( fully_qualified_function_name: Annotated[ str, typer.Argument(help="Function to load") @@ -24,16 +43,28 @@ def main( Optional[pathlib.Path], typer.Option(help="Path to output the final state as a .dill file"), ] = None, + loader: Annotated[ + _Serializer, + typer.Option( + help="deserializer to use to load the data", + ), + ] = _Serializer.dill, + dumper: Annotated[ + _Serializer, + typer.Option( + help="serializer to use to save the data", + ), + ] = _Serializer.dill, verbose: Annotated[bool, typer.Option(help="Turns on info logging level.")] = False, debug: Annotated[bool, typer.Option(help="Turns on debug logging level.")] = False, ): _configure_logger(debug, verbose) - starting_state = _load_state(in_path) + starting_state = _load_state(in_path, loader) _logger.info(f"Starting State: {starting_state}") function = _load_function(fully_qualified_function_name) ending_state = function(starting_state) _logger.info(f"Ending State: {ending_state}") - _dump_state(ending_state, out_path) + _dump_state(ending_state, out_path, dumper) return @@ -47,11 +78,29 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def _load_state(path: Optional[pathlib.Path]) -> Union[State, None]: +def _get_serializer_mode( + serializer: _Serializer, interface: Literal["load", "dump", "dumps"] +) -> Tuple[Callable, str]: + serializer_def = serializer_dict[serializer] + module = serializer_def.module + interface_function_name = getattr(serializer_def, interface) + _logger.debug( + f"_get_serializer: loading {interface_function_name=} from" f" {module=}" + ) + module = importlib.import_module(module) + function = getattr(module, interface_function_name) + file_mode = serializer_def.file_mode + return function, file_mode + + +def _load_state( + path: Optional[pathlib.Path], loader: _Serializer = _Serializer.dill +) -> Union[State, None]: if path is not None: + load, file_mode = _get_serializer_mode(loader, "load") _logger.debug(f"_load_state: loading from {path=}") - with open(path, "rb") as f: - state_ = dill.load(f) + with open(path, f"r{file_mode}") as f: + state_ = load(f) else: _logger.debug(f"_load_state: {path=} -> returning None") state_ = None @@ -67,15 +116,21 @@ def _load_function(fully_qualified_function_name: str): return function -def _dump_state(state_: State, path: Optional[pathlib.Path]) -> None: +def _dump_state( + state_: State, + path: Optional[pathlib.Path], + dumper: _Serializer = _Serializer.dill, +) -> None: if path is not None: + dump, file_mode = _get_serializer_mode(dumper, "dump") _logger.debug(f"_dump_state: dumping to {path=}") path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "wb") as f: - dill.dump(state_, f) + with open(path, f"w{file_mode}") as f: + dump(state_, f) else: + dumps, _ = _get_serializer_mode(dumper, "dumps") _logger.debug(f"_dump_state: {path=} so writing to stdout") - print(dill.dumps(state_)) + print(dumps(state_)) return diff --git a/tests/test_load_dump_state.py b/tests/test_load_dump_state.py index 6c2b19c8..66ade7d4 100644 --- a/tests/test_load_dump_state.py +++ b/tests/test_load_dump_state.py @@ -1,20 +1,23 @@ import pathlib import tempfile +import uuid from hypothesis import Verbosity, given, settings from hypothesis import strategies as st from autora.state import StandardState -from autora.workflow.__main__ import _dump_state, _load_state +from autora.workflow.__main__ import _dump_state, _load_state, _Serializer @given( - st.builds(StandardState, st.text(), st.text(), st.text(), st.lists(st.integers())) + st.builds(StandardState, st.text(), st.text(), st.text(), st.lists(st.integers())), + st.sampled_from(_Serializer), ) @settings(verbosity=Verbosity.verbose) -def test_load_inverts_dump(s): +def test_load_inverts_dump(s, serializer): with tempfile.TemporaryDirectory() as dir: - path = pathlib.Path(dir, "x.dill") + path = pathlib.Path(dir, f"{str(uuid.uuid4())}.{serializer}") print(path, s) - _dump_state(s, path) - assert _load_state(path) == s + + _dump_state(s, path, dumper=serializer) + assert _load_state(path, loader=serializer) == s From f0e59483e7e5a78b7261082626b45a1ef687338e Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 11:53:07 -0400 Subject: [PATCH 03/33] refactor: move serializer code into a module --- src/autora/workflow/__main__.py | 58 +++++----------------- src/autora/workflow/serializer/__init__.py | 39 +++++++++++++++ tests/test_load_dump_state.py | 5 +- 3 files changed, 55 insertions(+), 47 deletions(-) create mode 100644 src/autora/workflow/serializer/__init__.py diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index eabef6d3..09789975 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -1,36 +1,18 @@ import importlib import logging import pathlib -from collections import namedtuple -from enum import Enum -from typing import Callable, Dict, Literal, Optional, Tuple, Union +from typing import Optional, Union import typer from typing_extensions import Annotated from autora.state import State +from autora.workflow.serializer import Supported as SerializersSupported +from autora.workflow.serializer import get_serializer_mode _logger = logging.getLogger(__name__) -SerializerDef = namedtuple( - "SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] -) - - -serializer_dict: Dict[str, SerializerDef] = dict( - pickle=SerializerDef("pickle", "load", "dump", "dumps", "b"), - yaml=SerializerDef("autora.workflow.serializer.yaml_", "load", "dump", "dumps", ""), - dill=SerializerDef("dill", "load", "dump", "dumps", "b"), -) - - -class _Serializer(str, Enum): - dill = "dill" - pickle = "pickle" - yaml = "yaml" - - def main( fully_qualified_function_name: Annotated[ str, typer.Argument(help="Function to load") @@ -44,17 +26,17 @@ def main( typer.Option(help="Path to output the final state as a .dill file"), ] = None, loader: Annotated[ - _Serializer, + SerializersSupported, typer.Option( help="deserializer to use to load the data", ), - ] = _Serializer.dill, + ] = SerializersSupported.dill, dumper: Annotated[ - _Serializer, + SerializersSupported, typer.Option( help="serializer to use to save the data", ), - ] = _Serializer.dill, + ] = SerializersSupported.dill, verbose: Annotated[bool, typer.Option(help="Turns on info logging level.")] = False, debug: Annotated[bool, typer.Option(help="Turns on debug logging level.")] = False, ): @@ -78,26 +60,12 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def _get_serializer_mode( - serializer: _Serializer, interface: Literal["load", "dump", "dumps"] -) -> Tuple[Callable, str]: - serializer_def = serializer_dict[serializer] - module = serializer_def.module - interface_function_name = getattr(serializer_def, interface) - _logger.debug( - f"_get_serializer: loading {interface_function_name=} from" f" {module=}" - ) - module = importlib.import_module(module) - function = getattr(module, interface_function_name) - file_mode = serializer_def.file_mode - return function, file_mode - - def _load_state( - path: Optional[pathlib.Path], loader: _Serializer = _Serializer.dill + path: Optional[pathlib.Path], + loader: SerializersSupported = SerializersSupported.dill, ) -> Union[State, None]: if path is not None: - load, file_mode = _get_serializer_mode(loader, "load") + load, file_mode = get_serializer_mode(loader, "load") _logger.debug(f"_load_state: loading from {path=}") with open(path, f"r{file_mode}") as f: state_ = load(f) @@ -119,16 +87,16 @@ def _load_function(fully_qualified_function_name: str): def _dump_state( state_: State, path: Optional[pathlib.Path], - dumper: _Serializer = _Serializer.dill, + dumper: SerializersSupported = SerializersSupported.dill, ) -> None: if path is not None: - dump, file_mode = _get_serializer_mode(dumper, "dump") + dump, file_mode = get_serializer_mode(dumper, "dump") _logger.debug(f"_dump_state: dumping to {path=}") path.parent.mkdir(parents=True, exist_ok=True) with open(path, f"w{file_mode}") as f: dump(state_, f) else: - dumps, _ = _get_serializer_mode(dumper, "dumps") + dumps, _ = get_serializer_mode(dumper, "dumps") _logger.debug(f"_dump_state: {path=} so writing to stdout") print(dumps(state_)) return diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py new file mode 100644 index 00000000..801e40c5 --- /dev/null +++ b/src/autora/workflow/serializer/__init__.py @@ -0,0 +1,39 @@ +import importlib +from collections import namedtuple +from enum import Enum +from typing import Callable, Dict, Literal, Tuple + +from autora.workflow.__main__ import _logger + + +class Supported(str, Enum): + dill = "dill" + pickle = "pickle" + yaml = "yaml" + + +_SerializerDef = namedtuple( + "_SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] +) +_serializer_dict: Dict[str, _SerializerDef] = dict( + pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), + yaml=_SerializerDef( + "autora.workflow.serializer.yaml_", "load", "dump", "dumps", "" + ), + dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), +) + + +def get_serializer_mode( + serializer: Supported, interface: Literal["load", "dump", "dumps"] +) -> Tuple[Callable, str]: + serializer_def = _serializer_dict[serializer] + module = serializer_def.module + interface_function_name = getattr(serializer_def, interface) + _logger.debug( + f"_get_serializer: loading {interface_function_name=} from" f" {module=}" + ) + module = importlib.import_module(module) + function = getattr(module, interface_function_name) + file_mode = serializer_def.file_mode + return function, file_mode diff --git a/tests/test_load_dump_state.py b/tests/test_load_dump_state.py index 66ade7d4..7433849c 100644 --- a/tests/test_load_dump_state.py +++ b/tests/test_load_dump_state.py @@ -6,12 +6,13 @@ from hypothesis import strategies as st from autora.state import StandardState -from autora.workflow.__main__ import _dump_state, _load_state, _Serializer +from autora.workflow.__main__ import _dump_state, _load_state +from autora.workflow.serializer import Supported @given( st.builds(StandardState, st.text(), st.text(), st.text(), st.lists(st.integers())), - st.sampled_from(_Serializer), + st.sampled_from(Supported), ) @settings(verbosity=Verbosity.verbose) def test_load_inverts_dump(s, serializer): From 5d5a7ee9865993a8c24bf48109df8a9341932c34 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 11:55:27 -0400 Subject: [PATCH 04/33] deps: move optional serializers to optional dependencies --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2a63a4fe..d04b3429 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,8 +15,6 @@ dependencies = [ "matplotlib", "pandas", "typer[all]", - "dill", - "pyyaml", ] [project.optional-dependencies] @@ -36,6 +34,10 @@ cylc = [ "cylc-flow", "cylc-uiserver" ] +serializers = [ + "dill", + "pyyaml" +] [project.urls] homepage = "http://www.empiricalresearch.ai/" From a7cb5a1206a6355ce754e0194b8396f040313f25 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 11:57:34 -0400 Subject: [PATCH 05/33] refactor: isolate logging for serializer module --- src/autora/workflow/serializer/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py index 801e40c5..c45c34c8 100644 --- a/src/autora/workflow/serializer/__init__.py +++ b/src/autora/workflow/serializer/__init__.py @@ -1,9 +1,10 @@ import importlib +import logging from collections import namedtuple from enum import Enum from typing import Callable, Dict, Literal, Tuple -from autora.workflow.__main__ import _logger +_logger = logging.getLogger(__name__) class Supported(str, Enum): @@ -31,7 +32,7 @@ def get_serializer_mode( module = serializer_def.module interface_function_name = getattr(serializer_def, interface) _logger.debug( - f"_get_serializer: loading {interface_function_name=} from" f" {module=}" + f"get_serializer_mode: loading {interface_function_name=} from" f" {module=}" ) module = importlib.import_module(module) function = getattr(module, interface_function_name) From bca7a0f67528f4520a91d31720c1dd35b84fb6d8 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 12:42:23 -0400 Subject: [PATCH 06/33] refactor: change variable names for loader and dumper to be clearer --- src/autora/workflow/__main__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 09789975..3fdade46 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -21,17 +21,17 @@ def main( Optional[pathlib.Path], typer.Option(help="Path to a .dill file with the initial state"), ] = None, - out_path: Annotated[ - Optional[pathlib.Path], - typer.Option(help="Path to output the final state as a .dill file"), - ] = None, - loader: Annotated[ + in_loader: Annotated[ SerializersSupported, typer.Option( help="deserializer to use to load the data", ), ] = SerializersSupported.dill, - dumper: Annotated[ + out_path: Annotated[ + Optional[pathlib.Path], + typer.Option(help="Path to output the final state as a .dill file"), + ] = None, + out_dumper: Annotated[ SerializersSupported, typer.Option( help="serializer to use to save the data", @@ -41,12 +41,12 @@ def main( debug: Annotated[bool, typer.Option(help="Turns on debug logging level.")] = False, ): _configure_logger(debug, verbose) - starting_state = _load_state(in_path, loader) + starting_state = _load_state(in_path, in_loader) _logger.info(f"Starting State: {starting_state}") function = _load_function(fully_qualified_function_name) ending_state = function(starting_state) _logger.info(f"Ending State: {ending_state}") - _dump_state(ending_state, out_path, dumper) + _dump_state(ending_state, out_path, out_dumper) return From 61bbcd79b25efeac37793c3643c3c1e135da665b Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 12:51:36 -0400 Subject: [PATCH 07/33] =?UTF-8?q?test:=20add=20CLI=20tests=20=E2=80=93=20n?= =?UTF-8?q?ominal,=20each=20serializer,=20and=20a=20mix=20of=20serializers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_cli.py | 182 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 tests/test_cli.py diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..e7d6c2be --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,182 @@ +import logging +import pathlib +import tempfile +from typing import Optional + +import numpy as np +import pandas as pd +from hypothesis import Verbosity, given, settings +from hypothesis import strategies as st +from sklearn.linear_model import LinearRegression + +from autora.experimentalist.grid import grid_pool +from autora.state import StandardState, State, estimator_on_state, on_state +from autora.variable import Variable, VariableCollection +from autora.workflow.__main__ import _load_state, main +from autora.workflow.serializer import Supported + +_logger = logging.getLogger(__name__) + + +def initial_state(_): + state = StandardState( + variables=VariableCollection( + independent_variables=[Variable(name="x", allowed_values=range(100))], + dependent_variables=[Variable(name="y")], + covariates=[], + ), + conditions=None, + experiment_data=pd.DataFrame({"x": [], "y": []}), + models=[], + ) + return state + + +experimentalist = on_state(grid_pool, output=["conditions"]) + +experiment_runner = on_state( + lambda conditions: conditions.assign(y=2 * conditions["x"] + 0.5), + output=["experiment_data"], +) + +theorist = estimator_on_state(LinearRegression(fit_intercept=True)) + + +def validate_model(state: Optional[State]): + assert state is not None + + assert state.conditions is not None + assert len(state.conditions) == 100 + + assert state.experiment_data is not None + assert len(state.experiment_data) == 100 + + assert state.model is not None + assert np.allclose(state.model.coef_, [[2.0]]) + assert np.allclose(state.model.intercept_, [[0.5]]) + + +def test_nominal(): + """Test a basic standard chain of CLI calls using the default serializer.""" + + with tempfile.TemporaryDirectory() as d: + main( + "test_cli.initial_state", + out_path=pathlib.Path(d, "start"), + ) + main( + "test_cli.experimentalist", + in_path=pathlib.Path(d, "start"), + out_path=pathlib.Path(d, "conditions"), + ) + main( + "test_cli.experiment_runner", + in_path=pathlib.Path(d, "conditions"), + out_path=pathlib.Path(d, "data"), + ) + main( + "test_cli.theorist", + in_path=pathlib.Path(d, "data"), + out_path=pathlib.Path(d, "theory"), + ) + + final_state = _load_state(pathlib.Path(d, "theory")) + validate_model(final_state) + + +@given(st.sampled_from(Supported), st.booleans(), st.booleans()) +def test_serializers(serializer, verbose, debug): + """Test a basic standard chain of CLI calls using a single serializer.""" + + common_settings = dict( + in_loader=serializer, out_dumper=serializer, verbose=verbose, debug=debug + ) + + with tempfile.TemporaryDirectory() as d: + main( + "test_cli.initial_state", + out_path=pathlib.Path(d, "start"), + **common_settings + ) + main( + "test_cli.experimentalist", + in_path=pathlib.Path(d, "start"), + out_path=pathlib.Path(d, "conditions"), + **common_settings + ) + main( + "test_cli.experiment_runner", + in_path=pathlib.Path(d, "conditions"), + out_path=pathlib.Path(d, "data"), + **common_settings + ) + main( + "test_cli.theorist", + in_path=pathlib.Path(d, "data"), + out_path=pathlib.Path(d, "theory"), + **common_settings + ) + + final_state: StandardState = _load_state( + pathlib.Path(d, "theory"), loader=serializer + ) + validate_model(final_state) + + +@given( + st.sampled_from(Supported), + st.sampled_from(Supported), + st.sampled_from(Supported), + st.sampled_from(Supported), + st.booleans(), + st.booleans(), +) +@settings(verbosity=Verbosity.verbose) +def test_valid_serializer_mix( + initial_serializer, + experimental_serializer, + experiment_runner_serializer, + theorist_serializer, + verbose, + debug, +): + """Test a basic standard chain of CLI calls using a mix of serializers.""" + + common_settings = dict(verbose=verbose, debug=debug) + + with tempfile.TemporaryDirectory() as d: + main( + "test_cli.initial_state", + out_path=pathlib.Path(d, "start"), + out_dumper=initial_serializer, + **common_settings + ) + main( + "test_cli.experimentalist", + in_path=pathlib.Path(d, "start"), + out_path=pathlib.Path(d, "conditions"), + in_loader=initial_serializer, + out_dumper=experimental_serializer, + **common_settings + ) + main( + "test_cli.experiment_runner", + in_path=pathlib.Path(d, "conditions"), + out_path=pathlib.Path(d, "data"), + in_loader=experimental_serializer, + out_dumper=experiment_runner_serializer, + **common_settings + ) + main( + "test_cli.theorist", + in_path=pathlib.Path(d, "data"), + out_path=pathlib.Path(d, "theory"), + in_loader=experiment_runner_serializer, + out_dumper=theorist_serializer, + **common_settings + ) + + final_state: StandardState = _load_state( + pathlib.Path(d, "theory"), loader=theorist_serializer + ) + validate_model(final_state) From 21ebf45d3569f79ff728993b13c7a1370298b4b4 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:01:41 -0400 Subject: [PATCH 08/33] test: add a docstring to one of the tests --- tests/test_cli.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index e7d6c2be..46a6ab13 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -57,7 +57,14 @@ def validate_model(state: Optional[State]): def test_nominal(): - """Test a basic standard chain of CLI calls using the default serializer.""" + """Test a basic standard chain of CLI calls using the default serializer. + + Equivalent to: + $ python -m autora.workflow test_cli.initial_state --out-path start + $ python -m autora.workflow test_cli.experimentalist --in-path start --out-path conditions + $ python -m autora.workflow test_cli.experiment_runner --in-path conditions --out-path data + $ python -m autora.workflow test_cli.theorist --in-path data --out-path theory + """ with tempfile.TemporaryDirectory() as d: main( From 8ff8095db1e70247bb6d2a596d054aecd5f454d7 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:07:26 -0400 Subject: [PATCH 09/33] deps: add serializers to test dependencies --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d04b3429..4cb9b30d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,8 @@ docs = [ ] test = [ "autora-core[test]>=4.0.0", - "hypothesis" + "hypothesis", + "serializers" ] cylc = [ "cylc-flow", From 39f4c29fd22b698bd27882ca3255a9d7ae8c0652 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:09:43 -0400 Subject: [PATCH 10/33] deps: add serializers to test dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4cb9b30d..9a172f28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ docs = [ test = [ "autora-core[test]>=4.0.0", "hypothesis", - "serializers" + "autora-workflow[serializers]" ] cylc = [ "cylc-flow", From a2d28b39fcd4ce30fba26e414353e3b4f67821da Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:17:15 -0400 Subject: [PATCH 11/33] test: extend deadline for test (which sometimes takes longer on GitHub) --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 46a6ab13..f09e51bf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,7 +138,7 @@ def test_serializers(serializer, verbose, debug): st.booleans(), st.booleans(), ) -@settings(verbosity=Verbosity.verbose) +@settings(verbosity=Verbosity.verbose, deadline=300) def test_valid_serializer_mix( initial_serializer, experimental_serializer, From 79e1120c175b2cc3977d8ee5d61e8d153f1852da Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:20:16 -0400 Subject: [PATCH 12/33] test: extend deadline for test (which sometimes takes longer on GitHub) --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index f09e51bf..312da7ea 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,7 +138,7 @@ def test_serializers(serializer, verbose, debug): st.booleans(), st.booleans(), ) -@settings(verbosity=Verbosity.verbose, deadline=300) +@settings(verbosity=Verbosity.verbose, deadline=500) def test_valid_serializer_mix( initial_serializer, experimental_serializer, From 29fa266ee24b0b5e214857fd1995032853c22b7c Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:23:55 -0400 Subject: [PATCH 13/33] test: extend deadline for test (which sometimes takes longer on GitHub) --- tests/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 312da7ea..5332e7f0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -92,6 +92,7 @@ def test_nominal(): @given(st.sampled_from(Supported), st.booleans(), st.booleans()) +@settings(verbosity=Verbosity.verbose, deadline=500) def test_serializers(serializer, verbose, debug): """Test a basic standard chain of CLI calls using a single serializer.""" From b3fb527288bae4d363ee3568b44b9709ed09db4c Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:28:49 -0400 Subject: [PATCH 14/33] refactor: move load_state and dump_state to serializer submodule --- src/autora/workflow/__main__.py | 39 ++-------------------- src/autora/workflow/serializer/__init__.py | 38 ++++++++++++++++++++- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 3fdade46..2753caa5 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -1,14 +1,13 @@ import importlib import logging import pathlib -from typing import Optional, Union +from typing import Optional import typer from typing_extensions import Annotated -from autora.state import State from autora.workflow.serializer import Supported as SerializersSupported -from autora.workflow.serializer import get_serializer_mode +from autora.workflow.serializer import _dump_state, _load_state _logger = logging.getLogger(__name__) @@ -40,6 +39,7 @@ def main( verbose: Annotated[bool, typer.Option(help="Turns on info logging level.")] = False, debug: Annotated[bool, typer.Option(help="Turns on debug logging level.")] = False, ): + """Run an arbitrary function (on an optional State object) and store the output.""" _configure_logger(debug, verbose) starting_state = _load_state(in_path, in_loader) _logger.info(f"Starting State: {starting_state}") @@ -60,21 +60,6 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def _load_state( - path: Optional[pathlib.Path], - loader: SerializersSupported = SerializersSupported.dill, -) -> Union[State, None]: - if path is not None: - load, file_mode = get_serializer_mode(loader, "load") - _logger.debug(f"_load_state: loading from {path=}") - with open(path, f"r{file_mode}") as f: - state_ = load(f) - else: - _logger.debug(f"_load_state: {path=} -> returning None") - state_ = None - return state_ - - def _load_function(fully_qualified_function_name: str): _logger.debug(f"_load_function: Loading function {fully_qualified_function_name}") module_name, function_name = fully_qualified_function_name.rsplit(".", 1) @@ -84,23 +69,5 @@ def _load_function(fully_qualified_function_name: str): return function -def _dump_state( - state_: State, - path: Optional[pathlib.Path], - dumper: SerializersSupported = SerializersSupported.dill, -) -> None: - if path is not None: - dump, file_mode = get_serializer_mode(dumper, "dump") - _logger.debug(f"_dump_state: dumping to {path=}") - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, f"w{file_mode}") as f: - dump(state_, f) - else: - dumps, _ = get_serializer_mode(dumper, "dumps") - _logger.debug(f"_dump_state: {path=} so writing to stdout") - print(dumps(state_)) - return - - if __name__ == "__main__": typer.run(main) diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py index c45c34c8..01bfbbcf 100644 --- a/src/autora/workflow/serializer/__init__.py +++ b/src/autora/workflow/serializer/__init__.py @@ -1,8 +1,11 @@ import importlib import logging +import pathlib from collections import namedtuple from enum import Enum -from typing import Callable, Dict, Literal, Tuple +from typing import Callable, Dict, Literal, Optional, Tuple, Union + +from autora.state import State _logger = logging.getLogger(__name__) @@ -38,3 +41,36 @@ def get_serializer_mode( function = getattr(module, interface_function_name) file_mode = serializer_def.file_mode return function, file_mode + + +def _load_state( + path: Optional[pathlib.Path], + loader: Supported = Supported.dill, +) -> Union[State, None]: + if path is not None: + load, file_mode = get_serializer_mode(loader, "load") + _logger.debug(f"_load_state: loading from {path=}") + with open(path, f"r{file_mode}") as f: + state_ = load(f) + else: + _logger.debug(f"_load_state: {path=} -> returning None") + state_ = None + return state_ + + +def _dump_state( + state_: State, + path: Optional[pathlib.Path], + dumper: Supported = Supported.dill, +) -> None: + if path is not None: + dump, file_mode = get_serializer_mode(dumper, "dump") + _logger.debug(f"_dump_state: dumping to {path=}") + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, f"w{file_mode}") as f: + dump(state_, f) + else: + dumps, _ = get_serializer_mode(dumper, "dumps") + _logger.debug(f"_dump_state: {path=} so writing to stdout") + print(dumps(state_)) + return From 717c78ab94945b7939a9c09c752e94aa38938c46 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:30:59 -0400 Subject: [PATCH 15/33] refactor: rename dump_state --- src/autora/workflow/__main__.py | 4 ++-- src/autora/workflow/serializer/__init__.py | 6 +++--- tests/test_cli.py | 4 ++-- tests/test_load_dump_state.py | 5 ++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 2753caa5..15bc4796 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -7,7 +7,7 @@ from typing_extensions import Annotated from autora.workflow.serializer import Supported as SerializersSupported -from autora.workflow.serializer import _dump_state, _load_state +from autora.workflow.serializer import _load_state, dump_state _logger = logging.getLogger(__name__) @@ -46,7 +46,7 @@ def main( function = _load_function(fully_qualified_function_name) ending_state = function(starting_state) _logger.info(f"Ending State: {ending_state}") - _dump_state(ending_state, out_path, out_dumper) + dump_state(ending_state, out_path, out_dumper) return diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py index 01bfbbcf..1d529637 100644 --- a/src/autora/workflow/serializer/__init__.py +++ b/src/autora/workflow/serializer/__init__.py @@ -58,19 +58,19 @@ def _load_state( return state_ -def _dump_state( +def dump_state( state_: State, path: Optional[pathlib.Path], dumper: Supported = Supported.dill, ) -> None: if path is not None: dump, file_mode = get_serializer_mode(dumper, "dump") - _logger.debug(f"_dump_state: dumping to {path=}") + _logger.debug(f"dump_state: dumping to {path=}") path.parent.mkdir(parents=True, exist_ok=True) with open(path, f"w{file_mode}") as f: dump(state_, f) else: dumps, _ = get_serializer_mode(dumper, "dumps") - _logger.debug(f"_dump_state: {path=} so writing to stdout") + _logger.debug(f"dump_state: {path=} so writing to stdout") print(dumps(state_)) return diff --git a/tests/test_cli.py b/tests/test_cli.py index 5332e7f0..93d288de 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,8 +12,8 @@ from autora.experimentalist.grid import grid_pool from autora.state import StandardState, State, estimator_on_state, on_state from autora.variable import Variable, VariableCollection -from autora.workflow.__main__ import _load_state, main -from autora.workflow.serializer import Supported +from autora.workflow.__main__ import main +from autora.workflow.serializer import Supported, _load_state _logger = logging.getLogger(__name__) diff --git a/tests/test_load_dump_state.py b/tests/test_load_dump_state.py index 7433849c..ec01cc55 100644 --- a/tests/test_load_dump_state.py +++ b/tests/test_load_dump_state.py @@ -6,8 +6,7 @@ from hypothesis import strategies as st from autora.state import StandardState -from autora.workflow.__main__ import _dump_state, _load_state -from autora.workflow.serializer import Supported +from autora.workflow.serializer import Supported, _load_state, dump_state @given( @@ -20,5 +19,5 @@ def test_load_inverts_dump(s, serializer): path = pathlib.Path(dir, f"{str(uuid.uuid4())}.{serializer}") print(path, s) - _dump_state(s, path, dumper=serializer) + dump_state(s, path, dumper=serializer) assert _load_state(path, loader=serializer) == s From cb6214277a7b619e29c8fad2ccb9f92a509c75c8 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:31:31 -0400 Subject: [PATCH 16/33] refactor: rename load_state --- src/autora/workflow/__main__.py | 4 ++-- src/autora/workflow/serializer/__init__.py | 6 +++--- tests/test_cli.py | 8 ++++---- tests/test_load_dump_state.py | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 15bc4796..9feaf6df 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -7,7 +7,7 @@ from typing_extensions import Annotated from autora.workflow.serializer import Supported as SerializersSupported -from autora.workflow.serializer import _load_state, dump_state +from autora.workflow.serializer import dump_state, load_state _logger = logging.getLogger(__name__) @@ -41,7 +41,7 @@ def main( ): """Run an arbitrary function (on an optional State object) and store the output.""" _configure_logger(debug, verbose) - starting_state = _load_state(in_path, in_loader) + starting_state = load_state(in_path, in_loader) _logger.info(f"Starting State: {starting_state}") function = _load_function(fully_qualified_function_name) ending_state = function(starting_state) diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py index 1d529637..24da0ed6 100644 --- a/src/autora/workflow/serializer/__init__.py +++ b/src/autora/workflow/serializer/__init__.py @@ -43,17 +43,17 @@ def get_serializer_mode( return function, file_mode -def _load_state( +def load_state( path: Optional[pathlib.Path], loader: Supported = Supported.dill, ) -> Union[State, None]: if path is not None: load, file_mode = get_serializer_mode(loader, "load") - _logger.debug(f"_load_state: loading from {path=}") + _logger.debug(f"load_state: loading from {path=}") with open(path, f"r{file_mode}") as f: state_ = load(f) else: - _logger.debug(f"_load_state: {path=} -> returning None") + _logger.debug(f"load_state: {path=} -> returning None") state_ = None return state_ diff --git a/tests/test_cli.py b/tests/test_cli.py index 93d288de..8242d89c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -13,7 +13,7 @@ from autora.state import StandardState, State, estimator_on_state, on_state from autora.variable import Variable, VariableCollection from autora.workflow.__main__ import main -from autora.workflow.serializer import Supported, _load_state +from autora.workflow.serializer import Supported, load_state _logger = logging.getLogger(__name__) @@ -87,7 +87,7 @@ def test_nominal(): out_path=pathlib.Path(d, "theory"), ) - final_state = _load_state(pathlib.Path(d, "theory")) + final_state = load_state(pathlib.Path(d, "theory")) validate_model(final_state) @@ -125,7 +125,7 @@ def test_serializers(serializer, verbose, debug): **common_settings ) - final_state: StandardState = _load_state( + final_state: StandardState = load_state( pathlib.Path(d, "theory"), loader=serializer ) validate_model(final_state) @@ -184,7 +184,7 @@ def test_valid_serializer_mix( **common_settings ) - final_state: StandardState = _load_state( + final_state: StandardState = load_state( pathlib.Path(d, "theory"), loader=theorist_serializer ) validate_model(final_state) diff --git a/tests/test_load_dump_state.py b/tests/test_load_dump_state.py index ec01cc55..93567a07 100644 --- a/tests/test_load_dump_state.py +++ b/tests/test_load_dump_state.py @@ -6,7 +6,7 @@ from hypothesis import strategies as st from autora.state import StandardState -from autora.workflow.serializer import Supported, _load_state, dump_state +from autora.workflow.serializer import Supported, dump_state, load_state @given( @@ -20,4 +20,4 @@ def test_load_inverts_dump(s, serializer): print(path, s) dump_state(s, path, dumper=serializer) - assert _load_state(path, loader=serializer) == s + assert load_state(path, loader=serializer) == s From 447f6a65c4856ff07270e927f6ce9e968e73aad3 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:32:30 -0400 Subject: [PATCH 17/33] refactor: move load_function to serializer module --- src/autora/workflow/__main__.py | 12 +----------- src/autora/workflow/serializer/__init__.py | 9 +++++++++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 9feaf6df..194a351b 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -1,4 +1,3 @@ -import importlib import logging import pathlib from typing import Optional @@ -7,7 +6,7 @@ from typing_extensions import Annotated from autora.workflow.serializer import Supported as SerializersSupported -from autora.workflow.serializer import dump_state, load_state +from autora.workflow.serializer import _load_function, dump_state, load_state _logger = logging.getLogger(__name__) @@ -60,14 +59,5 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def _load_function(fully_qualified_function_name: str): - _logger.debug(f"_load_function: Loading function {fully_qualified_function_name}") - module_name, function_name = fully_qualified_function_name.rsplit(".", 1) - module = importlib.import_module(module_name) - function = getattr(module, function_name) - _logger.debug(f"_load_function: Loaded function {function} from {module}") - return function - - if __name__ == "__main__": typer.run(main) diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py index 24da0ed6..ec1ed7ea 100644 --- a/src/autora/workflow/serializer/__init__.py +++ b/src/autora/workflow/serializer/__init__.py @@ -74,3 +74,12 @@ def dump_state( _logger.debug(f"dump_state: {path=} so writing to stdout") print(dumps(state_)) return + + +def _load_function(fully_qualified_function_name: str): + _logger.debug(f"_load_function: Loading function {fully_qualified_function_name}") + module_name, function_name = fully_qualified_function_name.rsplit(".", 1) + module = importlib.import_module(module_name) + function = getattr(module, function_name) + _logger.debug(f"_load_function: Loaded function {function} from {module}") + return function From 5c5b7bf2d077fc3d7993ad0eb0e21e4132ea1255 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:33:07 -0400 Subject: [PATCH 18/33] refactor: rename load_function --- src/autora/workflow/__main__.py | 4 ++-- src/autora/workflow/serializer/__init__.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 194a351b..25bdff72 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -6,7 +6,7 @@ from typing_extensions import Annotated from autora.workflow.serializer import Supported as SerializersSupported -from autora.workflow.serializer import _load_function, dump_state, load_state +from autora.workflow.serializer import dump_state, load_function, load_state _logger = logging.getLogger(__name__) @@ -42,7 +42,7 @@ def main( _configure_logger(debug, verbose) starting_state = load_state(in_path, in_loader) _logger.info(f"Starting State: {starting_state}") - function = _load_function(fully_qualified_function_name) + function = load_function(fully_qualified_function_name) ending_state = function(starting_state) _logger.info(f"Ending State: {ending_state}") dump_state(ending_state, out_path, out_dumper) diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py index ec1ed7ea..9f479495 100644 --- a/src/autora/workflow/serializer/__init__.py +++ b/src/autora/workflow/serializer/__init__.py @@ -76,10 +76,10 @@ def dump_state( return -def _load_function(fully_qualified_function_name: str): - _logger.debug(f"_load_function: Loading function {fully_qualified_function_name}") +def load_function(fully_qualified_function_name: str): + _logger.debug(f"load_function: Loading function {fully_qualified_function_name}") module_name, function_name = fully_qualified_function_name.rsplit(".", 1) module = importlib.import_module(module_name) function = getattr(module, function_name) - _logger.debug(f"_load_function: Loaded function {function} from {module}") + _logger.debug(f"load_function: Loaded function {function} from {module}") return function From 323ac6a5f56df3bc5f8daeb05558caba232b2499 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 13:59:18 -0400 Subject: [PATCH 19/33] refactor: merge everything back into __main__ --- src/autora/workflow/__main__.py | 83 ++++++++++++++++++++- src/autora/workflow/serializer/__init__.py | 85 ---------------------- tests/test_cli.py | 13 ++-- tests/test_load_dump_state.py | 4 +- 4 files changed, 88 insertions(+), 97 deletions(-) delete mode 100644 src/autora/workflow/serializer/__init__.py diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 25bdff72..a3d23ba7 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -1,16 +1,36 @@ +import importlib import logging import pathlib -from typing import Optional +from collections import namedtuple +from enum import Enum +from typing import Callable, Dict, Literal, Optional, Tuple, Union import typer from typing_extensions import Annotated -from autora.workflow.serializer import Supported as SerializersSupported -from autora.workflow.serializer import dump_state, load_function, load_state +from autora.state import State _logger = logging.getLogger(__name__) +class SerializersSupported(str, Enum): + dill = "dill" + pickle = "pickle" + yaml = "yaml" + + +_SerializerDef = namedtuple( + "_SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] +) +_serializer_dict: Dict[str, _SerializerDef] = dict( + pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), + yaml=_SerializerDef( + "autora.workflow.serializer.yaml_", "load", "dump", "dumps", "" + ), + dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), +) + + def main( fully_qualified_function_name: Annotated[ str, typer.Argument(help="Function to load") @@ -59,5 +79,62 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") +def get_serializer_mode( + serializer: SerializersSupported, interface: Literal["load", "dump", "dumps"] +) -> Tuple[Callable, str]: + serializer_def = _serializer_dict[serializer] + module = serializer_def.module + interface_function_name = getattr(serializer_def, interface) + _logger.debug( + f"get_serializer_mode: loading {interface_function_name=} from" f" {module=}" + ) + module = importlib.import_module(module) + function = getattr(module, interface_function_name) + file_mode = serializer_def.file_mode + return function, file_mode + + +def load_state( + path: Optional[pathlib.Path], + loader: SerializersSupported = SerializersSupported.dill, +) -> Union[State, None]: + if path is not None: + load, file_mode = get_serializer_mode(loader, "load") + _logger.debug(f"load_state: loading from {path=}") + with open(path, f"r{file_mode}") as f: + state_ = load(f) + else: + _logger.debug(f"load_state: {path=} -> returning None") + state_ = None + return state_ + + +def load_function(fully_qualified_function_name: str): + _logger.debug(f"load_function: Loading function {fully_qualified_function_name}") + module_name, function_name = fully_qualified_function_name.rsplit(".", 1) + module = importlib.import_module(module_name) + function = getattr(module, function_name) + _logger.debug(f"load_function: Loaded function {function} from {module}") + return function + + +def dump_state( + state_: State, + path: Optional[pathlib.Path], + dumper: SerializersSupported = SerializersSupported.dill, +) -> None: + if path is not None: + dump, file_mode = get_serializer_mode(dumper, "dump") + _logger.debug(f"dump_state: dumping to {path=}") + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, f"w{file_mode}") as f: + dump(state_, f) + else: + dumps, _ = get_serializer_mode(dumper, "dumps") + _logger.debug(f"dump_state: {path=} so writing to stdout") + print(dumps(state_)) + return + + if __name__ == "__main__": typer.run(main) diff --git a/src/autora/workflow/serializer/__init__.py b/src/autora/workflow/serializer/__init__.py deleted file mode 100644 index 9f479495..00000000 --- a/src/autora/workflow/serializer/__init__.py +++ /dev/null @@ -1,85 +0,0 @@ -import importlib -import logging -import pathlib -from collections import namedtuple -from enum import Enum -from typing import Callable, Dict, Literal, Optional, Tuple, Union - -from autora.state import State - -_logger = logging.getLogger(__name__) - - -class Supported(str, Enum): - dill = "dill" - pickle = "pickle" - yaml = "yaml" - - -_SerializerDef = namedtuple( - "_SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] -) -_serializer_dict: Dict[str, _SerializerDef] = dict( - pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), - yaml=_SerializerDef( - "autora.workflow.serializer.yaml_", "load", "dump", "dumps", "" - ), - dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), -) - - -def get_serializer_mode( - serializer: Supported, interface: Literal["load", "dump", "dumps"] -) -> Tuple[Callable, str]: - serializer_def = _serializer_dict[serializer] - module = serializer_def.module - interface_function_name = getattr(serializer_def, interface) - _logger.debug( - f"get_serializer_mode: loading {interface_function_name=} from" f" {module=}" - ) - module = importlib.import_module(module) - function = getattr(module, interface_function_name) - file_mode = serializer_def.file_mode - return function, file_mode - - -def load_state( - path: Optional[pathlib.Path], - loader: Supported = Supported.dill, -) -> Union[State, None]: - if path is not None: - load, file_mode = get_serializer_mode(loader, "load") - _logger.debug(f"load_state: loading from {path=}") - with open(path, f"r{file_mode}") as f: - state_ = load(f) - else: - _logger.debug(f"load_state: {path=} -> returning None") - state_ = None - return state_ - - -def dump_state( - state_: State, - path: Optional[pathlib.Path], - dumper: Supported = Supported.dill, -) -> None: - if path is not None: - dump, file_mode = get_serializer_mode(dumper, "dump") - _logger.debug(f"dump_state: dumping to {path=}") - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, f"w{file_mode}") as f: - dump(state_, f) - else: - dumps, _ = get_serializer_mode(dumper, "dumps") - _logger.debug(f"dump_state: {path=} so writing to stdout") - print(dumps(state_)) - return - - -def load_function(fully_qualified_function_name: str): - _logger.debug(f"load_function: Loading function {fully_qualified_function_name}") - module_name, function_name = fully_qualified_function_name.rsplit(".", 1) - module = importlib.import_module(module_name) - function = getattr(module, function_name) - _logger.debug(f"load_function: Loaded function {function} from {module}") - return function diff --git a/tests/test_cli.py b/tests/test_cli.py index 8242d89c..9720dcc9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,8 +12,7 @@ from autora.experimentalist.grid import grid_pool from autora.state import StandardState, State, estimator_on_state, on_state from autora.variable import Variable, VariableCollection -from autora.workflow.__main__ import main -from autora.workflow.serializer import Supported, load_state +from autora.workflow.__main__ import SerializersSupported, load_state, main _logger = logging.getLogger(__name__) @@ -91,7 +90,7 @@ def test_nominal(): validate_model(final_state) -@given(st.sampled_from(Supported), st.booleans(), st.booleans()) +@given(st.sampled_from(SerializersSupported), st.booleans(), st.booleans()) @settings(verbosity=Verbosity.verbose, deadline=500) def test_serializers(serializer, verbose, debug): """Test a basic standard chain of CLI calls using a single serializer.""" @@ -132,10 +131,10 @@ def test_serializers(serializer, verbose, debug): @given( - st.sampled_from(Supported), - st.sampled_from(Supported), - st.sampled_from(Supported), - st.sampled_from(Supported), + st.sampled_from(SerializersSupported), + st.sampled_from(SerializersSupported), + st.sampled_from(SerializersSupported), + st.sampled_from(SerializersSupported), st.booleans(), st.booleans(), ) diff --git a/tests/test_load_dump_state.py b/tests/test_load_dump_state.py index 93567a07..fb861470 100644 --- a/tests/test_load_dump_state.py +++ b/tests/test_load_dump_state.py @@ -6,12 +6,12 @@ from hypothesis import strategies as st from autora.state import StandardState -from autora.workflow.serializer import Supported, dump_state, load_state +from autora.workflow.__main__ import SerializersSupported, dump_state, load_state @given( st.builds(StandardState, st.text(), st.text(), st.text(), st.lists(st.integers())), - st.sampled_from(Supported), + st.sampled_from(SerializersSupported), ) @settings(verbosity=Verbosity.verbose) def test_load_inverts_dump(s, serializer): From 591722b1421f4fdb6b10c2b76129c15bd542f504 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 14:00:10 -0400 Subject: [PATCH 20/33] chore: update docstrings --- src/autora/workflow/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index a3d23ba7..14d97f42 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -37,7 +37,7 @@ def main( ], in_path: Annotated[ Optional[pathlib.Path], - typer.Option(help="Path to a .dill file with the initial state"), + typer.Option(help="Path to a file with the initial state"), ] = None, in_loader: Annotated[ SerializersSupported, @@ -47,7 +47,7 @@ def main( ] = SerializersSupported.dill, out_path: Annotated[ Optional[pathlib.Path], - typer.Option(help="Path to output the final state as a .dill file"), + typer.Option(help="Path to output the final state"), ] = None, out_dumper: Annotated[ SerializersSupported, From eb4b2b2e72f7369926382f249bf97f3e61e05a29 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 15:18:11 -0400 Subject: [PATCH 21/33] chore: update docstrings --- src/autora/workflow/__main__.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 14d97f42..966bb1a7 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -58,7 +58,7 @@ def main( verbose: Annotated[bool, typer.Option(help="Turns on info logging level.")] = False, debug: Annotated[bool, typer.Option(help="Turns on debug logging level.")] = False, ): - """Run an arbitrary function (on an optional State object) and store the output.""" + """Run an arbitrary function on an optional input State object and save the output.""" _configure_logger(debug, verbose) starting_state = load_state(in_path, in_loader) _logger.info(f"Starting State: {starting_state}") @@ -79,14 +79,14 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def get_serializer_mode( +def _get_serializer_mode( serializer: SerializersSupported, interface: Literal["load", "dump", "dumps"] ) -> Tuple[Callable, str]: serializer_def = _serializer_dict[serializer] module = serializer_def.module interface_function_name = getattr(serializer_def, interface) _logger.debug( - f"get_serializer_mode: loading {interface_function_name=} from" f" {module=}" + f"_get_serializer_mode: loading {interface_function_name=} from" f" {module=}" ) module = importlib.import_module(module) function = getattr(module, interface_function_name) @@ -98,8 +98,9 @@ def load_state( path: Optional[pathlib.Path], loader: SerializersSupported = SerializersSupported.dill, ) -> Union[State, None]: + """Load a State object from a path.""" if path is not None: - load, file_mode = get_serializer_mode(loader, "load") + load, file_mode = _get_serializer_mode(loader, "load") _logger.debug(f"load_state: loading from {path=}") with open(path, f"r{file_mode}") as f: state_ = load(f) @@ -110,6 +111,7 @@ def load_state( def load_function(fully_qualified_function_name: str): + """Load a function by its fully qualified name, `module.function_name`""" _logger.debug(f"load_function: Loading function {fully_qualified_function_name}") module_name, function_name = fully_qualified_function_name.rsplit(".", 1) module = importlib.import_module(module_name) @@ -123,14 +125,15 @@ def dump_state( path: Optional[pathlib.Path], dumper: SerializersSupported = SerializersSupported.dill, ) -> None: + """Write a State object to a path.""" if path is not None: - dump, file_mode = get_serializer_mode(dumper, "dump") + dump, file_mode = _get_serializer_mode(dumper, "dump") _logger.debug(f"dump_state: dumping to {path=}") path.parent.mkdir(parents=True, exist_ok=True) with open(path, f"w{file_mode}") as f: dump(state_, f) else: - dumps, _ = get_serializer_mode(dumper, "dumps") + dumps, _ = _get_serializer_mode(dumper, "dumps") _logger.debug(f"dump_state: {path=} so writing to stdout") print(dumps(state_)) return From ddb37dd11312952671d211669d4ee0b310c6f683 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 15:53:02 -0400 Subject: [PATCH 22/33] chore: update docstrings --- src/autora/workflow/__main__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 966bb1a7..050c4bac 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -14,6 +14,8 @@ class SerializersSupported(str, Enum): + """Listing of allowed serializers.""" + dill = "dill" pickle = "pickle" yaml = "yaml" @@ -33,7 +35,10 @@ class SerializersSupported(str, Enum): def main( fully_qualified_function_name: Annotated[ - str, typer.Argument(help="Function to load") + str, + typer.Argument( + help="Fully qualified name of the function to load, like `module.function`" + ), ], in_path: Annotated[ Optional[pathlib.Path], @@ -42,7 +47,7 @@ def main( in_loader: Annotated[ SerializersSupported, typer.Option( - help="deserializer to use to load the data", + help="(de)serializer to load the data", ), ] = SerializersSupported.dill, out_path: Annotated[ @@ -52,7 +57,7 @@ def main( out_dumper: Annotated[ SerializersSupported, typer.Option( - help="serializer to use to save the data", + help="serializer to save the data", ), ] = SerializersSupported.dill, verbose: Annotated[bool, typer.Option(help="Turns on info logging level.")] = False, From 0ac23261970928d57ae11b82673acbeb939952c1 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 15:53:53 -0400 Subject: [PATCH 23/33] test: simplify filename --- tests/test_load_dump_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_load_dump_state.py b/tests/test_load_dump_state.py index fb861470..6927693d 100644 --- a/tests/test_load_dump_state.py +++ b/tests/test_load_dump_state.py @@ -16,7 +16,7 @@ @settings(verbosity=Verbosity.verbose) def test_load_inverts_dump(s, serializer): with tempfile.TemporaryDirectory() as dir: - path = pathlib.Path(dir, f"{str(uuid.uuid4())}.{serializer}") + path = pathlib.Path(dir, f"{str(uuid.uuid4())}") print(path, s) dump_state(s, path, dumper=serializer) From ee77932d21d3dc09bd099cc1c7e67a512adf8a1c Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 15:59:40 -0400 Subject: [PATCH 24/33] test: rename test files --- tests/{test_cli.py => test_system.py} | 32 +++++++++---------- ..._state.py => test_unit_state_load_dump.py} | 0 2 files changed, 16 insertions(+), 16 deletions(-) rename tests/{test_cli.py => test_system.py} (85%) rename tests/{test_load_dump_state.py => test_unit_state_load_dump.py} (100%) diff --git a/tests/test_cli.py b/tests/test_system.py similarity index 85% rename from tests/test_cli.py rename to tests/test_system.py index 9720dcc9..ab8b9c6a 100644 --- a/tests/test_cli.py +++ b/tests/test_system.py @@ -59,29 +59,29 @@ def test_nominal(): """Test a basic standard chain of CLI calls using the default serializer. Equivalent to: - $ python -m autora.workflow test_cli.initial_state --out-path start - $ python -m autora.workflow test_cli.experimentalist --in-path start --out-path conditions - $ python -m autora.workflow test_cli.experiment_runner --in-path conditions --out-path data - $ python -m autora.workflow test_cli.theorist --in-path data --out-path theory + $ python -m autora.workflow test_system.initial_state --out-path start + $ python -m autora.workflow test_system.experimentalist --in-path start --out-path conditions + $ python -m autora.workflow test_system.experiment_runner --in-path conditions --out-path data + $ python -m autora.workflow test_system.theorist --in-path data --out-path theory """ with tempfile.TemporaryDirectory() as d: main( - "test_cli.initial_state", + "test_system.initial_state", out_path=pathlib.Path(d, "start"), ) main( - "test_cli.experimentalist", + "test_system.experimentalist", in_path=pathlib.Path(d, "start"), out_path=pathlib.Path(d, "conditions"), ) main( - "test_cli.experiment_runner", + "test_system.experiment_runner", in_path=pathlib.Path(d, "conditions"), out_path=pathlib.Path(d, "data"), ) main( - "test_cli.theorist", + "test_system.theorist", in_path=pathlib.Path(d, "data"), out_path=pathlib.Path(d, "theory"), ) @@ -101,24 +101,24 @@ def test_serializers(serializer, verbose, debug): with tempfile.TemporaryDirectory() as d: main( - "test_cli.initial_state", + "test_system.initial_state", out_path=pathlib.Path(d, "start"), **common_settings ) main( - "test_cli.experimentalist", + "test_system.experimentalist", in_path=pathlib.Path(d, "start"), out_path=pathlib.Path(d, "conditions"), **common_settings ) main( - "test_cli.experiment_runner", + "test_system.experiment_runner", in_path=pathlib.Path(d, "conditions"), out_path=pathlib.Path(d, "data"), **common_settings ) main( - "test_cli.theorist", + "test_system.theorist", in_path=pathlib.Path(d, "data"), out_path=pathlib.Path(d, "theory"), **common_settings @@ -153,13 +153,13 @@ def test_valid_serializer_mix( with tempfile.TemporaryDirectory() as d: main( - "test_cli.initial_state", + "test_system.initial_state", out_path=pathlib.Path(d, "start"), out_dumper=initial_serializer, **common_settings ) main( - "test_cli.experimentalist", + "test_system.experimentalist", in_path=pathlib.Path(d, "start"), out_path=pathlib.Path(d, "conditions"), in_loader=initial_serializer, @@ -167,7 +167,7 @@ def test_valid_serializer_mix( **common_settings ) main( - "test_cli.experiment_runner", + "test_system.experiment_runner", in_path=pathlib.Path(d, "conditions"), out_path=pathlib.Path(d, "data"), in_loader=experimental_serializer, @@ -175,7 +175,7 @@ def test_valid_serializer_mix( **common_settings ) main( - "test_cli.theorist", + "test_system.theorist", in_path=pathlib.Path(d, "data"), out_path=pathlib.Path(d, "theory"), in_loader=experiment_runner_serializer, diff --git a/tests/test_load_dump_state.py b/tests/test_unit_state_load_dump.py similarity index 100% rename from tests/test_load_dump_state.py rename to tests/test_unit_state_load_dump.py From 9db494def7d9471ac8e5cd417544000608f4d0da Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 16:02:20 -0400 Subject: [PATCH 25/33] test: rename test files --- tests/{test_unit_state_load_dump.py => test_unit.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_unit_state_load_dump.py => test_unit.py} (100%) diff --git a/tests/test_unit_state_load_dump.py b/tests/test_unit.py similarity index 100% rename from tests/test_unit_state_load_dump.py rename to tests/test_unit.py From 6ce274f244aeaeaefba9b8da8e11106111754963 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Mon, 23 Oct 2023 18:33:50 -0400 Subject: [PATCH 26/33] refactor: reorder file --- src/autora/workflow/__main__.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 050c4bac..80795837 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -33,6 +33,21 @@ class SerializersSupported(str, Enum): ) +def _get_serializer_mode( + serializer: SerializersSupported, interface: Literal["load", "dump", "dumps"] +) -> Tuple[Callable, str]: + serializer_def = _serializer_dict[serializer] + module = serializer_def.module + interface_function_name = getattr(serializer_def, interface) + _logger.debug( + f"_get_serializer_mode: loading {interface_function_name=} from" f" {module=}" + ) + module = importlib.import_module(module) + function = getattr(module, interface_function_name) + file_mode = serializer_def.file_mode + return function, file_mode + + def main( fully_qualified_function_name: Annotated[ str, @@ -84,21 +99,6 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def _get_serializer_mode( - serializer: SerializersSupported, interface: Literal["load", "dump", "dumps"] -) -> Tuple[Callable, str]: - serializer_def = _serializer_dict[serializer] - module = serializer_def.module - interface_function_name = getattr(serializer_def, interface) - _logger.debug( - f"_get_serializer_mode: loading {interface_function_name=} from" f" {module=}" - ) - module = importlib.import_module(module) - function = getattr(module, interface_function_name) - file_mode = serializer_def.file_mode - return function, file_mode - - def load_state( path: Optional[pathlib.Path], loader: SerializersSupported = SerializersSupported.dill, From aaad3c9cc7acbb7969bd9312c5b5de2db5c12518 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 09:47:26 -0400 Subject: [PATCH 27/33] refactor: move serializer to a separate subpackage --- src/autora/serializer/__init__.py | 78 +++++++++++++++++ src/autora/{workflow => }/serializer/yaml_.py | 0 src/autora/workflow/__main__.py | 84 ++----------------- tests/test_system.py | 3 +- tests/test_unit.py | 2 +- 5 files changed, 87 insertions(+), 80 deletions(-) create mode 100644 src/autora/serializer/__init__.py rename src/autora/{workflow => }/serializer/yaml_.py (100%) diff --git a/src/autora/serializer/__init__.py b/src/autora/serializer/__init__.py new file mode 100644 index 00000000..dac9808b --- /dev/null +++ b/src/autora/serializer/__init__.py @@ -0,0 +1,78 @@ +import importlib +import logging +import pathlib +from collections import namedtuple +from enum import Enum +from typing import Callable, Dict, Literal, Optional, Tuple, Union + +from autora.state import State + +_logger = logging.getLogger(__name__) + + +class SerializersSupported(str, Enum): + """Listing of allowed serializers.""" + + dill = "dill" + pickle = "pickle" + yaml = "yaml" + + +_SerializerDef = namedtuple( + "_SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] +) +_serializer_dict: Dict[str, _SerializerDef] = dict( + pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), + yaml=_SerializerDef("autora.serializer.yaml_", "load", "dump", "dumps", ""), + dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), +) + + +def _get_serializer_mode( + serializer: SerializersSupported, interface: Literal["load", "dump", "dumps"] +) -> Tuple[Callable, str]: + serializer_def = _serializer_dict[serializer] + module = serializer_def.module + interface_function_name = getattr(serializer_def, interface) + _logger.debug( + f"_get_serializer_mode: loading {interface_function_name=} from" f" {module=}" + ) + module = importlib.import_module(module) + function = getattr(module, interface_function_name) + file_mode = serializer_def.file_mode + return function, file_mode + + +def load_state( + path: Optional[pathlib.Path], + loader: SerializersSupported = SerializersSupported.dill, +) -> Union[State, None]: + """Load a State object from a path.""" + if path is not None: + load, file_mode = _get_serializer_mode(loader, "load") + _logger.debug(f"load_state: loading from {path=}") + with open(path, f"r{file_mode}") as f: + state_ = load(f) + else: + _logger.debug(f"load_state: {path=} -> returning None") + state_ = None + return state_ + + +def dump_state( + state_: State, + path: Optional[pathlib.Path], + dumper: SerializersSupported = SerializersSupported.dill, +) -> None: + """Write a State object to a path.""" + if path is not None: + dump, file_mode = _get_serializer_mode(dumper, "dump") + _logger.debug(f"dump_state: dumping to {path=}") + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, f"w{file_mode}") as f: + dump(state_, f) + else: + dumps, _ = _get_serializer_mode(dumper, "dumps") + _logger.debug(f"dump_state: {path=} so writing to stdout") + print(dumps(state_)) + return diff --git a/src/autora/workflow/serializer/yaml_.py b/src/autora/serializer/yaml_.py similarity index 100% rename from src/autora/workflow/serializer/yaml_.py rename to src/autora/serializer/yaml_.py diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index 80795837..eb72c067 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -1,53 +1,16 @@ import importlib import logging import pathlib -from collections import namedtuple -from enum import Enum -from typing import Callable, Dict, Literal, Optional, Tuple, Union +from typing import Optional import typer from typing_extensions import Annotated -from autora.state import State +from autora.serializer import SerializersSupported, dump_state, load_state _logger = logging.getLogger(__name__) -class SerializersSupported(str, Enum): - """Listing of allowed serializers.""" - - dill = "dill" - pickle = "pickle" - yaml = "yaml" - - -_SerializerDef = namedtuple( - "_SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] -) -_serializer_dict: Dict[str, _SerializerDef] = dict( - pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), - yaml=_SerializerDef( - "autora.workflow.serializer.yaml_", "load", "dump", "dumps", "" - ), - dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), -) - - -def _get_serializer_mode( - serializer: SerializersSupported, interface: Literal["load", "dump", "dumps"] -) -> Tuple[Callable, str]: - serializer_def = _serializer_dict[serializer] - module = serializer_def.module - interface_function_name = getattr(serializer_def, interface) - _logger.debug( - f"_get_serializer_mode: loading {interface_function_name=} from" f" {module=}" - ) - module = importlib.import_module(module) - function = getattr(module, interface_function_name) - file_mode = serializer_def.file_mode - return function, file_mode - - def main( fully_qualified_function_name: Annotated[ str, @@ -82,7 +45,7 @@ def main( _configure_logger(debug, verbose) starting_state = load_state(in_path, in_loader) _logger.info(f"Starting State: {starting_state}") - function = load_function(fully_qualified_function_name) + function = _load_function(fully_qualified_function_name) ending_state = function(starting_state) _logger.info(f"Ending State: {ending_state}") dump_state(ending_state, out_path, out_dumper) @@ -99,50 +62,15 @@ def _configure_logger(debug, verbose): _logger.info("using INFO logging level") -def load_state( - path: Optional[pathlib.Path], - loader: SerializersSupported = SerializersSupported.dill, -) -> Union[State, None]: - """Load a State object from a path.""" - if path is not None: - load, file_mode = _get_serializer_mode(loader, "load") - _logger.debug(f"load_state: loading from {path=}") - with open(path, f"r{file_mode}") as f: - state_ = load(f) - else: - _logger.debug(f"load_state: {path=} -> returning None") - state_ = None - return state_ - - -def load_function(fully_qualified_function_name: str): +def _load_function(fully_qualified_function_name: str): """Load a function by its fully qualified name, `module.function_name`""" - _logger.debug(f"load_function: Loading function {fully_qualified_function_name}") + _logger.debug(f"_load_function: Loading function {fully_qualified_function_name}") module_name, function_name = fully_qualified_function_name.rsplit(".", 1) module = importlib.import_module(module_name) function = getattr(module, function_name) - _logger.debug(f"load_function: Loaded function {function} from {module}") + _logger.debug(f"_load_function: Loaded function {function} from {module}") return function -def dump_state( - state_: State, - path: Optional[pathlib.Path], - dumper: SerializersSupported = SerializersSupported.dill, -) -> None: - """Write a State object to a path.""" - if path is not None: - dump, file_mode = _get_serializer_mode(dumper, "dump") - _logger.debug(f"dump_state: dumping to {path=}") - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, f"w{file_mode}") as f: - dump(state_, f) - else: - dumps, _ = _get_serializer_mode(dumper, "dumps") - _logger.debug(f"dump_state: {path=} so writing to stdout") - print(dumps(state_)) - return - - if __name__ == "__main__": typer.run(main) diff --git a/tests/test_system.py b/tests/test_system.py index ab8b9c6a..de2a882f 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -10,9 +10,10 @@ from sklearn.linear_model import LinearRegression from autora.experimentalist.grid import grid_pool +from autora.serializer import SerializersSupported, load_state from autora.state import StandardState, State, estimator_on_state, on_state from autora.variable import Variable, VariableCollection -from autora.workflow.__main__ import SerializersSupported, load_state, main +from autora.workflow.__main__ import main _logger = logging.getLogger(__name__) diff --git a/tests/test_unit.py b/tests/test_unit.py index 6927693d..4be9b48d 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -5,8 +5,8 @@ from hypothesis import Verbosity, given, settings from hypothesis import strategies as st +from autora.serializer import SerializersSupported, dump_state, load_state from autora.state import StandardState -from autora.workflow.__main__ import SerializersSupported, dump_state, load_state @given( From 76613f4d4a5a1123362c4b4b6e512e43a166ec88 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 09:50:57 -0400 Subject: [PATCH 28/33] test: rename test_serializer file --- tests/{test_unit.py => test_serializer.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_unit.py => test_serializer.py} (100%) diff --git a/tests/test_unit.py b/tests/test_serializer.py similarity index 100% rename from tests/test_unit.py rename to tests/test_serializer.py From 938193b2fd07e2d102406602ccf7afcc980236ce Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 09:51:04 -0400 Subject: [PATCH 29/33] test: rename test_workflow file --- tests/{test_system.py => test_workflow.py} | 38 +++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) rename tests/{test_system.py => test_workflow.py} (83%) diff --git a/tests/test_system.py b/tests/test_workflow.py similarity index 83% rename from tests/test_system.py rename to tests/test_workflow.py index de2a882f..60e27a4f 100644 --- a/tests/test_system.py +++ b/tests/test_workflow.py @@ -56,33 +56,33 @@ def validate_model(state: Optional[State]): assert np.allclose(state.model.intercept_, [[0.5]]) -def test_nominal(): +def test_e2e_nominal(): """Test a basic standard chain of CLI calls using the default serializer. Equivalent to: - $ python -m autora.workflow test_system.initial_state --out-path start - $ python -m autora.workflow test_system.experimentalist --in-path start --out-path conditions - $ python -m autora.workflow test_system.experiment_runner --in-path conditions --out-path data - $ python -m autora.workflow test_system.theorist --in-path data --out-path theory + $ python -m autora.workflow test_workflow.initial_state --out-path start + $ python -m autora.workflow test_workflow.experimentalist --in-path start --out-path conditions + $ python -m autora.workflow test_workflow.experiment_runner --in-path conditions --out-path data + $ python -m autora.workflow test_workflow.theorist --in-path data --out-path theory """ with tempfile.TemporaryDirectory() as d: main( - "test_system.initial_state", + "test_workflow.initial_state", out_path=pathlib.Path(d, "start"), ) main( - "test_system.experimentalist", + "test_workflow.experimentalist", in_path=pathlib.Path(d, "start"), out_path=pathlib.Path(d, "conditions"), ) main( - "test_system.experiment_runner", + "test_workflow.experiment_runner", in_path=pathlib.Path(d, "conditions"), out_path=pathlib.Path(d, "data"), ) main( - "test_system.theorist", + "test_workflow.theorist", in_path=pathlib.Path(d, "data"), out_path=pathlib.Path(d, "theory"), ) @@ -93,7 +93,7 @@ def test_nominal(): @given(st.sampled_from(SerializersSupported), st.booleans(), st.booleans()) @settings(verbosity=Verbosity.verbose, deadline=500) -def test_serializers(serializer, verbose, debug): +def test_e2e_serializers(serializer, verbose, debug): """Test a basic standard chain of CLI calls using a single serializer.""" common_settings = dict( @@ -102,24 +102,24 @@ def test_serializers(serializer, verbose, debug): with tempfile.TemporaryDirectory() as d: main( - "test_system.initial_state", + "test_workflow.initial_state", out_path=pathlib.Path(d, "start"), **common_settings ) main( - "test_system.experimentalist", + "test_workflow.experimentalist", in_path=pathlib.Path(d, "start"), out_path=pathlib.Path(d, "conditions"), **common_settings ) main( - "test_system.experiment_runner", + "test_workflow.experiment_runner", in_path=pathlib.Path(d, "conditions"), out_path=pathlib.Path(d, "data"), **common_settings ) main( - "test_system.theorist", + "test_workflow.theorist", in_path=pathlib.Path(d, "data"), out_path=pathlib.Path(d, "theory"), **common_settings @@ -140,7 +140,7 @@ def test_serializers(serializer, verbose, debug): st.booleans(), ) @settings(verbosity=Verbosity.verbose, deadline=500) -def test_valid_serializer_mix( +def test_e2e_valid_serializer_mix( initial_serializer, experimental_serializer, experiment_runner_serializer, @@ -154,13 +154,13 @@ def test_valid_serializer_mix( with tempfile.TemporaryDirectory() as d: main( - "test_system.initial_state", + "test_workflow.initial_state", out_path=pathlib.Path(d, "start"), out_dumper=initial_serializer, **common_settings ) main( - "test_system.experimentalist", + "test_workflow.experimentalist", in_path=pathlib.Path(d, "start"), out_path=pathlib.Path(d, "conditions"), in_loader=initial_serializer, @@ -168,7 +168,7 @@ def test_valid_serializer_mix( **common_settings ) main( - "test_system.experiment_runner", + "test_workflow.experiment_runner", in_path=pathlib.Path(d, "conditions"), out_path=pathlib.Path(d, "data"), in_loader=experimental_serializer, @@ -176,7 +176,7 @@ def test_valid_serializer_mix( **common_settings ) main( - "test_system.theorist", + "test_workflow.theorist", in_path=pathlib.Path(d, "data"), out_path=pathlib.Path(d, "theory"), in_loader=experiment_runner_serializer, From 50c3c28512d5545761fbbc5cb6a6079e17540d97 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 09:58:29 -0400 Subject: [PATCH 30/33] refactor: update serializer --- src/autora/serializer/__init__.py | 2 +- src/autora/serializer/{yaml_.py => _yaml.py} | 0 tests/test_serializer.py | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) rename src/autora/serializer/{yaml_.py => _yaml.py} (100%) diff --git a/src/autora/serializer/__init__.py b/src/autora/serializer/__init__.py index dac9808b..b9f9a52b 100644 --- a/src/autora/serializer/__init__.py +++ b/src/autora/serializer/__init__.py @@ -23,7 +23,7 @@ class SerializersSupported(str, Enum): ) _serializer_dict: Dict[str, _SerializerDef] = dict( pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), - yaml=_SerializerDef("autora.serializer.yaml_", "load", "dump", "dumps", ""), + yaml=_SerializerDef("autora.serializer._yaml", "load", "dump", "dumps", ""), dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), ) diff --git a/src/autora/serializer/yaml_.py b/src/autora/serializer/_yaml.py similarity index 100% rename from src/autora/serializer/yaml_.py rename to src/autora/serializer/_yaml.py diff --git a/tests/test_serializer.py b/tests/test_serializer.py index 4be9b48d..5ae34935 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -15,6 +15,7 @@ ) @settings(verbosity=Verbosity.verbose) def test_load_inverts_dump(s, serializer): + """Test that each serializer can be used to serialize and deserialize a state object.""" with tempfile.TemporaryDirectory() as dir: path = pathlib.Path(dir, f"{str(uuid.uuid4())}") print(path, s) From 4ae5d8bc2baa02dedc969aa5db28b8f121b5a243 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 09:58:41 -0400 Subject: [PATCH 31/33] deps: simplify main dependencies --- pyproject.toml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9a172f28..13ca10cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,9 +11,6 @@ license = {file = "LICENSE"} dependencies = [ "autora-core>=4.0.0", - "scikit-learn", - "matplotlib", - "pandas", "typer[all]", ] @@ -24,12 +21,17 @@ dev = [ "autora-workflow[test]", ] docs = [ - "autora-core[docs]>=4.0.0" + "autora-core[docs]>=4.0.0", + "scikit-learn", + "matplotlib", + "pandas", ] test = [ "autora-core[test]>=4.0.0", + "autora-workflow[serializers]", "hypothesis", - "autora-workflow[serializers]" + "scikit-learn", + "pandas", ] cylc = [ "cylc-flow", From 904b58f45172fb14f7dc42d0405685b7dca46830 Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 11:08:36 -0400 Subject: [PATCH 32/33] refactor: make dict use enum as keys --- src/autora/serializer/__init__.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/autora/serializer/__init__.py b/src/autora/serializer/__init__.py index b9f9a52b..b67980c4 100644 --- a/src/autora/serializer/__init__.py +++ b/src/autora/serializer/__init__.py @@ -13,19 +13,23 @@ class SerializersSupported(str, Enum): """Listing of allowed serializers.""" - dill = "dill" pickle = "pickle" + dill = "dill" yaml = "yaml" _SerializerDef = namedtuple( "_SerializerDef", ["module", "load", "dump", "dumps", "file_mode"] ) -_serializer_dict: Dict[str, _SerializerDef] = dict( - pickle=_SerializerDef("pickle", "load", "dump", "dumps", "b"), - yaml=_SerializerDef("autora.serializer._yaml", "load", "dump", "dumps", ""), - dill=_SerializerDef("dill", "load", "dump", "dumps", "b"), -) +_serializer_dict: Dict[SerializersSupported, _SerializerDef] = { + SerializersSupported.pickle: _SerializerDef("pickle", "load", "dump", "dumps", "b"), + SerializersSupported.yaml: _SerializerDef( + "autora.serializer._yaml", "load", "dump", "dumps", "" + ), + SerializersSupported.dill: _SerializerDef("dill", "load", "dump", "dumps", "b"), +} + +_default_serializer = SerializersSupported.pickle def _get_serializer_mode( @@ -45,7 +49,7 @@ def _get_serializer_mode( def load_state( path: Optional[pathlib.Path], - loader: SerializersSupported = SerializersSupported.dill, + loader: SerializersSupported = _default_serializer, ) -> Union[State, None]: """Load a State object from a path.""" if path is not None: @@ -62,7 +66,7 @@ def load_state( def dump_state( state_: State, path: Optional[pathlib.Path], - dumper: SerializersSupported = SerializersSupported.dill, + dumper: SerializersSupported = _default_serializer, ) -> None: """Write a State object to a path.""" if path is not None: From 9318e553e5954e222161f2fc558e196a3be8e23f Mon Sep 17 00:00:00 2001 From: John Gerrard Holland Date: Tue, 24 Oct 2023 12:58:18 -0400 Subject: [PATCH 33/33] refactor: set default serializer in one place --- src/autora/serializer/__init__.py | 6 +++--- src/autora/workflow/__main__.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/autora/serializer/__init__.py b/src/autora/serializer/__init__.py index b67980c4..137aa1de 100644 --- a/src/autora/serializer/__init__.py +++ b/src/autora/serializer/__init__.py @@ -29,7 +29,7 @@ class SerializersSupported(str, Enum): SerializersSupported.dill: _SerializerDef("dill", "load", "dump", "dumps", "b"), } -_default_serializer = SerializersSupported.pickle +default_serializer = SerializersSupported.pickle def _get_serializer_mode( @@ -49,7 +49,7 @@ def _get_serializer_mode( def load_state( path: Optional[pathlib.Path], - loader: SerializersSupported = _default_serializer, + loader: SerializersSupported = default_serializer, ) -> Union[State, None]: """Load a State object from a path.""" if path is not None: @@ -66,7 +66,7 @@ def load_state( def dump_state( state_: State, path: Optional[pathlib.Path], - dumper: SerializersSupported = _default_serializer, + dumper: SerializersSupported = default_serializer, ) -> None: """Write a State object to a path.""" if path is not None: diff --git a/src/autora/workflow/__main__.py b/src/autora/workflow/__main__.py index eb72c067..18062f17 100644 --- a/src/autora/workflow/__main__.py +++ b/src/autora/workflow/__main__.py @@ -6,7 +6,12 @@ import typer from typing_extensions import Annotated -from autora.serializer import SerializersSupported, dump_state, load_state +from autora.serializer import ( + SerializersSupported, + default_serializer, + dump_state, + load_state, +) _logger = logging.getLogger(__name__) @@ -27,7 +32,7 @@ def main( typer.Option( help="(de)serializer to load the data", ), - ] = SerializersSupported.dill, + ] = default_serializer, out_path: Annotated[ Optional[pathlib.Path], typer.Option(help="Path to output the final state"), @@ -37,7 +42,7 @@ def main( typer.Option( help="serializer to save the data", ), - ] = SerializersSupported.dill, + ] = default_serializer, verbose: Annotated[bool, typer.Option(help="Turns on info logging level.")] = False, debug: Annotated[bool, typer.Option(help="Turns on debug logging level.")] = False, ):