diff --git a/.github/workflows/notebook.yml b/.github/workflows/notebook.yml
index 6c02fe0..951eb7e 100644
--- a/.github/workflows/notebook.yml
+++ b/.github/workflows/notebook.yml
@@ -25,7 +25,6 @@ jobs:
       run: |
         pip install .
         conda install pytest
-        pip install nbmake==0.5
+        pip install nbmake==1.5.3
         pytest --nbmake "./notebooks/"
-        pytest
diff --git a/.lift/config.toml b/.lift/config.toml
deleted file mode 100644
index dba7269..0000000
--- a/.lift/config.toml
+++ /dev/null
@@ -1,3 +0,0 @@
-ignoreFiles = '''
-autokoopman/_version.py
-'''
\ No newline at end of file
diff --git a/README.md b/README.md
index 3d0a485..d0a0315 100644
--- a/README.md
+++ b/README.md
@@ -120,6 +120,26 @@ The library architecture has a modular design, allowing users to implement custo
 
 See the [AutoKoopman Documentation](https://ethanjameslew.github.io/AutoKoopman/).
 
+## Citing AutoKoopman
+
+AutoKoopman has been published as a tool paper at ATVA 2023. The preprint can be found [here](https://www.researchgate.net/profile/Ethan-Lew/publication/374805680_AutoKoopman_A_Toolbox_for_Automated_System_Identification_via_Koopman_Operator_Linearization/links/6537cf9f1d6e8a70704c7f31/AutoKoopman-A-Toolbox-for-Automated-System-Identification-via-Koopman-Operator-Linearization.pdf).
+
+If you use AutoKoopman, please cite:
+
+Lew, E., Hekal, A., Potomkin, K., Kochdumper, N., Hencey, B., Bak, S., & Bogomolov, S. (2023, October). AutoKoopman: A toolbox for automated system identification via Koopman operator linearization. In International Symposium on Automated Technology for Verification and Analysis (pp. 237-250). Cham: Springer Nature Switzerland.
+
+Bibtex:
+```
+@inproceedings{lew2023autokoopman,
+  title={Autokoopman: A toolbox for automated system identification via koopman operator linearization},
+  author={Lew, Ethan and Hekal, Abdelrahman and Potomkin, Kostiantyn and Kochdumper, Niklas and Hencey, Brandon and Bak, Stanley and Bogomolov, Sergiy},
+  booktitle={International Symposium on Automated Technology for Verification and Analysis},
+  pages={237--250},
+  year={2023},
+  organization={Springer}
+}
+```
+
 ## References
 
 [1] Williams, M. O., Kevrekidis, I. G., & Rowley, C. W. (2015). A data–driven approximation of the koopman operator: Extending dynamic mode decomposition. Journal of Nonlinear Science, 25, 1307-1346.
diff --git a/autokoopman/autokoopman.py b/autokoopman/autokoopman.py
index 47a8855..f247f0f 100644
--- a/autokoopman/autokoopman.py
+++ b/autokoopman/autokoopman.py
@@ -1,7 +1,7 @@
 """
 Main AutoKoopman Function (Convenience Function)
 """
-from typing import Callable, Union, Sequence, Optional, Tuple
+from typing import Callable, Dict, Hashable, Union, Sequence, Optional, Tuple
 
 import numpy as np
 
@@ -20,6 +20,7 @@
     TrajectoryScoring,
 )
 from autokoopman.estimator.koopman import KoopmanDiscEstimator
+from autokoopman.estimator.koopman import KoopmanContinuousEstimator
 from autokoopman.tuner.gridsearch import GridSearchTuner
 from autokoopman.tuner.montecarlo import MonteCarloTuner
 from autokoopman.tuner.bayesianopt import BayesianOptTuner
@@ -32,10 +33,10 @@
 # valid string identifiers for the autokoopman magic
 obs_types = {"rff", "poly", "quadratic", "id", "deep"}
 opt_types = {"grid", "monte-carlo", "bopt"}
-scoring_func_types = {"total", "end", "relative"}
+scoring_func_types = {"total", "end", "relative", "weighted"}
 
 
-def get_scoring_func(score_name):
+def get_scoring_func(score_name, scoring_weights):
     """resolve scoring function from name or callable type"""
     # if callable, just return it
     if callable(score_name):
@@ -46,6 +47,10 @@ def get_scoring_func(score_name):
         return TrajectoryScoring.end_point_score
     elif score_name == "relative":
         return TrajectoryScoring.relative_score
+    elif score_name == "weighted":
+        return lambda true, pred: TrajectoryScoring.weighted_score(
+            true, pred, scoring_weights
+        )
     else:
         raise ValueError(
             f"Scoring function name {score_name} is not in available list (names are {scoring_func_types})"
@@ -97,32 +102,41 @@ def get_parameter_space(obs_type, threshold_range, rank):
     )
 
 
-def get_estimator(obs_type, sampling_period, dim, obs, hyperparams, normalize):
+def get_estimator(
+    obs_type, learn_continuous, sampling_period, dim, obs, hyperparams, normalize
+):
     """from the myriad of user suppled switches, select the right estimator"""
+
+    def inst_estimator(*args, **kwargs):
+        if learn_continuous:
+            return KoopmanContinuousEstimator(args[0], *args[2:], **kwargs)
+        else:
+            return KoopmanDiscEstimator(*args, **kwargs)
+
     if obs_type == "rff":
         observables = kobs.IdentityObservable() | kobs.RFFObservable(
             dim, obs, hyperparams[0]
         )
-        return KoopmanDiscEstimator(
+        return inst_estimator(
             observables, sampling_period, dim, rank=hyperparams[1], normalize=normalize
         )
     elif obs_type == "quadratic":
         observables = kobs.IdentityObservable() | kobs.QuadraticObservable(dim)
-        return KoopmanDiscEstimator(
+        return inst_estimator(
             observables, sampling_period, dim, rank=hyperparams[0], normalize=normalize
         )
     elif obs_type == "poly":
         observables = kobs.PolynomialObservable(dim, hyperparams[0])
-        return KoopmanDiscEstimator(
+        return inst_estimator(
            observables, sampling_period, dim, rank=hyperparams[1], normalize=normalize
         )
     elif obs_type == "id":
         observables = kobs.IdentityObservable()
-        return KoopmanDiscEstimator(
+        return inst_estimator(
             observables, sampling_period, dim, rank=hyperparams[0], normalize=normalize
         )
     elif isinstance(obs_type, KoopmanObservable):
-        return KoopmanDiscEstimator(
+        return inst_estimator(
             obs_type, sampling_period, dim, rank=hyperparams[0], normalize=normalize
         )
     else:
@@ -132,6 +146,7 @@ def get_estimator(obs_type, sampling_period, dim, obs, hyperparams, normalize):
 def auto_koopman(
     training_data: Union[TrajectoriesData, Sequence[np.ndarray]],
     inputs_training_data: Optional[Sequence[np.ndarray]] = None,
+    learn_continuous: bool = False,
     sampling_period: Optional[float] = None,
     normalize: bool = False,
     opt: Union[str, HyperparameterTuner] = "monte-carlo",
@@ -142,6 +157,9 @@ def auto_koopman(
     cost_func: Union[
         str, Callable[[TrajectoriesData, TrajectoriesData], float]
     ] = "total",
+    scoring_weights: Optional[
+        Union[Sequence[np.ndarray], Dict[Hashable, np.ndarray]]
+    ] = None,
     n_obs: int = 100,
     rank: Optional[Union[Tuple[int, int], Tuple[int, int, int]]] = None,
     grid_param_slices: int = 10,
@@ -158,6 +176,7 @@
     :param training_data: training trajectories data from which to learn the system
     :param inputs_training_data: optional input trajectories data from which to learn the system (this isn't needed if the training data has inputs already)
+    :param learn_continuous: whether to learn a continuous time or discrete time Koopman estimator
     :param sampling_period: (for discrete time system) sampling period of training data
     :param normalize: normalize the states of the training trajectories
     :param opt: hyperparameter optimizer {"grid", "monte-carlo", "bopt"}
@@ -165,7 +184,7 @@
     :param max_epochs: maximum number of training epochs
     :param n_splits: (for optimizers) if set, switches to k-folds bootstrap validation for the hyperparameter tuning. This is useful for things like RFF tuning where the results have noise.
     :param obs_type: (for koopman) koopman observables to use {"rff", "quadratic", "poly", "id", "deep"}
-    :param cost_func: cost function to use for hyperparameter optimization
+    :param cost_func: cost function to use for hyperparameter optimization {"total", "end", "relative", "weighted"}
     :param n_obs: (for koopman) number of observables to use (if applicable)
     :param rank: (for koopman) rank range (start, stop) or (start, stop, step)
     :param grid_param_slices: (for grid tuner) resolution to slice continuous valued parameters into
@@ -210,12 +229,24 @@
     #     'estimator': }
     """
-    training_data, sampling_period = _sanitize_training_data(
-        training_data, inputs_training_data, sampling_period, opt, obs_type
+    if cost_func == "weighted":
+        assert (
+            scoring_weights is not None
+        ), f"weighted scoring requires scoring weights (currently None)"
+
+    training_data, sampling_period, scoring_weights = _sanitize_training_data(
+        training_data,
+        inputs_training_data,
+        sampling_period,
+        opt,
+        obs_type,
+        scoring_weights,
     )
 
     # get the hyperparameter map
     if obs_type in {"deep"}:
+        if learn_continuous:
+            raise ValueError("deep learning is only for discrete systems")
         modelmap = _deep_model_map(
             training_data,
             max_epochs,
@@ -229,6 +260,7 @@
     else:
         modelmap = _edmd_model_map(
             training_data,
+            learn_continuous,
             rank,
             obs_type,
             n_obs,
@@ -255,8 +287,17 @@
     else:
         raise ValueError(f"could not match a tuner to the string {opt}")
 
-    with hide_prints():
-        res = gt.tune(nattempts=max_opt_iter, scoring_func=get_scoring_func(cost_func))
+    if verbose:
+        res = gt.tune(
+            nattempts=max_opt_iter,
+            scoring_func=get_scoring_func(cost_func, scoring_weights),
+        )
+    else:
+        with hide_prints():
+            res = gt.tune(
+                nattempts=max_opt_iter,
+                scoring_func=get_scoring_func(cost_func, scoring_weights),
+            )
 
     # pack results into out custom output
     result = {
@@ -327,11 +368,19 @@ def get_model(self, hyperparams: Sequence):
 
 
 def _edmd_model_map(
-    training_data, rank, obs_type, n_obs, lengthscale, sampling_period, normalize
+    training_data,
+    learn_continuous,
+    rank,
+    obs_type,
+    n_obs,
+    lengthscale,
+    sampling_period,
+    normalize,
 ) -> HyperparameterMap:
     """model map for eDMD based methods
 
-    :param training_data:
+    :param training_data: trajectories training dataset
+    :param learn_continuous: whether to learn continuous time or discrete time Koopman estimator
     :param rank: set of ranks to try (of DMD rank parameter)
     :param obs_type:
     :param n_obs: some obs type require a number of observables
@@ -363,7 +412,13 @@ def __init__(self):
         def get_model(self, hyperparams: Sequence):
             return get_estimator(
-                obs_type, sampling_period, dim, n_obs, hyperparams, normalize
+                obs_type,
+                learn_continuous,
+                sampling_period,
+                dim,
+                n_obs,
+                hyperparams,
+                normalize,
             )
 
     # get the hyperparameter map
@@ -371,7 +426,7 @@ def get_model(self, hyperparams: Sequence):
 
 
 def _sanitize_training_data(
-    training_data, inputs_training_data, sampling_period, opt, obs_type
+    training_data, inputs_training_data, sampling_period, opt, obs_type, scoring_weights
 ):
     """auto_koopman input sanitization"""
 
@@ -395,20 +450,27 @@ def _sanitize_training_data(
     # convert the data to autokoopman trajectories
     if isinstance(training_data, TrajectoriesData):
         if not isinstance(training_data, UniformTimeTrajectoriesData):
+            assert scoring_weights is None, f"scoring weights must be None as interpolation is occurring"
             print(
                 f"resampling trajectories as they need to be uniform time (sampling period {sampling_period})"
             )
             training_data = training_data.interp_uniform_time(sampling_period)
         else:
             if not np.isclose(training_data.sampling_period, sampling_period):
+                assert scoring_weights is None, f"scoring weights must be None as interpolation is occurring"
                 print(
                     f"resampling trajectories because the sampling periods differ (original {training_data.sampling_period}, new {sampling_period})"
                 )
                 training_data = training_data.interp_uniform_time(sampling_period)
     else:
+        if isinstance(training_data, dict):
+            assert isinstance(scoring_weights, dict), "training data has unordered keys, so scoring weights must be a dictionary with matching keys"
+        else:
+            scoring_weights = {idx: weights for idx, weights in enumerate(scoring_weights)}
+
         # figure out how to add inputs
         training_iter = (
-            training_data.items() if isinstance(training_data, dict) else training_data
+            training_data.items() if isinstance(training_data, dict) else enumerate(training_data)
         )
         if inputs_training_data is not None:
             training_iter = [(n, x, inputs_training_data[n]) for n, x in training_iter]
@@ -429,4 +491,4 @@ def _sanitize_training_data(
             }
         )
 
-    return training_data, sampling_period
+    return training_data, sampling_period, scoring_weights
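A minimal sketch of how the options added above (`learn_continuous`, `cost_func="weighted"`, `scoring_weights`) fit together; this is illustrative only, reusing the FitzHughNagumo benchmark that appears later in this patch, with placeholder parameter values rather than recommendations:

```python
# Illustrative sketch only -- exercises the new auto_koopman options from this patch.
import numpy as np

import autokoopman.benchmark.fhn as fhn
from autokoopman import auto_koopman

bench = fhn.FitzHughNagumo()
training_data = bench.solve_ivps(
    initial_states=np.random.uniform(low=-2.0, high=2.0, size=(10, 2)),
    tspan=[0.0, 10.0],
    sampling_period=0.1,
)

# one weight per time point per trajectory; 1-D weights are broadcast over the states
weights = [np.ones(len(traj.states)) for traj in training_data]

results = auto_koopman(
    [traj.states for traj in training_data],  # plain numpy trajectories (named by index)
    sampling_period=0.1,
    obs_type="rff",
    learn_continuous=False,     # True switches to the continuous-time Koopman estimator
    cost_func="weighted",       # new scoring option
    scoring_weights=weights,    # required when cost_func="weighted"
    opt="grid",
    n_obs=50,
    rank=(1, 20, 5),
)
model = results["tuned_model"]
```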
diff --git a/autokoopman/benchmark/spring.py b/autokoopman/benchmark/spring.py
index 76afe8d..324aae9 100644
--- a/autokoopman/benchmark/spring.py
+++ b/autokoopman/benchmark/spring.py
@@ -1,5 +1,5 @@
 import sympy as sp  # type: ignore
-from numpy import cos, sin
+from sympy import cos, sin
 
 import autokoopman.core.system as asys
diff --git a/autokoopman/core/scoring.py b/autokoopman/core/scoring.py
new file mode 100644
index 0000000..894a6d1
--- /dev/null
+++ b/autokoopman/core/scoring.py
@@ -0,0 +1,59 @@
+"""Scoring Metrics for Evaluation
+"""
+import numpy as np
+from numpy.linalg import norm
+
+from typing import Dict, Hashable
+from autokoopman.core.trajectory import TrajectoriesData
+
+
+class TrajectoryScoring:
+    @staticmethod
+    def weighted_score(
+        true_data: TrajectoriesData,
+        prediction_data: TrajectoriesData,
+        weights: Dict[Hashable, np.ndarray],
+    ):
+        assert true_data.traj_names.issubset(
+            set(weights.keys())
+        ) and prediction_data.traj_names.issubset(
+            set(weights.keys())
+        ), f"Datasets trajectory names (true={true_data.traj_names}, prediction={prediction_data.traj_names}) and Weights keys ({weights.keys()}) must correspond!"
+
+        # finalize the shapes of the weights
+        weights_f = {}
+        for k, w in weights.items():
+            w = np.array(w)
+            if len(w.shape) == 1:
+                w = np.tile(np.atleast_2d(w).T, reps=(1, len(true_data.state_names)))
+            weights_f[k] = w
+
+        absdiff = (prediction_data - true_data).abs()
+        end_errors = np.array(
+            [norm(weights_f[n] * s.states, axis=1) for n, s in absdiff._trajs.items()]
+        )
+        return np.sum(np.concatenate(end_errors, axis=0))
+
+    @staticmethod
+    def end_point_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
+        errors = (prediction_data - true_data).norm()
+        end_errors = np.array([s.states[-1] for s in errors])
+        return np.mean(end_errors)
+
+    @staticmethod
+    def total_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
+        errors = (prediction_data - true_data).norm()
+        end_errors = np.array([s.states.flatten() for s in errors])
+        return np.mean(np.concatenate(end_errors, axis=0))
+
+    @staticmethod
+    def relative_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
+        # TODO: implement this
+        err_term = []
+        for k in prediction_data.traj_names:
+            pred_t = prediction_data[k]
+            true_t = true_data[k]
+            abs_error = np.linalg.norm(pred_t.states - true_t.states)
+            mean_error = np.linalg.norm(pred_t.states - np.mean(pred_t.states, axis=0))
+            err_term.append(abs_error / mean_error)
+        return np.mean(np.array(err_term))
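A standalone numpy sketch (not library code) of the weight handling in `weighted_score` above: a 1-D weight vector with one entry per time step is tiled across the state dimensions before it scales the per-time absolute errors.

```python
import numpy as np

n_times, n_states = 5, 2
w = np.linspace(0.1, 1.0, n_times)        # one weight per time point
abs_err = np.ones((n_times, n_states))    # stand-in for |prediction - truth|

# the same tiling weighted_score applies when a 1-D weight vector is supplied
W = np.tile(np.atleast_2d(w).T, reps=(1, n_states))  # shape (n_times, n_states)
per_time = np.linalg.norm(W * abs_err, axis=1)       # row-wise norm, as in the score
print(W.shape, per_time.shape)  # (5, 2) (5,)
```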
diff --git a/autokoopman/core/trajectory.py b/autokoopman/core/trajectory.py
index 803b2f8..44d2372 100644
--- a/autokoopman/core/trajectory.py
+++ b/autokoopman/core/trajectory.py
@@ -210,6 +210,16 @@ def norm(self, axis=1) -> "Trajectory":
             threshold=self._threshold,
         )
 
+    def abs(self) -> "Trajectory":
+        return Trajectory(
+            self.times,
+            np.abs(self.states),
+            None,
+            state_names=self.names,
+            input_names=None,
+            threshold=self._threshold,
+        )
+
 
 class UniformTimeTrajectory(Trajectory):
     """uniform time is a trajectory advanced by a sampling period"""
@@ -491,6 +501,9 @@ def __add__(self, other: "TrajectoriesData"):
     def norm(self):
         return TrajectoriesData({k: v.norm() for k, v in self._trajs.items()})
 
+    def abs(self):
+        return TrajectoriesData({k: v.abs() for k, v in self._trajs.items()})
+
 
 class UniformTimeTrajectoriesData(TrajectoriesData):
     """a dataset of uniform time trajectories"""
diff --git a/autokoopman/core/tuner.py b/autokoopman/core/tuner.py
index 7ff2733..9b5cb3b 100644
--- a/autokoopman/core/tuner.py
+++ b/autokoopman/core/tuner.py
@@ -1,3 +1,5 @@
+"""Model Tuning
+"""
 import abc
 import random
 from typing import Sequence, Callable, TypedDict, Any
@@ -9,6 +11,7 @@
     UniformTimeTrajectory,
     UniformTimeTrajectoriesData,
 )
+from autokoopman.core.scoring import TrajectoryScoring
 from autokoopman.core.format import _clip_list
 
 from sklearn.model_selection import KFold
@@ -136,33 +139,6 @@ class TuneResults(TypedDict):
     score: float
 
 
-class TrajectoryScoring:
-    @staticmethod
-    def end_point_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
-        errors = (prediction_data - true_data).norm()
-        end_errors = np.array([s.states[-1] for s in errors])
-        return np.mean(end_errors)
-
-    @staticmethod
-    def total_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
-        errors = (prediction_data - true_data).norm()
-        end_errors = np.array([s.states.flatten() for s in errors])
-
-        return np.mean(np.concatenate(end_errors, axis=0))
-
-    @staticmethod
-    def relative_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
-        # TODO: implement this
-        err_term = []
-        for k in prediction_data.traj_names:
-            pred_t = prediction_data[k]
-            true_t = true_data[k]
-            abs_error = np.linalg.norm(pred_t.states - true_t.states)
-            mean_error = np.linalg.norm(pred_t.states - np.mean(pred_t.states, axis=0))
-            err_term.append(abs_error / mean_error)
-        return np.mean(np.array(err_term))
-
-
 class HyperparameterTuner(abc.ABC):
     """
     Tune a HyperparameterMap Object
diff --git a/environment.yml b/environment.yml
index 688dcce..5282e13 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,77 +3,15 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - blas=1.0
-  - ca-certificates=2023.08.22
-  - cffi=1.15.1
-  - filelock=3.9.0
-  - gmp=6.2.1
-  - gmpy2=2.1.2
-  - jinja2=3.1.2
-  - libcxx=16.0.6
-  - libffi=3.3
-  - libopenblas=0.3.21
-  - libprotobuf=3.20.3
-  - libsqlite=3.43.2
-  - libuv=1.44.2
-  - libzlib=1.2.13
-  - llvm-openmp=14.0.6
-  - markupsafe=2.1.1
-  - mpc=1.1.0
-  - mpfr=4.0.2
-  - ncurses=6.4
-  - networkx=3.1
-  - ninja=1.10.2
-  - ninja-base=1.10.2
-  - numpy-base=1.26.0
-  - openssl=1.1.1w
-  - pip=23.3.1
-  - pycparser=2.21
-  - python=3.9.0
-  - pytorch=2.0.1
-  - readline=8.2
-  - setuptools=68.2.2
-  - sleef=3.5.1
-  - sqlite=3.43.2
-  - sympy=1.11.1
-  - tk=8.6.13
-  - typing-extensions=4.7.1
-  - typing_extensions=4.7.1
-  - tzdata=2023c
-  - wheel=0.41.2
-  - xz=5.2.6
-  - zlib=1.2.13
+  - pysindy=1.3.0
+  - pip=24.0
+  - matplotlib>=3.3.4
+  - sympy>=1.9
+  - pandas>=1.1.5
+  - pydata-sphinx-theme==0.7.2
+  - conda-forge::scikit-learn
   - pip:
-    - attrs==22.2.0
-    - cmake==3.25.2
-    - contourpy==1.0.7
-    - cycler==0.11.0
-    - cython==0.29.33
-    - decorator==5.1.1
-    - derivative==0.5.3
-    - exceptiongroup==1.1.0
-    - fonttools==4.38.0
-    - gpy==1.10.0
-    - gpyopt==1.2.6
-    - iniconfig==2.0.0
-    - joblib==1.2.0
-    - kiwisolver==1.4.4
-    - matplotlib==3.6.3
-    - mpmath==1.2.1
-    - numpy==1.21.0
-    - packaging==23.0
-    - pandas==1.5.3
-    - paramz==0.9.5
-    - pillow==9.4.0
-    - pluggy==1.0.0
-    - pyparsing==3.0.9
-    - pysindy==1.7.2
-    - pytest==7.2.1
-    - python-dateutil==2.8.2
-    - pytz==2022.7.1
-    - scikit-learn==1.0.2
-    - scipy==1.10.0
-    - six==1.16.0
-    - threadpoolctl==3.1.0
-    - tomli==2.0.1
-    - tqdm==4.64.1
+    - gpy==1.13.1
+    - gpyopt==1.2.6
+    - torch==2.2.1
+    - sphinx_mdinclude==0.5.1
\ No newline at end of file
diff --git a/notebooks/deep-koopman.ipynb b/notebooks/deep-koopman.ipynb
index 9ac7cf3..2ce6f1f 100644
--- a/notebooks/deep-koopman.ipynb
+++ b/notebooks/deep-koopman.ipynb
@@ -96,6 +96,8 @@
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "\n",
+    "import sys\n",
+    "sys.path.append(\"..\")\n",
     "# this is the convenience function\n",
     "from autokoopman import auto_koopman"
    ]
   },
@@ -248,7 +250,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.9.7"
+  "version": "3.9.0"
  }
 },
 "nbformat": 4,
diff --git a/notebooks/weighted-cost-func.ipynb b/notebooks/weighted-cost-func.ipynb
new file mode 100644
index 0000000..a81416a
--- /dev/null
+++ b/notebooks/weighted-cost-func.ipynb
@@ -0,0 +1,198 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "01a2f054-0d0e-497a-8bb9-d5d395ca7c04",
+   "metadata": {},
+   "source": [
+    "# Weighted Cost Function\n",
+    "\n",
+    "Shows how to use the cost function requested in [issue #84](https://github.com/EthanJamesLew/AutoKoopman/issues/84)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dfdb47e2-0ea0-4bf8-8279-8500ff3cf21f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the notebook imports\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "\n",
+    "import sys\n",
+    "sys.path.append(\"..\")\n",
+    "# this is the convenience function\n",
+    "from autokoopman import auto_koopman"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "291d3409-1c8c-44cb-8380-44f08019b57d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for a complete example, let's create an example dataset using an included benchmark system\n",
+    "import autokoopman.benchmark.fhn as fhn\n",
+    "fhn = fhn.FitzHughNagumo()\n",
+    "training_data = fhn.solve_ivps(\n",
+    "    initial_states=np.random.uniform(low=-2.0, high=2.0, size=(10, 2)),\n",
+    "    tspan=[0.0, 10.0],\n",
+    "    sampling_period=0.1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2d42e41-46c2-467c-9ce7-9bd6a7c509a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create trajectories as numpy array and create a weights array\n",
+    "trajectories = []\n",
+    "weights = []\n",
+    "\n",
+    "# create weights for every time point\n",
+    "for idx, traj in enumerate(training_data):\n",
+    "    trajectories.append(traj.states)\n",
+    "    \n",
+    "    # uniform weights\n",
+    "    #weights.append(np.ones(len(traj.states)) / len(traj.states) )\n",
+    "\n",
+    "    # distance weights\n",
+    "    #weights.append(traj.norm().states / (np.max(traj.norm().states) * len(traj.states)) )\n",
+    "    \n",
+    "    # individual state weights\n",
+    "    #w = np.abs(traj.states) / 10.0\n",
+    "    #w[:, 0] = 0.0\n",
+    "    #w = np.ones(traj.states.shape) / 10.0\n",
+    "    w = traj.abs().states\n",
+    "    weights.append(w)\n",
+    "\n",
+    "# you can also use a dict to name the trajectories if using TrajectoriesData (numpy arrays are named by their index number)\n",
+    "#weights = {idx: w for idx, w in enumerate(weights)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98510aa7-3416-4181-a493-00500be53f61",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# learn model from data\n",
+    "experiment_results = auto_koopman(\n",
+    "    trajectories, # list of trajectories\n",
+    "    sampling_period=0.1, # sampling period of trajectory snapshots\n",
+    "    obs_type=\"rff\", # use Random Fourier Features Observables\n",
+    "    cost_func=\"weighted\", # use \"weighted\" cost function\n",
+    "    scoring_weights=weights, # pass weights as required for cost_func=\"weighted\"\n",
+    "    opt=\"grid\", # grid search to find best hyperparameters\n",
+    "    n_obs=200, # maximum number of observables to try\n",
+    "    max_opt_iter=200, # maximum number of optimization iterations\n",
+    "    grid_param_slices=5, # for grid search, number of slices for each parameter\n",
+    "    n_splits=5, # k-folds validation for tuning, helps stabilize the scoring\n",
+    "    rank=(1, 200, 40) # rank range (start, stop, step) DMD hyperparameter\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "476c496d-56c5-477b-9579-2c0121b3247d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# view our custom weighted cost\n",
+    "experiment_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb4dfdcf-01ca-4cbc-966d-3f531d8475ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get the model from the experiment results\n",
+    "model = experiment_results['tuned_model']\n",
+    "\n",
+    "# simulate using the learned model\n",
+    "iv = [0.5, 0.1]\n",
+    "trajectory = model.solve_ivp(\n",
+    "    initial_state=iv,\n",
+    "    tspan=(0.0, 10.0),\n",
+    "    sampling_period=0.1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b1e329c-c25c-442d-8b76-146924a6e46b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# simulate the ground truth for comparison\n",
+    "true_trajectory = fhn.solve_ivp(\n",
+    "    initial_state=iv,\n",
+    "    tspan=(0.0, 10.0),\n",
+    "    sampling_period=0.1\n",
+    ")\n",
+    "\n",
+    "plt.figure(figsize=(10, 6))\n",
+    "\n",
+    "# plot the results\n",
+    "plt.plot(*trajectory.states.T, label='Trajectory Prediction')\n",
+    "plt.plot(*true_trajectory.states.T, label='Ground Truth')\n",
+    "\n",
+    "plt.xlabel(\"$x_1$\")\n",
+    "plt.ylabel(\"$x_2$\")\n",
+    "plt.grid()\n",
+    "plt.legend()\n",
+    "plt.title(\"FHN Test Trajectory Plot\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1458259-6c92-46e5-91a3-f56e53633b35",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9626e981-a8b3-40e4-b70c-d95e6dbee7ef",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/requirements.txt b/requirements.txt
index 7851b64..bc7f46d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,5 +5,5 @@ pydata-sphinx-theme==0.7.2
 sphinx_mdinclude==0.5.1
 gpy==1.10.0
 gpyopt==1.2.6
-torch==1.12.1
-pysindy==1.7.2
+torch==2.2.1
+scikit-learn==1.4.1.post1
\ No newline at end of file
diff --git a/test/jupytext/linear.md b/test/jupytext/linear.md
new file mode 100644
index 0000000..7ee30d7
--- /dev/null
+++ b/test/jupytext/linear.md
@@ -0,0 +1,53 @@
+## A Simple Linear System
+
+```python
+import sys
+sys.path.append("../..")
+
+from autokoopman import SymbolicContinuousSystem, auto_koopman
+import sympy as sp
+import numpy as np
+
+class SimpleLinear(SymbolicContinuousSystem):
+    def __init__(self):
+        x1, x2 = sp.symbols("x1 x2")
+        xdot = [
+            1.2 * x1 + 0.5 * x2,
+            -0.7 * x1 + 0.1 * x2
+        ]
+        super().__init__((x1, x2), xdot)
+```
+
+```python
+sys = SimpleLinear()
+training_data = sys.solve_ivps(
+    initial_states=np.random.uniform(low=-2.0, high=2.0, size=(10, 2)),
+    tspan=[0.0, 10.0],
+    sampling_period=0.1
+)
+```
+
+```python
+# learn model from data
+experiment_results = auto_koopman(
+    training_data,          # training trajectories
+    sampling_period=0.1,    # sampling period of trajectory snapshots
+    learn_continuous=True,
+    obs_type="id",          # use identity observables
+    opt="grid",             # grid search to find best hyperparameters
+    n_obs=200,              # maximum number of observables to try
+    max_opt_iter=200,       # maximum number of optimization iterations
+    grid_param_slices=5,    # for grid search, number of slices for each parameter
+    n_splits=5,             # k-folds validation for tuning, helps stabilize the scoring
+    normalize=False,
+    rank=(1, 200, 40)       # rank range (start, stop, step) DMD hyperparameter
+)
+```
+
+```python
+experiment_results['tuned_model'].A
+```
+
+```python
+
+```
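As a follow-up to the `linear.md` example above: since the training system is linear and the observables are the identity, the learned continuous-time matrix should approximate the true dynamics matrix. A hedged check cell, assuming the tuned model exposes its system matrix as `.A` exactly as the last cell suggests:

```python
import numpy as np

# True dynamics matrix of SimpleLinear defined above.
A_true = np.array([[1.2, 0.5],
                   [-0.7, 0.1]])

# Hypothetical check: compare the learned continuous-time matrix to the truth
# (slice the top-left block in case extra observable dimensions are appended).
A_learned = np.asarray(experiment_results['tuned_model'].A)
print(np.round(A_learned, 3))
print("close to true dynamics:", np.allclose(A_learned[:2, :2], A_true, atol=0.1))
```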
diff --git a/test/unit_test/test_autokoopman.py b/test/unit_test/test_autokoopman.py
index 4f96df8..3bf1c63 100644
--- a/test/unit_test/test_autokoopman.py
+++ b/test/unit_test/test_autokoopman.py
@@ -27,18 +27,29 @@ def test_autokoopman(obs, opt, cost, normalize):
     np.random.seed(0)
 
     # given issue #29, let's make these differently sized
+    sp = 0.1
     ivs = np.random.uniform(low=-2.0, high=2.0, size=(20, 2))
+    t_ends = [np.random.random() + 1.0 for idx in range(len(ivs))]
+    lengths = [int(t_end // sp) for t_end in t_ends]
     training_data = UniformTimeTrajectoriesData(
         {
             idx: fhn.solve_ivp(
                 initial_state=iv,
-                tspan=[0.0, np.random.random() + 1.0],
-                sampling_period=0.1,
+                tspan=[0.0, t_ends[idx]],
+                sampling_period=sp,
             )
             for idx, iv in enumerate(ivs)
         }
     )
+    # produce scoring weights if the cost is weighted
+    if cost == "weighted":
+        scoring_weights = {
+            idx: np.ones((length,)) * 0.01 for idx, length in enumerate(lengths)
+        }
+    else:
+        scoring_weights = None
+
     # learn model from data
     # make the run as short as possible but still be meaningful for catching errors
     experiment_results = auto_koopman(
@@ -47,6 +58,72 @@ def test_autokoopman(obs, opt, cost, normalize):
         obs_type=obs,
         opt=opt,
         cost_func=cost,
+        scoring_weights=scoring_weights,
         n_obs=20,
         max_opt_iter=2,
         grid_param_slices=2,
         n_splits=2,
         rank=(10, 12, 1),
         max_epochs=1,
         torch_device="cpu",
         normalize=normalize,
     )
+
+
+@pytest.mark.parametrize(
+    "obs, opt, cost, normalize",
+    auto_config,
+)
+def test_autokoopman_np(obs, opt, cost, normalize):
+    """test the case of autokoopman using numpy arrays"""
+
+    # for a complete example, let's create an example dataset using an included benchmark system
+    import autokoopman.benchmark.fhn as fhn
+
+    fhn = fhn.FitzHughNagumo()
+    np.random.seed(0)
+
+    # given issue #29, let's make these differently sized
+    sp = 0.1
+    ivs = np.random.uniform(low=-2.0, high=2.0, size=(20, 2))
+    t_ends = [np.random.random() + 1.0 for idx in range(len(ivs))]
+    lengths = [int(t_end // sp) for t_end in t_ends]
+    training_data = UniformTimeTrajectoriesData(
+        {
+            idx: fhn.solve_ivp(
+                initial_state=iv,
+                tspan=[0.0, t_ends[idx]],
+                sampling_period=sp,
+            )
+            for idx, iv in enumerate(ivs)
+        }
+    )
+
+    # produce scoring weights if the cost is weighted
+    if cost == "weighted":
+        scoring_weights = {
+            idx: np.ones((length,)) * 0.01 for idx, length in enumerate(lengths)
+        }
+    else:
+        scoring_weights = None
+
+    # put into numpy arrays for testing
+    training_data_np = []
+    scoring_weights_np = []
+    for name in training_data.traj_names:
+        training_data_np.append(training_data[name].states)
+        if scoring_weights is not None:
+            scoring_weights_np.append(scoring_weights[name])
+
+    # learn model from data
+    # make the run as short as possible but still be meaningful for catching errors
+    experiment_results = auto_koopman(
+        training_data_np,
+        sampling_period=sp,
+        obs_type=obs,
+        opt=opt,
+        cost_func=cost,
+        scoring_weights=scoring_weights_np,
+        n_obs=20,
+        max_opt_iter=2,
+        grid_param_slices=2,