Merge pull request #85 from EthanJamesLew/enhancement/reweighting
Draft: AutoKoopman Enhancements for Falsification Research
EthanJamesLew authored Mar 12, 2024
2 parents 188ece0 + 05627b9 commit 4a8a327
Showing 14 changed files with 525 additions and 131 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/notebook.yml
@@ -25,7 +25,6 @@ jobs:
run: |
pip install .
conda install pytest
-pip install nbmake==0.5
+pip install nbmake==1.5.3
pytest --nbmake "./notebooks/"
-pytest
3 changes: 0 additions & 3 deletions .lift/config.toml

This file was deleted.

20 changes: 20 additions & 0 deletions README.md
@@ -120,6 +120,26 @@ The library architecture has a modular design, allowing users to implement custo
See the
[AutoKoopman Documentation](https://ethanjameslew.github.io/AutoKoopman/).

## Citing AutoKoopman

AutoKoopman has been published as a tool paper at ATVA 2023. The preprint can be found [here](https://www.researchgate.net/profile/Ethan-Lew/publication/374805680_AutoKoopman_A_Toolbox_for_Automated_System_Identification_via_Koopman_Operator_Linearization/links/6537cf9f1d6e8a70704c7f31/AutoKoopman-A-Toolbox-for-Automated-System-Identification-via-Koopman-Operator-Linearization.pdf).

If you use AutoKoopman, please cite:

Lew, E., Hekal, A., Potomkin, K., Kochdumper, N., Hencey, B., Bak, S., & Bogomolov, S. (2023, October). AutoKoopman: A toolbox for automated system identification via Koopman operator linearization. In International Symposium on Automated Technology for Verification and Analysis (pp. 237-250). Cham: Springer Nature Switzerland.

BibTeX:
```
@inproceedings{lew2023autokoopman,
  title={{AutoKoopman}: A Toolbox for Automated System Identification via {Koopman} Operator Linearization},
  author={Lew, Ethan and Hekal, Abdelrahman and Potomkin, Kostiantyn and Kochdumper, Niklas and Hencey, Brandon and Bak, Stanley and Bogomolov, Sergiy},
  booktitle={International Symposium on Automated Technology for Verification and Analysis},
  pages={237--250},
  year={2023},
  organization={Springer}
}
```

## References

<a id="1">[1]</a> Williams, M. O., Kevrekidis, I. G., & Rowley, C. W. (2015). A data-driven approximation of the Koopman operator: Extending dynamic mode decomposition. Journal of Nonlinear Science, 25, 1307-1346.
102 changes: 82 additions & 20 deletions autokoopman/autokoopman.py
@@ -1,7 +1,7 @@
"""
Main AutoKoopman Function (Convenience Function)
"""
-from typing import Callable, Union, Sequence, Optional, Tuple
+from typing import Callable, Dict, Hashable, Union, Sequence, Optional, Tuple

import numpy as np

@@ -20,6 +20,7 @@
TrajectoryScoring,
)
from autokoopman.estimator.koopman import KoopmanDiscEstimator
+from autokoopman.estimator.koopman import KoopmanContinuousEstimator
from autokoopman.tuner.gridsearch import GridSearchTuner
from autokoopman.tuner.montecarlo import MonteCarloTuner
from autokoopman.tuner.bayesianopt import BayesianOptTuner
@@ -32,10 +33,10 @@
# valid string identifiers for the autokoopman magic
obs_types = {"rff", "poly", "quadratic", "id", "deep"}
opt_types = {"grid", "monte-carlo", "bopt"}
-scoring_func_types = {"total", "end", "relative"}
+scoring_func_types = {"total", "end", "relative", "weighted"}


-def get_scoring_func(score_name):
+def get_scoring_func(score_name, scoring_weights):
"""resolve scoring function from name or callable type"""
# if callable, just return it
if callable(score_name):
@@ -46,6 +47,10 @@ def get_scoring_func(score_name):
return TrajectoryScoring.end_point_score
elif score_name == "relative":
return TrajectoryScoring.relative_score
+elif score_name == "weighted":
+return lambda true, pred: TrajectoryScoring.weighted_score(
+true, pred, scoring_weights
+)
else:
raise ValueError(
f"Scoring function name {score_name} is not in available list (names are {scoring_func_types})"
@@ -97,32 +102,41 @@ def get_parameter_space(obs_type, threshold_range, rank):
)


-def get_estimator(obs_type, sampling_period, dim, obs, hyperparams, normalize):
+def get_estimator(
+obs_type, learn_continuous, sampling_period, dim, obs, hyperparams, normalize
+):
"""from the myriad of user suppled switches, select the right estimator"""

+def inst_estimator(*args, **kwargs):
+if learn_continuous:
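+# note: the continuous estimator takes no sampling period, so args[1]
+# (the sampling period) is skipped; inferred from the call sites below,
+# which pass (observables, sampling_period, dim, ...)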
+return KoopmanContinuousEstimator(args[0], *args[2:], **kwargs)
+else:
+return KoopmanDiscEstimator(*args, **kwargs)

if obs_type == "rff":
observables = kobs.IdentityObservable() | kobs.RFFObservable(
dim, obs, hyperparams[0]
)
-return KoopmanDiscEstimator(
+return inst_estimator(
observables, sampling_period, dim, rank=hyperparams[1], normalize=normalize
)
elif obs_type == "quadratic":
observables = kobs.IdentityObservable() | kobs.QuadraticObservable(dim)
-return KoopmanDiscEstimator(
+return inst_estimator(
observables, sampling_period, dim, rank=hyperparams[0], normalize=normalize
)
elif obs_type == "poly":
observables = kobs.PolynomialObservable(dim, hyperparams[0])
-return KoopmanDiscEstimator(
+return inst_estimator(
observables, sampling_period, dim, rank=hyperparams[1], normalize=normalize
)
elif obs_type == "id":
observables = kobs.IdentityObservable()
-return KoopmanDiscEstimator(
+return inst_estimator(
observables, sampling_period, dim, rank=hyperparams[0], normalize=normalize
)
elif isinstance(obs_type, KoopmanObservable):
-return KoopmanDiscEstimator(
+return inst_estimator(
obs_type, sampling_period, dim, rank=hyperparams[0], normalize=normalize
)
else:
@@ -132,6 +146,7 @@ def get_estimator(obs_type, sampling_period, dim, obs, hyperparams, normalize):
def auto_koopman(
training_data: Union[TrajectoriesData, Sequence[np.ndarray]],
inputs_training_data: Optional[Sequence[np.ndarray]] = None,
+learn_continuous: bool = False,
sampling_period: Optional[float] = None,
normalize: bool = False,
opt: Union[str, HyperparameterTuner] = "monte-carlo",
@@ -142,6 +157,9 @@
cost_func: Union[
str, Callable[[TrajectoriesData, TrajectoriesData], float]
] = "total",
+scoring_weights: Optional[
+Union[Sequence[np.ndarray], Dict[Hashable, np.ndarray]]
+] = None,
n_obs: int = 100,
rank: Optional[Union[Tuple[int, int], Tuple[int, int, int]]] = None,
grid_param_slices: int = 10,
@@ -158,14 +176,15 @@
:param training_data: training trajectories data from which to learn the system
:param inputs_training_data: optional input trajectories data from which to learn the system (this isn't needed if the training data has inputs already)
+:param learn_continuous: whether to learn a continuous-time or discrete-time Koopman estimator
:param sampling_period: (for discrete time system) sampling period of training data
:param normalize: normalize the states of the training trajectories
:param opt: hyperparameter optimizer {"grid", "monte-carlo", "bopt"}
:param max_opt_iter: maximum iterations for the tuner to use
:param max_epochs: maximum number of training epochs
:param n_splits: (for optimizers) if set, switches to k-folds bootstrap validation for the hyperparameter tuning. This is useful for things like RFF tuning where the results have noise.
:param obs_type: (for koopman) koopman observables to use {"rff", "quadratic", "poly", "id", "deep"}
-:param cost_func: cost function to use for hyperparameter optimization
+:param cost_func: cost function to use for hyperparameter optimization {"total", "end", "relative", "weighted"}
:param n_obs: (for koopman) number of observables to use (if applicable)
:param rank: (for koopman) rank range (start, stop) or (start, stop, step)
:param grid_param_slices: (for grid tuner) resolution to slice continuous valued parameters into
@@ -210,12 +229,24 @@ def auto_koopman(
# 'estimator': <autokoopman.estimator.koopman.KoopmanDiscEstimator at 0x7f0f92ff0610>}
"""

-training_data, sampling_period = _sanitize_training_data(
-training_data, inputs_training_data, sampling_period, opt, obs_type
+if cost_func == "weighted":
+assert (
+scoring_weights is not None
+), "weighted scoring requires scoring weights (currently None)"
+
+training_data, sampling_period, scoring_weights = _sanitize_training_data(
+training_data,
+inputs_training_data,
+sampling_period,
+opt,
+obs_type,
+scoring_weights,
)

# get the hyperparameter map
if obs_type in {"deep"}:
+if learn_continuous:
+raise ValueError("deep learning is only for discrete systems")
modelmap = _deep_model_map(
training_data,
max_epochs,
Expand All @@ -229,6 +260,7 @@ def auto_koopman(
else:
modelmap = _edmd_model_map(
training_data,
learn_continuous,
rank,
obs_type,
n_obs,
@@ -255,8 +287,17 @@
else:
raise ValueError(f"could not match a tuner to the string {opt}")

-with hide_prints():
-res = gt.tune(nattempts=max_opt_iter, scoring_func=get_scoring_func(cost_func))
+if verbose:
+res = gt.tune(
+nattempts=max_opt_iter,
+scoring_func=get_scoring_func(cost_func, scoring_weights),
+)
+else:
+with hide_prints():
+res = gt.tune(
+nattempts=max_opt_iter,
+scoring_func=get_scoring_func(cost_func, scoring_weights),
+)

# pack results into out custom output
result = {
@@ -327,11 +368,19 @@ def get_model(self, hyperparams: Sequence):


def _edmd_model_map(
-training_data, rank, obs_type, n_obs, lengthscale, sampling_period, normalize
+training_data,
+learn_continuous,
+rank,
+obs_type,
+n_obs,
+lengthscale,
+sampling_period,
+normalize,
) -> HyperparameterMap:
"""model map for eDMD based methods
-:param training_data:
+:param training_data: trajectories training dataset
+:param learn_continuous: whether to learn a continuous-time or discrete-time Koopman estimator
:param rank: set of ranks to try (of DMD rank parameter)
:param obs_type:
:param n_obs: some obs type require a number of observables
@@ -363,15 +412,21 @@ def __init__(self):

def get_model(self, hyperparams: Sequence):
return get_estimator(
-obs_type, sampling_period, dim, n_obs, hyperparams, normalize
+obs_type,
+learn_continuous,
+sampling_period,
+dim,
+n_obs,
+hyperparams,
+normalize,
)

# get the hyperparameter map
return _ModelMap()


def _sanitize_training_data(
-training_data, inputs_training_data, sampling_period, opt, obs_type
+training_data, inputs_training_data, sampling_period, opt, obs_type, scoring_weights
):
"""auto_koopman input sanitization"""

@@ -395,20 +450,27 @@ def _sanitize_training_data(
# convert the data to autokoopman trajectories
if isinstance(training_data, TrajectoriesData):
if not isinstance(training_data, UniformTimeTrajectoriesData):
+assert scoring_weights is None, "scoring weights must be None as interpolation is occurring"
print(
f"resampling trajectories as they need to be uniform time (sampling period {sampling_period})"
)
training_data = training_data.interp_uniform_time(sampling_period)
else:
if not np.isclose(training_data.sampling_period, sampling_period):
+assert scoring_weights is None, "scoring weights must be None as interpolation is occurring"
print(
f"resampling trajectories because the sampling periods differ (original {training_data.sampling_period}, new {sampling_period})"
)
training_data = training_data.interp_uniform_time(sampling_period)
else:
+if scoring_weights is not None:
+    if isinstance(training_data, dict):
+        assert isinstance(scoring_weights, dict), "training data has unordered keys, so scoring weights must be a dictionary with matching keys"
+    else:
+        scoring_weights = {idx: weights for idx, weights in enumerate(scoring_weights)}

# figure out how to add inputs
training_iter = (
-training_data.items() if isinstance(training_data, dict) else training_data
+training_data.items() if isinstance(training_data, dict) else enumerate(training_data)
)
if inputs_training_data is not None:
training_iter = [(n, x, inputs_training_data[n]) for n, x in training_iter]
@@ -429,4 +491,4 @@ def _sanitize_training_data(
}
)

-return training_data, sampling_period
+return training_data, sampling_period, scoring_weights
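Taken together, the `auto_koopman` changes add continuous-time learning (`learn_continuous`), tuner output control via `verbose`, and a weighted scoring option (`cost_func="weighted"` plus `scoring_weights`). A minimal sketch of how the new options compose, assuming the top-level `auto_koopman` import; the data and parameter values here are synthetic and purely illustrative:

```python
import numpy as np

from autokoopman import auto_koopman

# synthetic training data: two trajectories, 50 time steps, 2 states
trajs = [np.random.rand(50, 2) for _ in range(2)]

# one weight per time step for each trajectory; 1-D weights are tiled
# across the state dimensions by TrajectoryScoring.weighted_score
weights = [np.linspace(0.0, 1.0, 50) for _ in trajs]

res = auto_koopman(
    trajs,
    sampling_period=0.1,
    learn_continuous=False,   # True selects KoopmanContinuousEstimator
    obs_type="rff",
    opt="monte-carlo",
    cost_func="weighted",     # new scoring option in this PR
    scoring_weights=weights,  # sequence aligned with trajs, or a dict keyed by name
    n_obs=50,
    max_opt_iter=10,
)
estimator = res["estimator"]  # result key as shown in the auto_koopman docstring
```

Passing weights as a plain sequence relies on `_sanitize_training_data` enumerating them into a dict, so sequence weights must be ordered consistently with the training trajectories.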
2 changes: 1 addition & 1 deletion autokoopman/benchmark/spring.py
@@ -1,5 +1,5 @@
import sympy as sp # type: ignore
-from numpy import cos, sin
+from sympy import cos, sin

import autokoopman.core.system as asys

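A note on this one-line fix: the spring benchmark defines its dynamics as sympy expressions, and numpy's `cos`/`sin` ufuncs cannot consume sympy symbols, while sympy's versions keep the expression symbolic. A minimal illustration of the failure mode (standalone snippet, not from the benchmark):

```python
import numpy as np
import sympy as sp

x = sp.symbols("x")

print(sp.cos(x))  # cos(x): stays symbolic, suitable for symbolic system definitions

try:
    np.cos(x)  # the ufunc cannot evaluate a Symbol numerically
except TypeError as err:
    print(f"np.cos(Symbol) fails: {err}")
```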
59 changes: 59 additions & 0 deletions autokoopman/core/scoring.py
@@ -0,0 +1,59 @@
"""Scoring Metrics for Evaluation
"""
import numpy as np
from numpy.linalg import norm

from typing import Dict, Hashable
from autokoopman.core.trajectory import TrajectoriesData


class TrajectoryScoring:
@staticmethod
def weighted_score(
true_data: TrajectoriesData,
prediction_data: TrajectoriesData,
weights: Dict[Hashable, np.ndarray],
):
assert true_data.traj_names.issubset(
set(weights.keys())
) and prediction_data.traj_names.issubset(
set(weights.keys())
), f"Datasets trajectory names (true={true_data.traj_names}, prediction={prediction_data.traj_names}) and Weights keys ({weights.keys()}) must correspond!"

# finalize the shapes of the weights
weights_f = {}
for k, w in weights.items():
w = np.array(w)
if len(w.shape) == 1:
w = np.tile(np.atleast_2d(w).T, reps=(1, len(true_data.state_names)))
weights_f[k] = w

absdiff = (prediction_data - true_data).abs()
# trajectories may differ in length, so keep the per-trajectory weighted
# error norms in a list rather than stacking them into a (possibly ragged) array
weighted_errors = [
norm(weights_f[n] * s.states, axis=1) for n, s in absdiff._trajs.items()
]
return np.sum(np.concatenate(weighted_errors, axis=0))

@staticmethod
def end_point_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
errors = (prediction_data - true_data).norm()
end_errors = np.array([s.states[-1] for s in errors])
return np.mean(end_errors)

@staticmethod
def total_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
errors = (prediction_data - true_data).norm()
end_errors = np.array([s.states.flatten() for s in errors])
return np.mean(np.concatenate(end_errors, axis=0))

@staticmethod
def relative_score(true_data: TrajectoriesData, prediction_data: TrajectoriesData):
# relative error: prediction error normalized by the spread of the predicted trajectory about its mean
err_term = []
for k in prediction_data.traj_names:
pred_t = prediction_data[k]
true_t = true_data[k]
abs_error = np.linalg.norm(pred_t.states - true_t.states)
mean_error = np.linalg.norm(pred_t.states - np.mean(pred_t.states, axis=0))
err_term.append(abs_error / mean_error)
return np.mean(np.array(err_term))
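`weighted_score` accepts, for each trajectory, either one weight per time step (a 1-D array that is tiled across all state dimensions) or a full per-step, per-state weight array. A small numpy sketch of the tiling and reduction above, using illustrative shapes only:

```python
import numpy as np
from numpy.linalg import norm

n_steps, n_states = 5, 3

# 1-D case: one weight per time step
w = np.linspace(0.0, 1.0, n_steps)

# the same reshaping weighted_score applies: tile the column across the states
w_full = np.tile(np.atleast_2d(w).T, reps=(1, n_states))
assert w_full.shape == (n_steps, n_states)

# stand-in for one trajectory's states from (prediction_data - true_data).abs()
absdiff = np.abs(np.random.randn(n_steps, n_states))

# one weighted error norm per time step, summed over all steps
score = np.sum(norm(w_full * absdiff, axis=1))
print(score)
```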
13 changes: 13 additions & 0 deletions autokoopman/core/trajectory.py
@@ -210,6 +210,16 @@ def norm(self, axis=1) -> "Trajectory":
threshold=self._threshold,
)

def abs(self) -> "Trajectory":
return Trajectory(
self.times,
np.abs(self.states),
None,
state_names=self.names,
input_names=None,
threshold=self._threshold,
)


class UniformTimeTrajectory(Trajectory):
"""uniform time is a trajectory advanced by a sampling period"""
@@ -491,6 +501,9 @@ def __add__(self, other: "TrajectoriesData"):
def norm(self):
return TrajectoriesData({k: v.norm() for k, v in self._trajs.items()})

def abs(self):
return TrajectoriesData({k: v.abs() for k, v in self._trajs.items()})


class UniformTimeTrajectoriesData(TrajectoriesData):
"""a dataset of uniform time trajectories"""