diff --git a/Julia/Manifest.toml b/Julia/Manifest.toml index 77309a3..1434449 100644 --- a/Julia/Manifest.toml +++ b/Julia/Manifest.toml @@ -2,13 +2,19 @@ julia_version = "1.10.3" manifest_format = "2.0" -project_hash = "27831aa37d3f7798daa210cbedbee49af6d6a89c" +project_hash = "67387cdd1a478cc1fcb3c008519a364575bf3c4a" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" version = "0.0.1" +[[deps.ASL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6252039f98492252f9e47c312c8ffda0e3b9e78d" +uuid = "ae81ac8f-d209-56e5-92de-9978fef736f9" +version = "0.1.3+0" + [[deps.AbstractTrees]] git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" @@ -125,6 +131,12 @@ git-tree-sha1 = "70232f82ffaab9dc52585e0dd043b5e0c6b714f1" uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" version = "0.1.12" +[[deps.CodecBzip2]] +deps = ["Bzip2_jll", "Libdl", "TranscodingStreams"] +git-tree-sha1 = "9b1ca1aa6ce3f71b3d1840c538a8210a043625eb" +uuid = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +version = "0.8.2" + [[deps.CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73" @@ -472,6 +484,12 @@ git-tree-sha1 = "eb8fed28f4994600e29beef49744639d985a04b2" uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" version = "0.1.16" +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "ca0f6bf568b4bfc807e7537f081c81e35ceca114" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.10.0+0" + [[deps.HypergeometricFunctions]] deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685" @@ -499,6 +517,18 @@ git-tree-sha1 = "b7bc05649af456efc75d178846f47006c2c4c3c7" uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" version = "0.13.6" +[[deps.Ipopt]] +deps = ["Ipopt_jll", "LinearAlgebra", "MathOptInterface", 
"OpenBLAS32_jll", "PrecompileTools"] +git-tree-sha1 = "3cb39c81e713245d61de9d2aafa53489e3ab6e9b" +uuid = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +version = "1.6.2" + +[[deps.Ipopt_jll]] +deps = ["ASL_jll", "Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "MUMPS_seq_jll", "SPRAL_jll", "libblastrampoline_jll"] +git-tree-sha1 = "546c40fd3718c65d48296dd6cec98af9904e3ca4" +uuid = "9cc047cb-c261-5740-88fc-0cf96f7bdcc7" +version = "300.1400.1400+0" + [[deps.IrrationalConstants]] git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" @@ -528,6 +558,18 @@ git-tree-sha1 = "3336abae9a713d2210bb57ab484b1e065edd7d23" uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "3.0.2+0" +[[deps.JuMP]] +deps = ["LinearAlgebra", "MacroTools", "MathOptInterface", "MutableArithmetics", "OrderedCollections", "PrecompileTools", "Printf", "SparseArrays"] +git-tree-sha1 = "07385c772da34d91fc55d6c704b6224302082ba0" +uuid = "4076af6c-e467-56ae-b986-b466b2749572" +version = "1.21.1" + + [deps.JuMP.extensions] + JuMPDimensionalDataExt = "DimensionalData" + + [deps.JuMP.weakdeps] + DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" + [[deps.LAME_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" @@ -697,6 +739,18 @@ weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] SpecialFunctionsExt = "SpecialFunctions" +[[deps.METIS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1fd0a97409e418b78c53fac671cf4622efdf0f21" +uuid = "d00139f3-1899-568f-a2f0-47f597d42d70" +version = "5.1.2+0" + +[[deps.MUMPS_seq_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "METIS_jll", "libblastrampoline_jll"] +git-tree-sha1 = "840b83c65b27e308095c139a457373850b2f5977" +uuid = "d7ed1dd3-d0ae-5e8e-bfb4-87a502085b8d" +version = "500.600.201+0" + 
[[deps.MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" @@ -718,6 +772,12 @@ git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" version = "0.1.2" +[[deps.MathOptInterface]] +deps = ["BenchmarkTools", "CodecBzip2", "CodecZlib", "DataStructures", "ForwardDiff", "JSON", "LinearAlgebra", "MutableArithmetics", "NaNMath", "OrderedCollections", "PrecompileTools", "Printf", "SparseArrays", "SpecialFunctions", "Test", "Unicode"] +git-tree-sha1 = "9cc5acd6b76174da7503d1de3a6f8cf639b6e5cb" +uuid = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +version = "1.29.0" + [[deps.MbedTLS]] deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" @@ -747,6 +807,12 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" uuid = "14a3606d-f60d-562e-9121-12d972cd8159" version = "2023.1.10" +[[deps.MutableArithmetics]] +deps = ["LinearAlgebra", "SparseArrays", "Test"] +git-tree-sha1 = "a3589efe0005fc4718775d8641b2de9060d23f73" +uuid = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" +version = "1.4.4" + [[deps.NaNMath]] deps = ["OpenLibm_jll"] git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" @@ -786,6 +852,12 @@ git-tree-sha1 = "887579a3eb005446d514ab7aeac5d1d027658b8f" uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" version = "1.3.5+1" +[[deps.OpenBLAS32_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6065c4cff8fee6c6770b277af45d5082baacdba1" +uuid = "656ef2d0-ae68-5445-9ca0-591084a874a2" +version = "0.3.24+0" + [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" @@ -1032,6 +1104,12 @@ git-tree-sha1 = "3aac6d68c5e57449f5b9b865c9ba50ac2970c4cf" uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" version = "0.6.42" +[[deps.SPRAL_jll]] +deps = ["Artifacts", 
"CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "Libdl", "METIS_jll", "libblastrampoline_jll"] +git-tree-sha1 = "34b9dacd687cace8aa4d550e3e9bb8615f1a61e9" +uuid = "319450e9-13b8-58e8-aa9f-8fd1420848ab" +version = "2024.1.18+0" + [[deps.Scratch]] deps = ["Dates"] git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" diff --git a/Julia/Project.toml b/Julia/Project.toml index 0ba4acc..2e268b6 100644 --- a/Julia/Project.toml +++ b/Julia/Project.toml @@ -9,6 +9,8 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" @@ -26,10 +28,12 @@ Distributions = "0.25.107" Documenter = "1.4.1" FiniteDiff = "2.23.0" ForwardDiff = "0.10.36" +Ipopt = "1.6.2" +JuMP = "1.21.1" Plots = "1.40.2" RobotDynamics = "=0.4.7" SpecialFunctions = "2.3.1" StaticArrays = "1.9.3" Statistics = "1.10.0" StatsBase = "0.34.3" -TrajectoryOptimization = "=0.7.0" \ No newline at end of file +TrajectoryOptimization = "=0.7.0" diff --git a/Julia/README.md b/Julia/README.md index 86dd36d..a49e016 100644 --- a/Julia/README.md +++ b/Julia/README.md @@ -1,9 +1,11 @@ # Julia [![Dev](https://img.shields.io/badge/docs-stable-blue?logo=Julia&logoColor=white)](https://TUM-ITR.github.io/PGopt) -This folder contains the Julia implementation of `PGopt`, which does not require proprietary software. The open-source solver [Altro](https://github.com/RoboticExplorationLab/Altro.jl) is used for the optimization. The results presented in the paper were generated with this version, and the software reproduces these results exactly. +This folder contains the Julia implementation of `PGopt`. 
In this version, the solvers [Altro](https://github.com/RoboticExplorationLab/Altro.jl) and [IPOPT](https://coin-or.github.io/Ipopt/) can be employed to solve the optimal control problem (OCP). -**Please note that this version has several limitations: only cost functions of the form $J_H=\sum\nolimits_{t=0}^H \frac{1}{2} u_t R u_t$, measurement functions of the form $y=x_{1:n_y}$, and output constraints of the form $y_\mathrm{min} \leq y \leq y_\mathrm{max}$ are supported.** +The results presented in the paper were generated with the solver Altro. Please note that this implementation has several limitations: only cost functions of the form $J_H=\sum\nolimits_{t=0}^H \frac{1}{2} u_t R u_t$, measurement functions of the form $y=x_{1:n_y}$, and output constraints of the form $y_\mathrm{min} \leq y \leq y_\mathrm{max}$ are supported. + +The solver IPOPT is more general than Altro, and this implementation allows arbitrary cost functions $J_H(u_{1:H},x_{1:H},y_{1:H})$, measurement functions $y=g(x,u)$, and constraints $h(u_{1:H},x_{1:H},y_{1:H})$. However, using IPOPT together with the proprietary [HSL Linear Solvers for Julia](https://licences.stfc.ac.uk/product/libhsl) (`HSL_jll.jl`) is recommended. A license (free to academics) is required. To execute the code, install [Julia](https://julialang.org/). Navigate to this folder, start a Pkg REPL (press `]` in a Julia REPL), and install the dependencies via ``` @@ -11,7 +13,7 @@ pkg>activate . pkg>instantiate ``` -Then execute the scripts `autocorrelation.jl`, `PG_OCP_known_basis_functions.jl`, or `PG_generic_basis_functions.jl` in the folder `examples`. +Then execute the example scripts in the folder `examples`. Tested with Windows 11 and Julia 1.10. @@ -22,18 +24,18 @@ The following examples can be found in the folder [examples](examples). This script reproduces the normalized auto-correlation function plot (Figure 1) in Section V-B of the paper.
- +
Assuming knowledge of the basis functions, samples are drawn from the posterior distribution over model parameters and latent state trajectories using the function `particle_Gibbs()` without thinning. Afterward, the autocorrelation is plotted using the function `plot_autocorrelation()`. The runtime of the script is about 10 minutes on a standard laptop. -### PG_OCP_known_basis_functions.jl -This script reproduces the results of the optimal control approach with known basis functions (Figure 2) given in Section V-B of the paper. +### PG_OCP_known_basis_functions_Altro.jl +This script reproduces the results of the optimal control approach with known basis functions (Figure 2) given in Section V-B of the paper using the solver Altro.- +
``` @@ -42,51 +44,62 @@ Cardinality of the support sub-sample (s): 7 Max. constraint violation probability (1-epsilon): 10.22 % ``` -First, the algorithm and simulation parameters are defined, and training data is generated. Then, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP_greedy_guarantees()`, which solves the scenario OCP and inferres probabilistic constraint satisfaction guarantees by greedily removing constraints and solving the corresponding reduced OCP. +First, the algorithm and simulation parameters are defined, and training data is generated. Then, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP_Altro_greedy_guarantees()`, which solves the scenario OCP using Altro and infers probabilistic constraint satisfaction guarantees by greedily removing constraints and solving the corresponding reduced OCP. The runtime of the script is about 2 hours on a standard laptop. For the results in Table II of the paper, this script is repeated with seeds 1:100. -### PG_OCP_generic_basis_functions.jl +### PG_OCP_known_basis_functions_Ipopt.jl +This script is similar to `PG_OCP_known_basis_functions_Altro.jl`, but the solver IPOPT is used instead of Altro. Due to the different solver, the results differ slightly from the ones presented in the paper. + +### PG_OCP_generic_basis_functions_Altro.jl This script reproduces the results of the optimal control approach with generic basis functions (Figure 3) given in Section V-C of the paper.- +
-The method presented in the paper ["A flexible state–space model for learning nonlinear dynamical systems"](https://doi.org/10.1016/j.automatica.2017.02.030) is utilized to systematically derive basis functions and priors for the parameters based on a reduced-rank GP approximation. Afterward, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP()`, which solves the scenario OCP. +The method presented in the paper ["A flexible state–space model for learning nonlinear dynamical systems"](https://doi.org/10.1016/j.automatica.2017.02.030) is utilized to systematically derive basis functions and priors for the parameters based on a reduced-rank GP approximation. Afterward, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP_Altro()`, which solves the scenario OCP using Altro. The runtime of the script is about 2 hours on a standard laptop. Using an improved phi function can reduce the runtime to about 50 minutes, but the results change slightly due to numerical reasons. Further explanations can be found in the source code. For the results in Table IV of the paper, this script is repeated with seeds 1:100. +### PG_OCP_generic_basis_functions_Ipopt.jl +This script is similar to `PG_OCP_generic_basis_functions_Altro.jl`, but the solver IPOPT is used instead of Altro. Due to the different solver, the results differ slightly from the ones presented in the paper. + ## List of source code files The following files in the folder [src](src) contain the PGopt source code. ### PGopt.jl -Defines the module `PGopt`, which contains the functions defined in `particle_Gibbs.jl` and `optimal_control_Altro.jl`. 
+Defines the module `PGopt`, which contains the functions defined in `particle_Gibbs.jl`, `optimal_control_Altro.jl`, and `optimal_control_Ipopt.jl`. ### particle_Gibbs.jl This file contains the particle Gibbs sampler (function `particle_Gibbs()`) and several auxiliary functions that can, for example, be used to test the predictions. -| Function | Purpose | +| Function | Purpose | |-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------| -| particle_Gibbs() | Run particle Gibbs sampler with ancestor sampling to obtain samples from the joint parameter and state posterior distribution. | -| test_prediction() | Simulate the PGS samples forward in time and compare the predictions to the test data. | -| plot_predictions() | Plot the predictions and the test data. | -| plot_autocorrelation() | Plot the autocorrelation function (ACF) of the PG samples. | -| systematic_resampling() | Helper function for particle_Gibbs(). Sample indices according to the weights. | -| MNIW_sample() | Helper function for particle_Gibbs(). Sample new model parameters from the distribution conditional on the previously sampled state trajectory. | - -Detailed explanations of the individual functions can be found as comments in the file. +| particle_Gibbs() | Run particle Gibbs sampler with ancestor sampling to obtain samples from the joint parameter and state posterior distribution. | +| test_prediction() | Simulate the PGS samples forward in time and compare the predictions to the test data. | +| plot_predictions() | Plot the predictions and the test data. | +| plot_autocorrelation() | Plot the autocorrelation function (ACF) of the PG samples. | +| systematic_resampling() | Helper function for particle_Gibbs(). Sample indices according to the weights. | +| MNIW_sample() | Helper function for particle_Gibbs(). 
Sample new model parameters from the distribution conditional on the previously sampled state trajectory. | ### optimal_control_Altro.jl This file contains all the functions required to solve the scenario OCP and to derive the theoretical guarantees. The solver Altro is used for the optimization. -| Function | Purpose | +| Function | Purpose | +|-----------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| solve_PG_OCP_Altro() | Solve the scenario optimal control problem using Altro. | +| solve_PG_OCP_Altro_greedy_guarantees() | Solve the scenario optimal control problem using Altro and determine a support sub-sample with cardinality s via a greedy constraint removal. Based on the cardinality s, calculate a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory u_{0:H} is applied to the unknown system. | +| epsilon() | Determine epsilon based on the cardinality s. | +| RobotDynamics.discrete_dynamics() | Helper function for solve_PG_OCP(). | + +### optimal_control_Ipopt.jl +This file contains all the functions required to solve the scenario OCP and to derive the theoretical guarantees. The solver IPOPT is used for the optimization. 
+| Function | Purpose | |-----------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| solve_PG_OCP() | Solve the scenario optimal control problem. | -| solve_PG_OCP_greedy_guarantees() | Solve the scenario optimal control problem and determine a support sub-sample with cardinality s via a greedy constraint removal. Based on the cardinality s, calculate a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory u_{0:H} is applied to the unknown system. | -| epsilon() | Determine epsilon based on the cardinality s. | -| RobotDynamics.discrete_dynamics() | Helper function for solve_PG_OCP(). | +| solve_PG_OCP_Ipopt() | Solve the scenario optimal control problem using IPOPT. | +| solve_PG_OCP_Ipopt_greedy_guarantees() | Solve the scenario optimal control problem using IPOPT and determine a support sub-sample with cardinality s via a greedy constraint removal. Based on the cardinality s, calculate a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory u_{0:H} is applied to the unknown system. | Detailed explanations of the individual functions can be found as comments in the file. 
\ No newline at end of file diff --git a/Julia/docs/src/examples/PG_OCP_generic_basis_functions.md b/Julia/docs/src/examples/PG_OCP_generic_basis_functions.md index c48cac3..25d748a 100644 --- a/Julia/docs/src/examples/PG_OCP_generic_basis_functions.md +++ b/Julia/docs/src/examples/PG_OCP_generic_basis_functions.md @@ -4,13 +4,11 @@ This example reproduces the results of the optimal control approach with generic ![autocorrelation](../assets/PG_OCP_generic_basis_functions.svg) -The method presented in the paper ["A flexible state–space model for learning nonlinear dynamical systems"](https://doi.org/10.1016/j.automatica.2017.02.030) is utilized to systematically derive basis functions and priors for the parameters based on a reduced-rank GP approximation. Afterward, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP()`, which solves the scenario OCP. +The method presented in the paper ["A flexible state–space model for learning nonlinear dynamical systems"](https://doi.org/10.1016/j.automatica.2017.02.030) is utilized to systematically derive basis functions and priors for the parameters based on a reduced-rank GP approximation. Afterward, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP_Altro()` (or `solve_PG_OCP_Ipopt()` in case IPOPT is used), which solves the scenario OCP using the solver Altro. -For the results in Table IV of the paper, this script is repeated with seeds 1:100. +A Julia script that contains all the steps described in the following and exactly reproduces Figure 3 of the paper can be found at `PGopt/Julia/examples/PG_OCP_generic_basis_functions_Altro.jl`. For the results in Table IV of the paper, this script is repeated with seeds 1:100.
The runtime of the script is about 2 hours on a standard laptop. Using an improved function `phi()`, can reduce the runtime to about 50 minutes, but the results change slightly due to numerical reasons. Further explanations are given below. -A Julia script that contains all the steps described here can be found in the folder `PGopt/Julia/examples`. - -The runtime of the script is about 2 hours on a standard laptop. Using an improved function `phi()` can reduce the runtime to about 50 minutes, but the results change slightly due to numerical reasons. Further explanations are given below. +A similar example that utilizes the solver IPOPT can be found at `PGopt/Julia/examples/PG_OCP_generic_basis_functions_Ipopt.jl`. Due to the different solver, the results differ slightly from the ones presented in the paper. ## Define parameters First, load packages and initialize. @@ -68,7 +66,7 @@ In the following, all possible vectors ``j`` are constructed (i.e., `j_vec`). Th ```julia cart_prod_sets = Array{Any}(undef, n_z) # array of arrays; cart_prod_sets[i] corresponds to the i-th set to be considered for the Cartesian product, i.e., [1 : n_basis[i]]. for i = 1:n_z - cart_prod_sets[i] = Array(1:n_phi_dims[i]) + cart_prod_sets[i] = Array(1:n_phi_dims[i]) end subscript_values = Array{Int64}(undef, n_z) # contains the equivalent subscript values corresponding to a given single index i @@ -76,20 +74,20 @@ variants = [1; cumprod(n_phi_dims[1:end-1])] # required to convert the single in # Construct Cartesian product and calculate spectral densities. for i in 1:n_phi - # Convert the single index i to the equivalent subscript values. - remaining = i - 1 - for j in n_z:-1:1 - subscript_values[j] = floor(remaining / variants[j]) + 1 - remaining = mod(remaining, variants[j]) - end - - # Fill j_vec with the values belonging to the respective subscript indices. 
- for j in 1:n_z - j_vec[i, 1, j] = cart_prod_sets[j][subscript_values[j]] - end - - # Calculate the eigenvalue of the Laplace operator corresponding to the vector j_vec[i, 1, :] - see eq. (9) (right-hand side). - lambda[i, :] = (pi .* j_vec[i, 1, :] ./ (2 * dropdims(L, dims=tuple(findall(size(L) .== 1)...)))) .^ 2 + # Convert the single index i to the equivalent subscript values. + remaining = i - 1 + for j in n_z:-1:1 + subscript_values[j] = floor(remaining / variants[j]) + 1 + remaining = mod(remaining, variants[j]) + end + + # Fill j_vec with the values belonging to the respective subscript indices. + for j in 1:n_z + j_vec[i, 1, j] = cart_prod_sets[j][subscript_values[j]] + end + + # Calculate the eigenvalue of the Laplace operator corresponding to the vector j_vec[i, 1, :] - see eq. (9) (right-hand side). + lambda[i, :] = (pi .* j_vec[i, 1, :] ./ (2 * dropdims(L, dims=tuple(findall(size(L) .== 1)...)))) .^ 2 end # Reverse j_vec. @@ -101,18 +99,18 @@ There are two implementations of the phi function. This implementation is the original implementation, which is slow but exactly reproduces the results given in the paper. ```julia function phi_sampling(x, u) - # Initialize. - z = vcat(u, x) # augmented state - phi = Array{Float64}(undef, n_phi, size(z, 2)) - - Threads.@threads for i in axes(z, 2) - phi_temp = ones(n_phi) - for k in axes(z, 1) - phi_temp .= phi_temp .* ((1 ./ (sqrt.(L[:, :, k]))) .* sin.((pi .* j_vec[:, :, k] .* (z[k, i] .+ L[:, :, k])) ./ (2 .* L[:, :, k]))) - end - phi[:, i] .= phi_temp - end - return phi + # Initialize. 
+ z = vcat(u, x) # augmented state + phi = Array{Float64}(undef, n_phi, size(z, 2)) + + Threads.@threads for i in axes(z, 2) + phi_temp = ones(n_phi) + for k in axes(z, 1) + phi_temp .= phi_temp .* ((1 ./ (sqrt.(L[:, :, k]))) .* sin.((pi .* j_vec[:, :, k] .* (z[k, i] .+ L[:, :, k])) ./ (2 .* L[:, :, k]))) + end + phi[:, i] .= phi_temp + end + return phi end ``` ### Efficient implementation @@ -124,15 +122,15 @@ This is the improved implementation, which is significantly faster but yields sl L_sqrt_inv = 1 ./ sqrt.(L) pi_j_over_2L = pi .* j_vec ./ (2 .* L) function phi_sampling(x, u) - # Initialize. - z = vcat(u, x) # augmented state - phi = ones(n_phi, size(z, 2)) + # Initialize. + z = vcat(u, x) # augmented state + phi = ones(n_phi, size(z, 2)) - for k in axes(z, 1) - phi .= phi .* (L_sqrt_inv[:, :, k] .* sin.(pi_j_over_2L[:, :, k] * (z[k, :] .+ L[:, :, k])')) - end + for k in axes(z, 1) + phi .= phi .* (L_sqrt_inv[:, :, k] .* sin.(pi_j_over_2L[:, :, k] * (z[k, :] .+ L[:, :, k])')) + end - return phi + return phi end ``` ## Define prior @@ -146,7 +144,7 @@ Determine the parameters of the matrix normal prior (with mean matrix ``0``, rig ```julia V_diagonal = Array{Float64}(undef, size(lambda, 1)) # diagonal of V for i in axes(lambda, 1) - V_diagonal[i] = sf^2 * sqrt(opnorm(2 * pi * Diagonal(repeat(l, trunc(Int, n_z / size(l, 1))) .^ 2))) * exp.(-(pi^2 * transpose(sqrt.(lambda[i, :])) * Diagonal(repeat(l, trunc(Int, n_z / size(l, 1))) .^ 2) * sqrt.(lambda[i, :])) / 2) + V_diagonal[i] = sf^2 * sqrt(opnorm(2 * pi * Diagonal(repeat(l, trunc(Int, n_z / size(l, 1))) .^ 2))) * exp.(-(pi^2 * transpose(sqrt.(lambda[i, :])) * Diagonal(repeat(l, trunc(Int, n_z / size(l, 1))) .^ 2) * sqrt.(lambda[i, :])) / 2) end V = Diagonal(V_diagonal) ``` @@ -171,7 +169,7 @@ Generate training data. 
```julia # Parameters for data generation T = 2000 # number of steps for training -T_test = 500 # number of steps used for testing (via forward simulation - see below) +T_test = 500 # number of steps used for testing (via forward simulation - see below) T_all = T + T_test # Unknown system @@ -193,8 +191,8 @@ x = Array{Float64}(undef, n_x, T_all + 1) # true latent state trajectory x[:, 1] = rand(x_init_dist, 1) # random initial state y = Array{Float64}(undef, n_y, T_all) # output trajectory (measured) for t in 1:T_all - x[:, t+1] = f_true(x[:, t], u[:, t]) + rand(mvn_v_true, 1) - y[:, t] = g_true(x[:, t], u[:, t]) + rand(mvn_e_true, 1) + x[:, t+1] = f_true(x[:, t], u[:, t]) + rand(mvn_v_true, 1) + y[:, t] = g_true(x[:, t], u[:, t]) + rand(mvn_e_true, 1) end # Split data into training and test data. @@ -213,10 +211,11 @@ PG_samples = particle_Gibbs(u_training, y_training, K, K_b, k_d, N, phi, Lambda_ time_sampling = time() - sampling_timer ``` -## Define and solve optimal control problem -Afterward, define the optimal control problem of the form +## Define and solve optimal control problem using Altro +In the following, the optimal control problem is defined and solved using the solver Altro. An example using the solver IPOPT is given in the next section. +With the Altro solver, problems of the following form can be solved -``\min \sum_{t=0}^{H} \frac{1}{2} u_t \operatorname{diag}(R_{\mathrm{cost}}) u_t`` +``\min \sum_{t=0}^{H} \frac{1}{2} u_t \operatorname{diag}(R_{\mathrm{cost}}) u_t`` subject to: ```math @@ -243,9 +242,153 @@ y_min = reshape([-fill(Inf, 20); 2 * ones(6); -fill(Inf, 15)], (1, H)) # min sys R_cost_diag = [2] # diagonal of R_cost ``` -Solve the optimal control problem. In this case, no formal guarantees for the constraint satisfaction can be derived since Assumption 1 is not satisfied as the employed basis functions cannot represent the actual dynamics with arbitrary precision. +Solve the optimal control problem using the solver Altro. 
In this case, no formal guarantees for the constraint satisfaction can be derived since Assumption 1 is not satisfied as the employed basis functions cannot represent the actual dynamics with arbitrary precision. +```julia +u_opt, x_opt, y_opt, J_opt, penalty_max = solve_PG_OCP_Altro(PG_samples, phi_opt, R, H, u_min, u_max, y_min, y_max, R_cost_diag; K_pre_solve=20, opts=opts)[[1, 2, 3, 4, 8]] +``` +Finally, apply the input trajectory to the actual system and plot the output trajectories. +```julia +# Apply input trajectory to the actual system. +y_sys = Array{Float64}(undef, n_y, H) +x_sys = Array{Float64}(undef, n_x, H) +x_sys[:, 1] = x_training[:, end] +u_sys = [u_opt 0] +for t in 1:H + if t >= 2 + x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) + end + y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) +end + +# Plot predictions. +plot_predictions(y_opt, y_sys; plot_percentiles=false, y_min=y_min, y_max=y_max) +``` + +## Define and solve optimal control problem using Ipopt +Besides the solver Altro, IPOPT can be used to solve the OCP. The solver IPOPT is more general than Altro, and this implementation allows arbitrary cost functions ``J_H(u_{1:H},x_{1:H},y_{1:H})``, measurement functions ``y=g(x,u)``, and constraints ``h(u_{1:H},x_{1:H},y_{1:H})``. + +First, load the necessary packages. Further information on how to install `HSL_jll` can be found [here](../index.md). +```julia +using JuMP +import HSL_jll +``` +Then, set up the OCP. +```julia +# Set up OCP. +# Horizon +H = 41 + +# Define constraints for u. +u_max = repeat([5], 1, H) # max control input +u_min = repeat([-5], 1, H) # min control input +n_input_const = sum(isfinite.(u_min)) + sum(isfinite.(u_max)) + +# Define constraints for y. 
+y_max = reshape(fill(Inf, H), (1, H)) # max system output +y_min = reshape([-fill(Inf, 20); 2 * ones(6); -fill(Inf, 15)], (1, H)) # min system output +n_output_const = sum(isfinite.(y_min)) + sum(isfinite.(y_max)) +``` +The following functions define the input and output constraints. These functions return the constraint vectors ``h_1(u_{1:H})`` (`bounded_input()`) and ``h_2(u_{1:H},x_{1:H}^[k],y_{1:H}^[k])`` (`bounded_output()`). Feasible solutions must satisfy ``h_1(u_{1:H}) \leq 0`` and ``h_2(u_{1:H},x_{1:H}^[k],y_{1:H}^[k]) \leq 0 \; \forall k``. The functions should be callable with arrays of type `VariableRef` and `<:Number`. +```julia +function bounded_input(u::Array{VariableRef}) + # Initialize constraint vector. + h_u = Array{AffExpr}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +function bounded_input(u::Array{<:Number}) + # Initialize constraint vector. + h_u = Array{Float64}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +function bounded_output(u::Array{VariableRef}, x::Array{VariableRef}, y::Array{VariableRef}) + # Initialize constraint vector. + h_scenario = Array{AffExpr}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. 
+ i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end + +function bounded_output(u::Array{<:Number}, x::Array{<:Number}, y::Array{<:Number}) + # Initialize constraint vector. + h_scenario = Array{Float64}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end +``` +Define the cost function. In this case the objective is ``\min \sum_{t=0}^{H} u_t^2``. +```julia +function cost_function(u) + cost = sum(u.^2) + return cost +end +``` +Then, set the solver's options. The option `"linear_solver" => "ma57"` requires the proprietary [HSL Linear Solvers for Julia](https://licences.stfc.ac.uk/product/libhsl). +```julia +Ipopt_options = Dict("max_iter" => 10000, "tol" => 1e-8, "hsllib" => HSL_jll.libhsl_path, "linear_solver" => "ma57") +``` +Solve the optimal control problem using the solver IPOPT. In this case, no formal guarantees for the constraint satisfaction can be derived since Assumption 1 is not satisfied as the employed basis functions cannot represent the actual dynamics with arbitrary precision. +Since the cost function depends only on the control inputs ``u_{0:H}``, the optional argument `J_u` is set to `true`. 
```julia -x_opt, u_opt, y_opt, J_opt, penalty_max = solve_PG_OCP(PG_samples, phi_opt, R, H, u_min, u_max, y_min, y_max, R_cost_diag; K_pre_solve=20, opts=opts)[[1, 2, 3, 4, 8]] +u_opt, x_opt, y_opt, J_opt, solve_successful, iterations, mu = solve_PG_OCP_Ipopt(PG_samples, phi, g, R, H, cost_function, bounded_output, bounded_input; J_u=true, K_pre_solve=20, solver_opts=Ipopt_options) ``` Finally, apply the input trajectory to the actual system and plot the output trajectories. ```julia @@ -255,10 +398,10 @@ x_sys = Array{Float64}(undef, n_x, H) x_sys[:, 1] = x_training[:, end] u_sys = [u_opt 0] for t in 1:H - if t >= 2 - x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) - end - y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) + if t >= 2 + x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) + end + y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) end # Plot predictions. diff --git a/Julia/docs/src/examples/PG_OCP_known_basis_functions.md b/Julia/docs/src/examples/PG_OCP_known_basis_functions.md index 3f7bd35..6084d27 100644 --- a/Julia/docs/src/examples/PG_OCP_known_basis_functions.md +++ b/Julia/docs/src/examples/PG_OCP_known_basis_functions.md @@ -10,13 +10,11 @@ Cardinality of the support sub-sample (s): 7 Max. constraint violation probability (1-epsilon): 10.22 % ``` -First, the algorithm and simulation parameters are defined, and training data is generated. Then, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP_greedy_guarantees()`, which solves the scenario OCP and inferres probabilistic constraint satisfaction guarantees by greedily removing constraints and solving the corresponding reduced OCP. +First, the algorithm and simulation parameters are defined, and training data is generated. 
Then, by calling the function `particle_Gibbs()`, samples are drawn from the posterior distribution using particle Gibbs sampling. These samples are then passed to the function `solve_PG_OCP_Altro_greedy_guarantees()` (or `solve_PG_OCP_Ipopt_greedy_guarantees()` in case IPOPT is used), which solves the scenario OCP using the solver Altro and infers probabilistic constraint satisfaction guarantees by greedily removing constraints and solving the corresponding reduced OCP. -For the results in Table II of the paper, this script is repeated with seeds 1:100. +A Julia script that contains all the steps described in the following and exactly reproduces Figure 2 of the paper can be found at `PGopt/Julia/examples/PG_OCP_known_basis_functions_Altro.jl`. For the results in Table II of the paper, this script is repeated with seeds 1:100. The runtime of the script is about 2 hours on a standard laptop. -A Julia script that contains all the steps described here can be found in the folder `PGopt/Julia/examples`. - -The runtime of the script is about 2 hours on a standard laptop. +A similar example that utilizes the solver IPOPT can be found at `PGopt/Julia/examples/PG_OCP_known_basis_functions_Ipopt.jl`. Due to the different solver, the results differ slightly from the ones presented in the paper. ## Define prior and parameters First, load packages and initialize. @@ -82,7 +80,7 @@ Generate training data. 
```julia # Parameters for data generation T = 2000 # number of steps for training -T_test = 500 # number of steps used for testing (via forward simulation - see below) +T_test = 500 # number of steps used for testing (via forward simulation - see below) T_all = T + T_test # Unknown system @@ -104,8 +102,8 @@ x = Array{Float64}(undef, n_x, T_all + 1) # true latent state trajectory x[:, 1] = rand(x_init_dist, 1) # random initial state y = Array{Float64}(undef, n_y, T_all) # output trajectory (measured) for t in 1:T_all - x[:, t+1] = f_true(x[:, t], u[:, t]) + rand(mvn_v_true, 1) - y[:, t] = g_true(x[:, t], u[:, t]) + rand(mvn_e_true, 1) + x[:, t+1] = f_true(x[:, t], u[:, t]) + rand(mvn_v_true, 1) + y[:, t] = g_true(x[:, t], u[:, t]) + rand(mvn_e_true, 1) end # Split data into training and test data. @@ -124,10 +122,11 @@ PG_samples = particle_Gibbs(u_training, y_training, K, K_b, k_d, N, phi, Lambda_ time_sampling = time() - sampling_timer ``` -## Define and solve optimal control problem -Afterward, define the optimal control problem of the form +## Define and solve optimal control problem using Altro +In the following, the optimal control problem is defined and solved using the solver Altro. An example using the solver IPOPT is given in the next section. +With the Altro solver, problems of the following form can be solved -``\min \sum_{t=0}^{H} \frac{1}{2} u_t \operatorname{diag}(R_{\mathrm{cost}}) u_t`` +``\min \sum_{t=0}^{H} \frac{1}{2} u_t \operatorname{diag}(R_{\mathrm{cost}}) u_t`` subject to: ```math @@ -158,10 +157,159 @@ Select the confidence parameter for the theoretical guarantees. ```julia β = 0.01 ``` -Solve the optimal control problem and determine a support sub-sample with cardinality ``s`` via a greedy constraint removal. +Solve the optimal control problem using the solver Altro and determine a support sub-sample with cardinality ``s`` via a greedy constraint removal. 
+Based on the cardinality ``s``, a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory ``u_{0:H}`` is applied to the unknown system is calculated (i.e., ``1-\epsilon`` is determined). +```julia +u_opt, x_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations = solve_PG_OCP_Altro_greedy_guarantees(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; K_pre_solve=20) +``` +Finally, apply the input trajectory to the actual system and plot the output trajectories. +```julia +# Apply input trajectory to the actual system. +y_sys = Array{Float64}(undef, n_y, H) +x_sys = Array{Float64}(undef, n_x, H) +x_sys[:, 1] = x_training[:, end] +u_sys = [u_opt 0] +for t in 1:H + if t >= 2 + x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) + end + y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) +end + +# Plot predictions. +plot_predictions(y_opt, y_sys; plot_percentiles=false, y_min=y_min, y_max=y_max) +``` + +## Define and solve optimal control problem using Ipopt +Besides the solver Altro, IPOPT can be used to solve the OCP. The solver IPOPT is more general than Altro, and this implementation allows arbitrary cost functions ``J_H(u_{1:H},x_{1:H},y_{1:H})``, measurement functions ``y=g(x,u)``, and constraints ``h(u_{1:H},x_{1:H},y_{1:H})``. + +First, load the necessary packages. +```julia +using JuMP +import HSL_jll # requires a proprietary license +``` +Then, set up the OCP. +```julia +# Set up OCP. +# Horizon +H = 41 + +# Define constraints for u. +u_max = repeat([5], 1, H) # max control input +u_min = repeat([-5], 1, H) # min control input +n_input_const = sum(isfinite.(u_min)) + sum(isfinite.(u_max)) + +# Define constraints for y. 
+y_max = reshape(fill(Inf, H), (1, H)) # max system output +y_min = reshape([-fill(Inf, 20); 2 * ones(6); -fill(Inf, 15)], (1, H)) # min system output +n_output_const = sum(isfinite.(y_min)) + sum(isfinite.(y_max)) +``` +The following functions define the input and output constraints. These functions return the constraint vectors ``h_1(u_{1:H})`` (`bounded_input()`) and ``h_2(u_{1:H},x_{1:H}^[k],y_{1:H}^[k])`` (`bounded_output()`). Feasible solutions must satisfy ``h_1(u_{1:H}) \leq 0`` and ``h_2(u_{1:H},x_{1:H}^[k],y_{1:H}^[k]) \leq 0 \; \forall k``. The functions should be callable with arrays of type `VariableRef` and `<:Number`. +```julia +function bounded_input(u::Array{VariableRef}) + # Initialize constraint vector. + h_u = Array{AffExpr}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +function bounded_input(u::Array{<:Number}) + # Initialize constraint vector. + h_u = Array{Float64}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +function bounded_output(u::Array{VariableRef}, x::Array{VariableRef}, y::Array{VariableRef}) + # Initialize constraint vector. + h_scenario = Array{AffExpr}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. 
+ i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end + +function bounded_output(u::Array{<:Number}, x::Array{<:Number}, y::Array{<:Number}) + # Initialize constraint vector. + h_scenario = Array{Float64}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end +``` +Define the cost function. In this case the objective is ``\min \sum_{t=0}^{H} u_t^2``. +```julia +function cost_function(u) + cost = sum(u.^2) + return cost +end +``` +Then, set the solver's options. The option `"linear_solver" => "ma57"` requires the proprietary [HSL Linear Solvers for Julia](https://licences.stfc.ac.uk/product/libhsl). +```julia +Ipopt_options = Dict("max_iter" => 10000, "tol" => 1e-8, "hsllib" => HSL_jll.libhsl_path, "linear_solver" => "ma57") +``` +Select the confidence parameter for the theoretical guarantees. +```julia +β = 0.01 +``` +Solve the optimal control problem using the solver IPOPT and determine a support sub-sample with cardinality ``s`` via a greedy constraint removal. Based on the cardinality ``s``, a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory ``u_{0:H}`` is applied to the unknown system is calculated (i.e., ``1-\epsilon`` is determined). +Since the cost function depends only on the control inputs ``u_{0:H}``, the optional argument `J_u` is set to `true`. 
```julia -x_opt, u_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations = solve_PG_OCP_greedy_guarantees(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; K_pre_solve=20) +u_opt, x_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations = solve_PG_OCP_Ipopt_greedy_guarantees(PG_samples, phi, g, R, H, cost_function, bounded_output, bounded_input, β; J_u=true, K_pre_solve=5, solver_opts=copy(Ipopt_options)) ``` Finally, apply the input trajectory to the actual system and plot the output trajectories. ```julia @@ -171,10 +319,10 @@ x_sys = Array{Float64}(undef, n_x, H) x_sys[:, 1] = x_training[:, end] u_sys = [u_opt 0] for t in 1:H - if t >= 2 - x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) - end - y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) + if t >= 2 + x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) + end + y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) end # Plot predictions. diff --git a/Julia/docs/src/examples/autocorrelation.md b/Julia/docs/src/examples/autocorrelation.md index 6ec6de5..2384fa8 100644 --- a/Julia/docs/src/examples/autocorrelation.md +++ b/Julia/docs/src/examples/autocorrelation.md @@ -6,7 +6,7 @@ This example reproduces the normalized auto-correlation function plot (Figure 1) Assuming knowledge of the basis functions, samples are drawn from the posterior distribution over model parameters and latent state trajectories using the function `particle_Gibbs()` without thinning. Afterward, the autocorrelation is plotted using the function `plot_autocorrelation()`. -A Julia script that contains all the steps described here can be found in the folder `PGopt/Julia/examples`. +A Julia script that contains all the steps described here can be found at `PGopt/Julia/examples/autocorrelation.jl`. 
The runtime of the script is about 10 minutes on a standard laptop. diff --git a/Julia/docs/src/index.md b/Julia/docs/src/index.md index 321dd95..4866feb 100644 --- a/Julia/docs/src/index.md +++ b/Julia/docs/src/index.md @@ -5,16 +5,18 @@ The approach is explained in the paper "Learning-Based Optimal Control with Performance Guarantees for Unknown Systems with Latent States", available as a preprint on [arXiv](https://arxiv.org/abs/2303.17963). ## Versions -This document describes the Julia implementation of `PGopt`, which does not require proprietary software. The open-source solver [Altro](https://github.com/RoboticExplorationLab/Altro.jl) is used for the optimization. The results presented in the paper were generated with this version, and the software reproduces these results exactly. +This document describes the Julia implementation of `PGopt`. In this version, the solvers [Altro](https://github.com/RoboticExplorationLab/Altro.jl) and [IPOPT](https://coin-or.github.io/Ipopt/) can be employed to solve the optimal control problem (OCP). -**Please note that this version has several limitations: only cost functions of the form $J_H=\sum\nolimits_{t=0}^H \frac{1}{2} u_t R u_t$, measurement functions of the form $y=x_{1:n_y}$, and output constraints of the form $y_\mathrm{min} \leq y \leq y_\mathrm{max}$ are supported.** +The results presented in the paper were generated with the solver Altro. Please note that this implementation has several limitations: only cost functions of the form $J_H=\sum\nolimits_{t=0}^H \frac{1}{2} u_t R u_t$, measurement functions of the form $y=x_{1:n_y}$, and output constraints of the form $y_\mathrm{min} \leq y \leq y_\mathrm{max}$ are supported. -Besides the Julia implementation, there is also a MATLAB implementation, which is more general and allows arbitrary cost functions $J_H(u_{1:H},x_{1:H},y_{1:H})$, measurement functions $y=g(x,u)$, and constraints $h(u_{1:H},x_{1:H},y_{1:H})$. 
Further information can be found [here](https://github.com/TUM-ITR/PGopt/tree/main/MATLAB). +The solver IPOPT is more general than Altro, and this implementation allows arbitrary cost functions $J_H(u_{1:H},x_{1:H},y_{1:H})$, measurement functions $y=g(x,u)$, and constraints $h(u_{1:H},x_{1:H},y_{1:H})$. However, using IPOPT together with the proprietary [HSL Linear Solvers for Julia](https://licences.stfc.ac.uk/product/libhsl) (`HSL_jll.jl`) is recommended. A license (free to academics) is required. + +Besides the Julia implementation, there is also a MATLAB implementation that utilizes [CasADi](https://web.casadi.org/) and [IPOPT](https://coin-or.github.io/Ipopt/). Further information can be found [here](https://github.com/TUM-ITR/PGopt/tree/main/MATLAB). ## Installation `PGopt` can be installed using the Julia package manager. Start a Pkg REPL (press `]` in a Julia REPL), and install `PGopt` via ``` -pkg>add https://github.com/TUM-ITR/PGopt:Julia +pkg> add https://github.com/TUM-ITR/PGopt:Julia ``` Alternatively, to inspect the source code more easily, download the source code from [GitHub](https://github.com/TUM-ITR/PGopt). Navigate to the folder `PGopt/Julia`, start a Pkg REPL (press `]` in a Julia REPL), and install the dependencies via @@ -23,4 +25,4 @@ pkg>activate . pkg>instantiate ``` -You can then execute the scripts `autocorrelation.jl`, `PG_OCP_known_basis_functions.jl`, or `PG_generic_basis_functions.jl` in the folder `PGopt/Julia/examples`. \ No newline at end of file +You can then execute the examples in the folder `PGopt/Julia/examples`. 
\ No newline at end of file diff --git a/Julia/docs/src/list_of_functions.md b/Julia/docs/src/list_of_functions.md index ccd616a..9015fe1 100644 --- a/Julia/docs/src/list_of_functions.md +++ b/Julia/docs/src/list_of_functions.md @@ -2,12 +2,14 @@ ```@docs particle_Gibbs(u_training, y_training, K, K_b, k_d, N, phi::Function, Lambda_Q, ell_Q, Q_init, V, A_init, x_init_dist, g, R; x_prim=nothing) -solve_PG_OCP(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, active_constraints=nothing, opts=nothing, print_progress=true) -solve_PG_OCP_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, opts=nothing, print_progress=true) +solve_PG_OCP_Altro(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, active_constraints=nothing, opts=nothing, print_progress=true) +solve_PG_OCP_Altro_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, opts=nothing, print_progress=true) +solve_PG_OCP_Ipopt(PG_samples::Vector{PG_sample}, phi::Function, g::Function, R, H, J, h_scenario, h_u; J_u=false, x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, solver_opts=nothing, print_progress=true) +solve_PG_OCP_Ipopt_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, g::Function, R, H, J, h_scenario, h_u, β; J_u=false, x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, solver_opts=nothing, print_progress=true) test_prediction(PG_samples::Vector{PG_sample}, phi::Function, g, R, k_n, u_test, y_test) plot_predictions(y_pred, 
y_test; plot_percentiles=false, y_min=nothing, y_max=nothing) plot_autocorrelation(PG_samples::Vector{PG_sample}; max_lag=0) epsilon(s::Int64, K::Int64, β::Float64) MNIW_sample(Phi, Psi, Sigma, V, Lambda_Q, ell_Q, T) systematic_resampling(W, N) -``` \ No newline at end of file +``` diff --git a/Julia/examples/PG_OCP_generic_basis_functions.jl b/Julia/examples/PG_OCP_generic_basis_functions_Altro.jl similarity index 98% rename from Julia/examples/PG_OCP_generic_basis_functions.jl rename to Julia/examples/PG_OCP_generic_basis_functions_Altro.jl index e58eb37..f5b8e59 100644 --- a/Julia/examples/PG_OCP_generic_basis_functions.jl +++ b/Julia/examples/PG_OCP_generic_basis_functions_Altro.jl @@ -7,7 +7,6 @@ using PGopt using LinearAlgebra using Random using Distributions -using Printf using Plots using Altro @@ -260,7 +259,7 @@ opts.static_bp = false opts.square_root = true # Solve PG OCP. -x_opt, u_opt, y_opt, J_opt, penalty_max = solve_PG_OCP(PG_samples, phi_opt, R, H, u_min, u_max, y_min, y_max, R_cost_diag; K_pre_solve=20, opts=opts)[[1, 2, 3, 4, 8]] +u_opt, x_opt, y_opt, J_opt, penalty_max = solve_PG_OCP_Altro(PG_samples, phi_opt, R, H, u_min, u_max, y_min, y_max, R_cost_diag; K_pre_solve=20, opts=opts)[[1, 2, 3, 4, 8]] # Apply input trajectory to the actual system. y_sys = Array{Float64}(undef, n_y, H) diff --git a/Julia/examples/PG_OCP_generic_basis_functions_Ipopt.jl b/Julia/examples/PG_OCP_generic_basis_functions_Ipopt.jl new file mode 100644 index 0000000..6ac7447 --- /dev/null +++ b/Julia/examples/PG_OCP_generic_basis_functions_Ipopt.jl @@ -0,0 +1,317 @@ +# This code produces results for the optimal control approach with generic basis functions similar to the ones given in Section V-B (Fig. 3) of the paper +# "Learning-Based Optimal Control with Performance Guarantees for Unknown Systems with Latent States", available as pre-print on arXiv: https://arxiv.org/abs/2303.17963. +# The solver Ipopt is used to solve the optimal control problem. 
+# Since, for the results in the paper, the solver Altro was used to solve the optimal control problem, the results are not exactly reproduced. + +using PGopt +using LinearAlgebra +using Random +using Distributions +using Plots +using JuMP +import HSL_jll + +# Specify seed (for reproducible results). +Random.seed!(82) + +# Time sampling algorithm. +sampling_timer = time() + +# Learning parameters +K = 100 # number of PG samples +k_d = 50 # number of samples to be skipped to decrease correlation (thinning) +K_b = 1000 # length of burn-in period +N = 30 # number of particles of the particle filter + +# Number of states, etc. +n_x = 2 # number of states +n_u = 1 # number of control inputs +n_y = 1 # number of outputs +n_z = n_x + n_u # number of augmented states + +# Generate generic basis functions and priors based on a reduced-rank GP approximation. +# See the paper +# A. Svensson and T. B. Schön, "A flexible state–space model for learning nonlinear dynamical systems," Automatica, vol. 80, pp. 189–199, 2017. +# and the code provided in the supplementary material for further explanations. The equation numbers given in the following refer to this paper. +n_phi_x = [5 5] # number of basis functions for each state +n_phi_u = 5 # number of basis functions for the control input +n_phi_dims = [n_phi_u n_phi_x] # array containing the number of basis functions for each input dimension +n_phi = prod(n_phi_dims) # total number of basis functions +l_x = 20 +L_x = [l_x l_x] # interval lengths for x +L_u = 10 # interval length for u +L = zeros(1, 1, n_z) # array containing the interval lengths +L[1, 1, :] = [L_u L_x] + +# Hyperparameters of the squared exponential kernel +l = [2] # length scale +sf = 100 # scale factor + +# Initialize. +j_vec = zeros(n_phi, 1, n_z) # contains all possible vectors j; j_vec[i, 1, :] corresponds to the vector j in eq. (5) for basis function i +lambda = zeros(n_phi, n_z) # lambda[i, :] corresponds to the vector λ in eq. 
(9) (right-hand side) for basis function i + +# In the following, all possible vectors j are constructed (i.e., j_vec). The possible combinations correspond to the Cartesian product [1 : n_basis[1]] x ... x [1 : n_basis[end]]. +cart_prod_sets = Array{Any}(undef, n_z) # array of arrays; cart_prod_sets[i] corresponds to the i-th set to be considered for the Cartesian product, i.e., [1 : n_basis[i]]. +for i = 1:n_z + cart_prod_sets[i] = Array(1:n_phi_dims[i]) +end + +subscript_values = Array{Int64}(undef, n_z) # contains the equivalent subscript values corresponding to a given single index i +variants = [1; cumprod(n_phi_dims[1:end-1])] # required to convert the single index i to the equivalent subscript value + +# Construct Cartesian product and calculate spectral densities. +for i in 1:n_phi + # Convert the single index i to the equivalent subscript values. + remaining = i - 1 + for j in n_z:-1:1 + subscript_values[j] = floor(remaining / variants[j]) + 1 + remaining = mod(remaining, variants[j]) + end + + # Fill j_vec with the values belonging to the respective subscript indices. + for j in 1:n_z + j_vec[i, 1, j] = cart_prod_sets[j][subscript_values[j]] + end + + # Calculate the eigenvalue of the Laplace operator corresponding to the vector j_vec[i, 1, :] - see eq. (9) (right-hand side). + lambda[i, :] = (pi .* j_vec[i, 1, :] ./ (2 * dropdims(L, dims=tuple(findall(size(L) .== 1)...)))) .^ 2 +end + +# Reverse j_vec. +j_vec = reverse(j_vec, dims=3) + +# Define basis functions phi. +# This function evaluates Φ_{1 : n_x+n_u} according to eq. (5). +# Precompute. +L_sqrt_inv = 1 ./ sqrt.(L) +pi_j_over_2L = pi .* j_vec ./ (2 .* L) +function phi_sampling(x, u) + # Initialize. 
+ z = vcat(u, x) # augmented state + phi = ones(n_phi, size(z, 2)) + + for k in axes(z, 1) + phi .= phi .* (L_sqrt_inv[:, :, k] .* sin.(pi_j_over_2L[:, :, k] * (z[k, :] .+ L[:, :, k])')) + end + + return phi +end + +# Prior for Q - inverse Wishart distribution +ell_Q = 10 # degrees of freedom +Lambda_Q = 100 * I(n_x) # scale matrix + +# Prior for A - matrix normal distribution (mean matrix = 0, right covariance matrix = Q (see above), left covariance matrix = V) +# V is derived from the GP approximation according to eq. (8b), (11a), and (9). +V_diagonal = Array{Float64}(undef, size(lambda, 1)) # diagonal of V +for i in axes(lambda, 1) + V_diagonal[i] = sf^2 * sqrt(opnorm(2 * pi * Diagonal(repeat(l, trunc(Int, n_z / size(l, 1))) .^ 2))) * exp.(-(pi^2 * transpose(sqrt.(lambda[i, :])) * Diagonal(repeat(l, trunc(Int, n_z / size(l, 1))) .^ 2) * sqrt.(lambda[i, :])) / 2) +end +V = Diagonal(V_diagonal) + +# Initial guess for model parameters +Q_init = Lambda_Q # initial Q +A_init = zeros(n_x, n_phi) # initial A + +# Normally distributed initial state +x_init_mean = [2, 2] # mean +x_init_var = 1 * I # variance +x_init_dist = MvNormal(x_init_mean, x_init_var) + +# Define measurement model - assumed to be known (without loss of generality). +# Make sure that g(x, u) is defined in vectorized form, i.e., g(zeros(n_x, N), zeros(n_u, N)) should return a matrix of dimension (n_y, N). +g(x, u) = [1 0] * x # observation function +R = 0.1 # variance of zero-mean Gaussian measurement noise + +# Parameters for data generation +T = 2000 # number of steps for training +T_test = 500 # number of steps for testing +T_all = T + T_test + +# Generate training data. +# Choose the actual system (to be learned) and generate input-output data of length T_all. +# The system is of the form +# x_t+1 = f_true(x_t, u_t) + N(0, Q_true) +# y_t = g_true(x_t, u_t) + N(0, R_true). 
+ +# Unknown system +f_true(x, u) = [0.8 * x[1, :] - 0.5 * x[2, :] + 0.1 * cos.(3 * x[1, :]) .* x[2, :]; 0.4 * x[1, :] + 0.5 * x[2, :] + (ones(size(x, 2)) + 0.3 * sin.(2 * x[2, :])) .* u[1, :]] # true state transition function +Q_true = [0.03 -0.004; -0.004 0.01] # true process noise variance +mvn_v_true = MvNormal(zeros(n_x), Q_true) # true process noise distribution +g_true = g # true measurement function +R_true = R # true measurement noise variance +mvn_e_true = MvNormal(zeros(n_y), R_true) # true measurement noise distribution + +# Input trajectory used to generate training and test data +mvn_u_training = Normal(0, 3) # training input distribution +u_training = rand(mvn_u_training, (1, T)) # training inputs +u_test = 3 * sin.(2 * pi * (1 / T_test) * (Array(1:T_test)' .- 1)) # test inputs +u = reshape([u_training u_test], (n_u, T_all)) # training + test inputs + +# Generate data by forward simulation. +x = Array{Float64}(undef, n_x, T_all + 1) # true latent state trajectory +x[:, 1] = rand(x_init_dist, 1) # random initial state +y = Array{Float64}(undef, n_y, T_all) # output trajectory (measured) +for t in 1:T_all + x[:, t+1] = f_true(x[:, t], u[:, t]) + rand(mvn_v_true, 1) + y[:, t] = g_true(x[:, t], u[:, t]) + rand(mvn_e_true, 1) +end + +# Split data into training and test data. +u_training = u[:, 1:T] +x_training = x[:, 1:T+1] +y_training = y[:, 1:T] + +u_test = u[:, T+1:end] +x_test = x[:, T+1:end] +y_test = y[:, T+1:end] + +# Plot data. +# plot(Array(1:T_all), u[1,:], label="input", lw=2, legend=:topright); +# plot!(Array(1:T_all), y[1,:], label="output", lw=2); +# xlabel!("t"); +# ylabel!("u | y"); + +# Learn models. +# Result: K models of the type +# x_t+1 = PG_samples[i].A*phi(x_t, u_t) + N(0, PG_samples[i].Q), +# where phi are the basis functions defined above. 
+PG_samples = particle_Gibbs(u_training, y_training, K, K_b, k_d, N, phi_sampling, Lambda_Q, ell_Q, Lambda_Q, V, A_init, x_init_dist, g, R) + +time_sampling = time() - sampling_timer + +# Test the models with the test data by simulating it forward in time. +# test_prediction(PG_samples, phi, g, R, 10, u_test, y_test) + +# Plot autocorrelation. +# plot_autocorrelation(PG_samples; max_lag=K-1) + +# Set up OCP. +# Horizon +H = 41 + +# Define constraints for u. +u_max = repeat([5], 1, H) # max control input +u_min = repeat([-5], 1, H) # min control input +n_input_const = sum(isfinite.(u_min)) + sum(isfinite.(u_max)) + +function bounded_input(u::Array{VariableRef}) + # Initialize constraint vector. + h_u = Array{AffExpr}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +function bounded_input(u::Array{<:Number}) + # Initialize constraint vector. + h_u = Array{Float64}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +# Define constraints for y. +y_max = reshape(fill(Inf, H), (1, H)) # max system output +y_min = reshape([-fill(Inf, 20); 2 * ones(6); -fill(Inf, 15)], (1, H)) # min system output +n_output_const = sum(isfinite.(y_min)) + sum(isfinite.(y_max)) + +function bounded_output(u::Array{VariableRef}, x::Array{VariableRef}, y::Array{VariableRef}) + # Initialize constraint vector. 
+ h_scenario = Array{AffExpr}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end + +function bounded_output(u::Array{<:Number}, x::Array{<:Number}, y::Array{<:Number}) + # Initialize constraint vector. + h_scenario = Array{Float64}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end + +# Define cost function. +# Objective: min ∑_{∀t} u_t^2. +function cost_function(u) + cost = sum(u .^ 2) + return cost +end + +# Ipopt options +Ipopt_options = Dict("max_iter" => 10000, "tol" => 1e-8, "hsllib" => HSL_jll.libhsl_path, "linear_solver" => "ma57") + +# Confidence parameter for the theoretical guarantees +β = 0.01 + +# Solve the PG OCP. +u_opt, x_opt, y_opt, J_opt, solve_successful, iterations, mu = solve_PG_OCP_Ipopt(PG_samples, phi, g, R, H, cost_function, bounded_output, bounded_input; J_u=true, K_pre_solve=20, solver_opts=Ipopt_options) + +# Apply input trajectory to the actual system. +y_sys = Array{Float64}(undef, n_y, H) +x_sys = Array{Float64}(undef, n_x, H) +x_sys[:, 1] = x_training[:, end] +u_sys = [u_opt 0] +for t in 1:H + if t >= 2 + x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) + end + y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) +end + +# Plot predictions. 
+plot_predictions(y_opt, y_sys; plot_percentiles=false, y_min=y_min, y_max=y_max) \ No newline at end of file diff --git a/Julia/examples/PG_OCP_known_basis_functions.jl b/Julia/examples/PG_OCP_known_basis_functions_Altro.jl similarity index 94% rename from Julia/examples/PG_OCP_known_basis_functions.jl rename to Julia/examples/PG_OCP_known_basis_functions_Altro.jl index 5788c83..a04f644 100644 --- a/Julia/examples/PG_OCP_known_basis_functions.jl +++ b/Julia/examples/PG_OCP_known_basis_functions_Altro.jl @@ -7,7 +7,6 @@ using PGopt using LinearAlgebra using Random using Distributions -using Printf using Plots using Altro @@ -136,10 +135,10 @@ R_cost_diag = [2] # diagonal of R_cost β = 0.01 # Solve the PG OCP. -# x_opt, u_opt, y_opt, J_opt, run_status, iter, iterations_outer, penalty_max = solve_PG_OCP(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; K_pre_solve=20) +# u_opt, x_opt, y_opt, J_opt, run_status, iter, iterations_outer, penalty_max = solve_PG_OCP_Altro(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; K_pre_solve=20) # Solve the PG OCP and determine complexity s and max constraint violation probability via greedy algorithm. -x_opt, u_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations = solve_PG_OCP_greedy_guarantees(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; K_pre_solve=20) +u_opt, x_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations = solve_PG_OCP_Altro_greedy_guarantees(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; K_pre_solve=20) # Apply input trajectory to the actual system. 
y_sys = Array{Float64}(undef, n_y, H) diff --git a/Julia/examples/PG_OCP_known_basis_functions_Ipopt.jl b/Julia/examples/PG_OCP_known_basis_functions_Ipopt.jl new file mode 100644 index 0000000..3846301 --- /dev/null +++ b/Julia/examples/PG_OCP_known_basis_functions_Ipopt.jl @@ -0,0 +1,251 @@ +# This code produces results for the optimal control approach with known basis functions similar to the ones given in Section V-B (Fig. 2) of the paper +# "Learning-Based Optimal Control with Performance Guarantees for Unknown Systems with Latent States", available as pre-print on arXiv: https://arxiv.org/abs/2303.17963. +# The solver Ipopt is used to solve the optimal control problem. +# Since, for the results in the paper, the solver Altro was used to solve the optimal control problem, the results are not exactly reproduced. + +using PGopt +using LinearAlgebra +using Random +using Distributions +using Plots +using JuMP +import HSL_jll + +# Specify seed (for reproducible results). +Random.seed!(82) + +# Time PGS algorithm. +sampling_timer = time() + +# Learning parameters +K = 200 # number of PG samples +k_d = 50 # number of samples to be skipped to decrease correlation (thinning) +K_b = 1000 # length of burn-in period +N = 30 # number of particles of the particle filter + +# Number of states, etc. +n_x = 2 # number of states +n_u = 1 # number of control inputs +n_y = 1 # number of outputs + +# Define basis functions - assumed to be known in this example. +# Make sure that phi(x, u) is defined in vectorized form, i.e., phi(zeros(n_x, N), zeros(n_u, N)) should return a matrix of dimension (n_phi, N). +# Scaling the basis functions facilitates the exploration of the posterior distribution and reduces the required thinning parameter k_d. 
+n_phi = 5 # number of basis functions +phi(x, u) = [0.1 * x[1, :] 0.1 * x[2, :] u[1, :] 0.01 * cos.(3 * x[1, :]) .* x[2, :] 0.1 * sin.(2 * x[2, :]) .* u[1, :]]' # basis functions + +# Prior for Q - inverse Wishart distribution +ell_Q = 10 # degrees of freedom +Lambda_Q = 100 * I(n_x) # scale matrix + +# Prior for A - matrix normal distribution (mean matrix = 0, right covariance matrix = Q (see above), left covariance matrix = V) +V = Diagonal(10 * ones(n_phi)) # left covariance matrix + +# Initial guess for model parameters +Q_init = Lambda_Q # initial Q +A_init = zeros(n_x, n_phi) # initial A + +# Normally distributed initial state +x_init_mean = [2, 2] # mean +x_init_var = 1 * I # variance +x_init_dist = MvNormal(x_init_mean, x_init_var) + +# Define measurement model - assumed to be known (without loss of generality). +# Make sure that g(x, u) is defined in vectorized form, i.e., g(zeros(n_x, N), zeros(n_u, N)) should return a matrix of dimension (n_y, N). +g(x, u) = [1 0] * x # observation function +R = 0.1 # variance of zero-mean Gaussian measurement noise + +# Parameters for data generation +T = 2000 # number of steps for training +T_test = 500 # number of steps used for testing (via forward simulation - see below) +T_all = T + T_test + +# Generate training data. +# Choose the actual system (to be learned) and generate input-output data of length T_all. +# The system is of the form +# x_t+1 = f_true(x_t, u_t) + N(0, Q_true), +# y_t = g_true(x_t, u_t) + N(0, R_true). 
+ +# Unknown system +f_true(x, u) = [0.8 * x[1, :] - 0.5 * x[2, :] + 0.1 * cos.(3 * x[1, :]) .* x[2, :]; 0.4 * x[1, :] + 0.5 * x[2, :] + (ones(size(x, 2)) + 0.3 * sin.(2 * x[2, :])) .* u[1, :]] # true state transition function +Q_true = [0.03 -0.004; -0.004 0.01] # true process noise variance +mvn_v_true = MvNormal(zeros(n_x), Q_true) # true process noise distribution +g_true = g # true measurement function +R_true = R # true measurement noise variance +mvn_e_true = MvNormal(zeros(n_y), R_true) # true measurement noise distribution + +# Input trajectory used to generate training and test data +mvn_u_training = Normal(0, 3) # training input distribution +u_training = rand(mvn_u_training, (1, T)) # training inputs +u_test = 3 * sin.(2 * pi * (1 / T_test) * (Array(1:T_test)' .- 1)) # test inputs +u = reshape([u_training u_test], (n_u, T_all)) # training + test inputs + +# Generate data by forward simulation. +x = Array{Float64}(undef, n_x, T_all + 1) # true latent state trajectory +x[:, 1] = rand(x_init_dist, 1) # random initial state +y = Array{Float64}(undef, n_y, T_all) # output trajectory (measured) +for t in 1:T_all + x[:, t+1] = f_true(x[:, t], u[:, t]) + rand(mvn_v_true, 1) + y[:, t] = g_true(x[:, t], u[:, t]) + rand(mvn_e_true, 1) +end + +# Split data into training and test data. +u_training = u[:, 1:T] +x_training = x[:, 1:T+1] +y_training = y[:, 1:T] + +u_test = u[:, T+1:end] +x_test = x[:, T+1:end] +y_test = y[:, T+1:end] + +# Plot data. +# plot(Array(1:T_all), u[1,:], label="input", lw=2, legend=:topright); +# plot!(Array(1:T_all), y[1,:], label="output", lw=2); +# xlabel!("t"); +# ylabel!("u | y"); + +# Learn models. +# Result: K models of the type +# x_t+1 = PG_samples[i].A*phi(x_t, u_t) + N(0, PG_samples[i].Q), +# where phi are the basis functions defined above. 
+PG_samples = particle_Gibbs(u_training, y_training, K, K_b, k_d, N, phi, Lambda_Q, ell_Q, Q_init, V, A_init, x_init_dist, g, R) + +time_sampling = time() - sampling_timer + +# Test the models with the test data by simulating it forward in time. +# test_prediction(PG_samples, phi, g, R, 10, u_test, y_test) + +# Plot autocorrelation. +# plot_autocorrelation(PG_samples; max_lag=K-1) + +# Set up OCP. +# Horizon +H = 41 + +# Define constraints for u. +u_max = repeat([5], 1, H) # max control input +u_min = repeat([-5], 1, H) # min control input +n_input_const = sum(isfinite.(u_min)) + sum(isfinite.(u_max)) + +function bounded_input(u::Array{VariableRef}) + # Initialize constraint vector. + h_u = Array{AffExpr}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +function bounded_input(u::Array{<:Number}) + # Initialize constraint vector. + h_u = Array{Float64}(undef, n_input_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_u + if isfinite(u_min[n, t]) + h_u[i] = u_min[n, t] - u[n, t] + i += 1 + end + if isfinite(u_max[n, t]) + h_u[i] = u[n, t] - u_max[n, t] + i += 1 + end + end + end + return h_u +end + +# Define constraints for y. +y_max = reshape(fill(Inf, H), (1, H)) # max system output +y_min = reshape([-fill(Inf, 20); 2 * ones(6); -fill(Inf, 15)], (1, H)) # min system output +n_output_const = sum(isfinite.(y_min)) + sum(isfinite.(y_max)) + +function bounded_output(u::Array{VariableRef}, x::Array{VariableRef}, y::Array{VariableRef}) + # Initialize constraint vector. + h_scenario = Array{AffExpr}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. 
+ i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end + +function bounded_output(u::Array{<:Number}, x::Array{<:Number}, y::Array{<:Number}) + # Initialize constraint vector. + h_scenario = Array{Float64}(undef, n_output_const) + + # Construct constraint vector - constraints are only considered if they are finite. + i = 1 + for t in 1:H + for n in 1:n_y + if isfinite(y_min[n, t]) + h_scenario[i] = y_min[n, t] - y[n, t] + i += 1 + end + if isfinite(y_max[n, t]) + h_scenario[i] = y[n, t] - y_max[n, t] + i += 1 + end + end + end + return h_scenario +end + +# Define cost function. +# Objective: min ∑_{∀t} u_t^2. +function cost_function(u) + cost = sum(u .^ 2) + return cost +end + +# Ipopt options +Ipopt_options = Dict("max_iter" => 10000, "tol" => 1e-8, "hsllib" => HSL_jll.libhsl_path, "linear_solver" => "ma57") + +# Confidence parameter for the theoretical guarantees +β = 0.01 + +# Solve the PG OCP. +# u_opt, x_opt, y_opt, J_opt, solve_successful, iterations, mu = solve_PG_OCP_Ipopt(PG_samples, phi, g, R, H, cost_function, bounded_output, bounded_input; J_u=true, K_pre_solve=20, solver_opts=Ipopt_options) + +# Solve the PG OCP and determine complexity s and max constraint violation probability via greedy algorithm. +u_opt, x_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations = solve_PG_OCP_Ipopt_greedy_guarantees(PG_samples, phi, g, R, H, cost_function, bounded_output, bounded_input, β; J_u=true, K_pre_solve=5, solver_opts=copy(Ipopt_options)) + +# Apply input trajectory to the actual system. 
+y_sys = Array{Float64}(undef, n_y, H) +x_sys = Array{Float64}(undef, n_x, H) +x_sys[:, 1] = x_training[:, end] +u_sys = [u_opt 0] +for t in 1:H + if t >= 2 + x_sys[:, t] = f_true(x_sys[:, t-1], u_sys[:, t-1]) + rand(mvn_v_true, 1) + end + y_sys[:, t] = g_true(x_sys[:, t], u_sys[:, t]) + rand(mvn_e_true, 1) +end + +# Plot predictions. +plot_predictions(y_opt, y_sys; plot_percentiles=false, y_min=y_min, y_max=y_max) \ No newline at end of file diff --git a/Julia/src/PGopt.jl b/Julia/src/PGopt.jl index a9328e8..c8b5180 100644 --- a/Julia/src/PGopt.jl +++ b/Julia/src/PGopt.jl @@ -13,8 +13,11 @@ using RobotDynamics using ForwardDiff using FiniteDiff using SpecialFunctions +using JuMP +using Ipopt -export PG_sample, particle_Gibbs, systematic_resampling, MNIW_sample, test_prediction, plot_predictions, plot_autocorrelation, solve_PG_OCP, solve_PG_OCP_greedy_guarantees, epsilon + +export PG_sample, particle_Gibbs, systematic_resampling, MNIW_sample, test_prediction, plot_predictions, plot_autocorrelation, epsilon, solve_PG_OCP_Altro, solve_PG_OCP_Altro_greedy_guarantees, solve_PG_OCP_Ipopt, solve_PG_OCP_Ipopt_greedy_guarantees # Struct for the samples of the PGS algorithm mutable struct PG_sample @@ -27,4 +30,5 @@ end include("particle_Gibbs.jl") include("optimal_control_Altro.jl") +include("optimal_control_Ipopt.jl") end \ No newline at end of file diff --git a/Julia/src/optimal_control_Altro.jl b/Julia/src/optimal_control_Altro.jl index 5fe28cc..e966157 100644 --- a/Julia/src/optimal_control_Altro.jl +++ b/Julia/src/optimal_control_Altro.jl @@ -1,3 +1,51 @@ +""" + epsilon(s::Int64, K::Int64, β::Float64) + +Determine the parameter ``\\epsilon``. ``1-\\epsilon`` corresponds to a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory ``u_{0:H}`` is applied to the unknown system. 
+``\\epsilon`` is the unique solution over the interval ``(0,1)`` of the polynomial equation in the ``v`` variable: + +``\\binom{K}{s}(1-v)^{K-s}-\\frac{\\beta}{K}\\sum_{m=s}^{K-1}\\binom{m}{s}(1-v)^{m-s}=0``. + +# Arguments +- `s`: cardinality of the support sub-sample +- `K`: number of scenarios +- `β`: confidence parameter + +This function is based on the paper + + S. Garatti and M. C. Campi, “Risk and complexity in scenario optimization,” Mathematical Programming, vol. 191, no. 1, pp. 243–279, 2022. + +and the code provided in the appendix. +""" +function epsilon(s::Int64, K::Int64, β::Float64) + alphaU = beta_inc_inv(K - s + 1, s, β)[2] + m1 = Array(s:K)' + aux1 = sum(triu(log.(ones(K - s + 1) * m1), 1), dims=2) + aux2 = sum(triu(log.(ones(K - s + 1) * (m1 .- s)), 1), dims=2) + coeffs1 = aux2 - aux1 + m2 = Array(K+1:4*K)' + aux3 = sum(tril(log.(ones(3 * K) .* m2)), dims=2) + aux4 = sum(tril(log.(ones(3 * K) .* (m2 .- s))), dims=2) + coeffs2 = aux3 - aux4 + t1 = 0 + t2 = 1 - alphaU + poly1 = 1 + β / (2 * K) - β / (2 * K) * sum(exp.(coeffs1 .- (K .- m1') * log(t1))) - β / (6 * K) * sum(exp.(coeffs2 .+ (m2' .- K) * log(t1))) + poly2 = 1 + β / (2 * K) - β / (2 * K) * sum(exp.(coeffs1 .- (K .- m1') * log(t2))) - β / (6 * K) * sum(exp.(coeffs2 .+ (m2' .- K) * log(t2))) + if !(poly1 * poly2 > 0) + while t2 - t1 > 1e-10 + t = (t1 + t2) / 2 + polyt = 1 + β / (2 * K) - β / (2 * K) * sum(exp.(coeffs1 .- (K .- m1') * log(t))) - β / (6 * K) * sum(exp.(coeffs2 .+ (m2' .- K) * log(t))) + if polyt > 0 + t2 = t + else + t1 = t + end + end + ϵ = t1 + end + return ϵ +end + # Define RobotDynamics object - see RobotDynamics.jl for explanations. # The PG samples are combined into a single model with state x_vec containing the K*n_x states of the individual models. 
RobotDynamics.@autodiff struct PGS_model_obj <: RobotDynamics.DiscreteDynamics @@ -59,9 +107,9 @@ function RobotDynamics.discrete_dynamics!(PGS_model::PGS_model_obj, x_vec_n, x_v end """ - solve_PG_OCP(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, active_constraints=nothing, opts=nothing, print_progress=true) + solve_PG_OCP_Altro(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, active_constraints=nothing, opts=nothing, print_progress=true) -Solve the optimal control problem of the following form: +Solve the optimal control problem of the following form using Altro.jl: ``\\min \\sum_{t=0}^{H} \\frac{1}{2} u_t \\operatorname{diag}(R_{\\mathrm{cost}}) u_t`` @@ -69,10 +117,10 @@ subject to: ```math \\begin{aligned} \\forall k, \\forall t \\\\ -x_{t+1}^{[k]} &= f_{\\theta^{[k]}}(x_t^{[k]}, u_t) + v_t^{[k]} \\\\ -x_{t, 1:n_y}^{[k]} &\\geq y_{\\mathrm{min},\\ t} - e_t^{[k]} \\\\ -x_{t, 1:n_y}^{[k]} &\\leq y_{\\mathrm{max},\\ t} - e_t^{[k]} \\\\ -u_t &\\geq u_{\\mathrm{min},\\ t} \\\\ +x_{t+1}^{[k]} &= f_{\\theta^{[k]}}(x_t^{[k]}, u_t) + v_t^{[k]}, \\\\ +x_{t, 1:n_y}^{[k]} &\\geq y_{\\mathrm{min},\\ t} - e_t^{[k]}, \\\\ +x_{t, 1:n_y}^{[k]} &\\leq y_{\\mathrm{max},\\ t} - e_t^{[k]}, \\\\ +u_t &\\geq u_{\\mathrm{min},\\ t}, \\\\ u_t &\\leq u_{\\mathrm{max},\\ t}. 
\\end{aligned} ``` @@ -99,7 +147,7 @@ Further note that the states of the individual models (``x^{[1:K]}``) are combin - `opts`: SolverOptions struct containing options of the solver - `print_progress`: if set to true, the progress is printed """ -function solve_PG_OCP(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, active_constraints=nothing, opts=nothing, print_progress=true) +function solve_PG_OCP_Altro(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, active_constraints=nothing, opts=nothing, print_progress=true) # Time optimization. optimization_timer = time() @@ -171,7 +219,7 @@ function solve_PG_OCP(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, end # Solve problem that only contains K_pre_solve randomly selected scenarios. - u_init, max_penalty = solve_PG_OCP(PG_samples[k_pre_solve], phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0_pre_solve, v_vec=v_vec[:, :, k_pre_solve], e_vec=e_vec[:, :, k_pre_solve], K_pre_solve=0, opts=opts, print_progress=print_progress)[[2, 8]] + u_init, max_penalty = solve_PG_OCP_Altro(PG_samples[k_pre_solve], phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0_pre_solve, v_vec=v_vec[:, :, k_pre_solve], e_vec=e_vec[:, :, k_pre_solve], K_pre_solve=0, opts=opts, print_progress=print_progress)[[1, 8]] # Increase penalty - this can further decrease the runtime but might be unstable. # opts.penalty_initial = 10*opts.penalty_initial @@ -295,8 +343,8 @@ function solve_PG_OCP(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, end # Extract the solution and convert to matrices - copy is used to avoid problems with the garbage collection. 
- x_opt = copy(permutedims(reshape(hcat(Vector.(states(solver))...), (n_x, K, H)), (1, 3, 2))) u_opt = copy(reshape(hcat(Vector.(controls(solver))...), (n_u, H - 1))) + x_opt = copy(permutedims(reshape(hcat(Vector.(states(solver))...), (n_x, K, H)), (1, 3, 2))) J_opt = copy(cost(solver)) # Get the output via the measurement function y_t^k = x_t^k[1 : n_y] + e_t^k. @@ -307,61 +355,13 @@ function solve_PG_OCP(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, end end - return x_opt, u_opt, y_opt, J_opt, copy(Integer(status(solver))), copy(solver.stats.iterations), copy(solver.stats.iterations_outer), copy(solver.stats.penalty_max) -end - -""" - epsilon(s::Int64, K::Int64, β::Float64) - -Determine the parameter ``\\epsilon``. ``1-\\epsilon`` corresponds to a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory ``u_{0:H}`` is applied to the unknown system. -``\\epsilon`` is the unique solution over the interval ``(0,1)`` of the polynomial equation in the ``v`` variable: - -``\\binom{K}{s}(1-v)^{K-s}-\\frac{\\beta}{K}\\sum_{m=s}^{K-1}\\binom{m}{s}(1-v)^{m-s}=0``. - -# Arguments -- `s`: cardinality of the support sub-sample -- `K`: number of scenarios -- `β`: confidence parameter - -This function is based on the paper - - S. Garatti and M. C. Campi, “Risk and complexity in scenario optimization,” Mathematical Programming, vol. 191, no. 1, pp. 243–279, 2022. - -and the code provided in the appendix. 
-""" -function epsilon(s::Int64, K::Int64, β::Float64) - alphaU = beta_inc_inv(K - s + 1, s, β)[2] - m1 = Array(s:K)' - aux1 = sum(triu(log.(ones(K - s + 1) * m1), 1), dims=2) - aux2 = sum(triu(log.(ones(K - s + 1) * (m1 .- s)), 1), dims=2) - coeffs1 = aux2 - aux1 - m2 = Array(K+1:4*K)' - aux3 = sum(tril(log.(ones(3 * K) .* m2)), dims=2) - aux4 = sum(tril(log.(ones(3 * K) .* (m2 .- s))), dims=2) - coeffs2 = aux3 - aux4 - t1 = 0 - t2 = 1 - alphaU - poly1 = 1 + β / (2 * K) - β / (2 * K) * sum(exp.(coeffs1 .- (K .- m1') * log(t1))) - β / (6 * K) * sum(exp.(coeffs2 .+ (m2' .- K) * log(t1))) - poly2 = 1 + β / (2 * K) - β / (2 * K) * sum(exp.(coeffs1 .- (K .- m1') * log(t2))) - β / (6 * K) * sum(exp.(coeffs2 .+ (m2' .- K) * log(t2))) - if !(poly1 * poly2 > 0) - while t2 - t1 > 1e-10 - t = (t1 + t2) / 2 - polyt = 1 + β / (2 * K) - β / (2 * K) * sum(exp.(coeffs1 .- (K .- m1') * log(t))) - β / (6 * K) * sum(exp.(coeffs2 .+ (m2' .- K) * log(t))) - if polyt > 0 - t2 = t - else - t1 = t - end - end - ϵ = t1 - end - return ϵ + return u_opt, x_opt, y_opt, J_opt, copy(Integer(status(solver))), copy(solver.stats.iterations), copy(solver.stats.iterations_outer), copy(solver.stats.penalty_max) end """ - solve_PG_OCP_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, opts=nothing, print_progress=true) + solve_PG_OCP_Altro_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, opts=nothing, print_progress=true) -Solve the following optimal control problem and determine a support sub-sample with cardinality ``s`` via a greedy constraint removal. +Solve the following optimal control problem using Altro.jl and determine a support sub-sample with cardinality ``s`` via a greedy constraint removal. 
Based on the cardinality ``s``, a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory ``u_{0:H}`` is applied to the unknown system is calculated (i.e., ``1-\\epsilon`` is determined). ``\\min \\sum_{t=0}^{H} \\frac{1}{2} u_t \\operatorname{diag}(R_{\\mathrm{cost}}) u_t`` @@ -370,10 +370,10 @@ subject to: ```math \\begin{aligned} \\forall k, \\forall t \\\\ -x_{t+1}^{[k]} &= f_{\\theta^{[k]}}(x_t^{[k]}, u_t) + v_t^{[k]} \\\\ -x_{t, 1:n_y}^{[k]} &\\geq y_{\\mathrm{min},\\ t} - e_t^{[k]} \\\\ -x_{t, 1:n_y}^{[k]} &\\leq y_{\\mathrm{max},\\ t} - e_t^{[k]} \\\\ -u_t &\\geq u_{\\mathrm{min},\\ t} \\\\ +x_{t+1}^{[k]} &= f_{\\theta^{[k]}}(x_t^{[k]}, u_t) + v_t^{[k]}, \\\\ +x_{t, 1:n_y}^{[k]} &\\geq y_{\\mathrm{min},\\ t} - e_t^{[k]}, \\\\ +x_{t, 1:n_y}^{[k]} &\\leq y_{\\mathrm{max},\\ t} - e_t^{[k]}, \\\\ +u_t &\\geq u_{\\mathrm{min},\\ t}, \\\\ u_t &\\leq u_{\\mathrm{max},\\ t}. \\end{aligned} ``` @@ -400,7 +400,7 @@ Further note that the states of the individual models (``x^{[1:K]}``) are combin - `opts`: SolverOptions struct containing options of the solver - `print_progress`: if set to true, the progress is printed """ -function solve_PG_OCP_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, opts=nothing, print_progress=true) +function solve_PG_OCP_Altro_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, R, H, u_min, u_max, y_min, y_max, R_cost_diag, β; x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, opts=nothing, print_progress=true) # Time first optimization. first_solve_timer = time() @@ -469,13 +469,13 @@ function solve_PG_OCP_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Func # Solve the OCP once with all constraints to get an initialization for all following optimizations. 
println("### Startet optimization of fully constrained problem") - u_init, max_penalty = solve_PG_OCP(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=K_pre_solve, opts=opts, print_progress=print_progress)[[2, 8]] + u_init, max_penalty = solve_PG_OCP_Altro(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=K_pre_solve, opts=opts, print_progress=print_progress)[[1, 8]] # The OCP is solved again with initialization to obtain the optimal input u_opt. opts.iterations = 1000 opts.penalty_initial = max_penalty[end] # increase initial penalty println("### Startet optimization of fully constrained problem with initialization") - x_opt, u_opt, y_opt, J_opt, termination_status, iterations, iterations_outer = solve_PG_OCP(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=0, opts=opts, print_progress=print_progress)[1:end-1] + u_opt, x_opt, y_opt, J_opt, termination_status, iterations, iterations_outer = solve_PG_OCP_Altro(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=0, opts=opts, print_progress=print_progress)[1:end-1] # Determine guarantees. # If a feasible u_opt is found, probabilistic constraint satisfaction guarantees are derived by greedily removing constraints to determine a support sub-sample S. @@ -518,7 +518,7 @@ function solve_PG_OCP_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Func # Catch errors that might occur during the optimization, e.g., out-of-memory errors. try # Solve the OCP with reduced constraint set. 
- u_opt_temp, termination_status_temp = solve_PG_OCP(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=0, active_constraints=temp_scenarios, opts=opts, print_progress=print_progress)[[2, 5]] + u_opt_temp, termination_status_temp = solve_PG_OCP_Altro(PG_samples, phi, R, H, u_min, u_max, y_min, y_max, R_cost_diag; x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=0, active_constraints=temp_scenarios, opts=opts, print_progress=print_progress)[[1, 5]] # If the optimization is successful and the solution does not change, permanently remove the constraints corresponding to the PG samples with index i from the constraint set. if ((termination_status_temp == 2) && (maximum(abs.(u_opt_temp - u_opt)) == 0)) @@ -552,5 +552,5 @@ function solve_PG_OCP_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Func epsilon_perc = NaN time_guarantees = NaN end - return x_opt, u_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations + return u_opt, x_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations end \ No newline at end of file diff --git a/Julia/src/optimal_control_Ipopt.jl b/Julia/src/optimal_control_Ipopt.jl new file mode 100644 index 0000000..9e89b50 --- /dev/null +++ b/Julia/src/optimal_control_Ipopt.jl @@ -0,0 +1,393 @@ +""" + solve_PG_OCP_Ipopt(PG_samples::Vector{PG_sample}, phi::Function, g::Function, R, H, J, h_scenario, h_u; J_u=false, x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, solver_opts=nothing, print_progress=true) + +Solve the optimal control problem of the following form using Ipopt: + +``\\min_{u_{0:H},\\; \\overline{J_H}} \\overline{J_H}`` + +subject to: +```math +\\begin{aligned} +\\forall k, \\forall t \\\\ +x_{t+1}^{[k]} &= f_{\\theta^{[k]}}(x_t^{[k]}, u_t) + v_t^{[k]}, \\\\ +y_{t}^{[k]} &= 
g_{\\theta^{[k]}}(x_t^{[k]}, u_t) + w_t^{[k]}, \\\\ +J_H^{[k]} &= J_H(u_{0:H}, x_{0:H}^{[k]}, y_{0:H}^{[k]}) \\leq \\overline{J_H}, \\\\ +h(u_{0:H},x_{0:H}^{[k]},y_{0:H}^{[k]}) \\leq 0. +\\end{aligned} +``` + +# Arguments +- `PG_samples`: PG samples +- `phi`: basis functions +- `g`: observation function +- `R`: variance of zero-mean Gaussian measurement noise - only used if e_vec is not passed +- `H`: horizon of the OCP +- `J`: function with input arguments (u_1:H, x_1:H, y_1:H) (or (u_1:H) if J_u is set true) that returns the cost to be minimized +- `h_scenario`: function with input arguments (u_1:H, x_1:H, y_1:H) that returns the constraint vector belonging to a scenario; a feasible solution must satisfy h_scenario <= 0 for all scenarios. +- `h_u`: function with input argument u_1:H that returns the constraint vector for u; a feasible solution satisfy yield h_u <= 0 +- `J_u`: set to true if cost depends only on inputs u - this accelerates the optimization +- `x_vec_0`: vector with K*n_x elements containing the initial state of all models - if not provided, the initial states are sampled based on the PGS samples +- `v_vec`: array of dimension n_x x H x K that contains the process noise for all models and all timesteps - if not provided, the noise is sampled based on the PGS samples +- `e_vec`: array of dimension n_y x H x K that contains the measurement noise for all models and all timesteps - if not provided, the noise is sampled based on the provided R +- `u_init`: initial guess for the optimal trajectory +- `K_pre_solve`: if K_pre_solve > 0, an initial guess for the optimal trajectory is obtained by solving the OCP with only K_pre_solve < K models +- `opts`: SolverOptions struct containing options of the solver +- `print_progress`: if set to true, the progress is printed +""" +function solve_PG_OCP_Ipopt(PG_samples::Vector{PG_sample}, phi::Function, g::Function, R, H, J, h_scenario, h_u; J_u=false, x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, 
K_pre_solve=0, solver_opts=nothing, print_progress=true) + # Time optimization. + optimization_timer = time() + + # Get number of states, etc. + K = size(PG_samples, 1) + n_u = size(PG_samples[1].u_m1, 1) + n_x = size(PG_samples[1].A, 1) + n_y = size(R, 1) + + # Sample initial state x_vec_0 if not provided. + if x_vec_0 === nothing + x_vec_0 = Array{Float64}(undef, n_x * K) + for k in 1:K + # Get model. + A = PG_samples[k].A + Q = PG_samples[k].Q + f(x, u) = A * phi(x, u) + mvn_v_model = MvNormal(zeros(n_x), Q) + + # Sample state at t=-1. + star = systematic_resampling(PG_samples[k].w_m1, 1) + x_m1 = PG_samples[k].x_m1[:, star] + + # Propagate. + x_vec_0[(k-1)*n_x+1:n_x*k] = f(x_m1, PG_samples[k].u_m1) + rand(mvn_v_model) + end + end + + # Sample process noise array v_vec if not provided. + if v_vec === nothing + v_vec = Array{Float64}(undef, n_x, H, K) + for k in 1:K + Q = PG_samples[k].Q + mvn_v_model = MvNormal(zeros(n_x), Q) + v_vec[:, :, k] = rand(mvn_v_model, H) + end + end + + # Sample measurement noise array e_vec if not provided. + if e_vec === nothing + e_vec = Array{Float64}(undef, n_y, H, K) + mvn_e = MvNormal(zeros(n_y), R) + for k in 1:K + e_vec[:, :, k] = rand(mvn_e, H) + end + end + + # Determine initialization. + # If K_pre_solve is provided, a problem that only considers K_pre_solve randomly selected scenarios is solved first to obtain an initialization for the problem with all K scenarios. + # With a good initialization the runtime of the optimization is reduced. + if u_init === nothing + if (0 < K_pre_solve) && (K_pre_solve < K) + # Sample the K_pre_solve scenarios that are considered for the initialization. + k_pre_solve = sample(1:K, K_pre_solve) + + # Get the initial states of the considered scenarios. 
+ x_vec_0_pre_solve = Array{Float64}(undef, n_x * K_pre_solve) + for k in 1:K_pre_solve + x_vec_0_pre_solve[(k-1)*n_x+1:n_x*k] = x_vec_0[(k_pre_solve[k]-1)*n_x+1:n_x*k_pre_solve[k]] + end + + if print_progress + println("###### Startet pre-solving step") + end + + # Solve problem that only contains K_pre_solve randomly selected scenarios. + u_init = solve_PG_OCP_Ipopt(PG_samples[k_pre_solve], phi, g, R, H, J, h_scenario, h_u; J_u=J_u, x_vec_0=x_vec_0_pre_solve, v_vec=v_vec[:, :, k_pre_solve], e_vec=e_vec[:, :, k_pre_solve], K_pre_solve=0, solver_opts=solver_opts, print_progress=print_progress)[1] + + if print_progress + println("###### Pre-solving step complete, switching back to the original problem") + end + else + u_init = zeros(n_u, H) + end + end + + # Determine initial guess for X and Y. + X_init = Array{Float64}(undef, n_x * K, H + 1) # initial guess for X + X_init[:, 1] .= x_vec_0 + Y_init = Array{Float64}(undef, n_y * K, H) # initial guess for Y + for k in 1:K + A = PG_samples[k].A + f(x, u) = A * phi(x, u) + for t in 1:H + X_init[n_x*(k-1)+1:n_x*k, t+1] = f(X_init[n_x*(k-1)+1:n_x*k, t], u_init[:, t]) + v_vec[:, t, k] + Y_init[n_y*(k-1)+1:n_y*k, t] = g(X_init[n_x*(k-1)+1:n_x*k, t], u_init[:, t]) + e_vec[:, t, k] + end + end + + # Set up OCP. + OCP = Model(Ipopt.Optimizer) + @variable(OCP, U[i=1:n_u, j=1:H], start = u_init[i, j]) + @variable(OCP, X[i=1:n_x*K, j=1:H+1], start = X_init[i, j]) + @variable(OCP, Y[i=1:n_y*K, j=1:H], start = Y_init[i, j]) + + # Set the initial state. + for i in 1:n_x*K + fix(X[i, 1], x_vec_0[i]; force=true) + end + + # Set options. 
+ for opt in solver_opts + set_attributes(OCP, opt) + end + + if !print_progress + set_silent(OCP) + end + + # Define objective + if J_u # cost function depends only on u, i.e., J_max = J + @objective(OCP, Min, J(U)) + else + J_max = @variable(OCP, J_max) + @objective(OCP, Min, J_max) + for k in 1:K + @constraint(OCP, J(U, X[n_x*(k-1)+1:n_x*k, :], Y[n_y*(k-1)+1:n_y*k, :]) <= J_max) + end + end + + # Add dynamic and additional constraints for all scenarios. + for k in 1:K + # Get current model. + A = PG_samples[k].A + f(x, u) = A * phi(x, u) + + # Add dynamic constraints + for t in 1:H + @constraint(OCP, X[n_x*(k-1)+1:n_x*k, t+1] .== f(X[n_x*(k-1)+1:n_x*k, t], U[:, t]) + v_vec[:, t, k]) + @constraint(OCP, Y[n_y*(k-1)+1:n_y*k, t] .== g(X[n_x*(k-1)+1:n_x*k, t], U[:, t]) + e_vec[:, t, k]) + end + + # Add scenario constraints. + @constraint(OCP, h_scenario(U, X[n_x*(k-1)+1:n_x*k, :], Y[n_y*(k-1)+1:n_y*k, :]) .<= 0) + end + + # Add constraints for the input. + @constraint(OCP, h_u(U) .<= 0) + + # Add callback that stores barrier parameter mu. + barrier_param_mu = Float64[] + + function get_mu( + alg_mod::Cint, + iter_count::Cint, + obj_value::Float64, + inf_pr::Float64, + inf_du::Float64, + mu::Float64, + d_norm::Float64, + regularization_size::Float64, + alpha_du::Float64, + alpha_pr::Float64, + ls_trials::Cint) + push!(barrier_param_mu, mu) + return true + end + + MOI.set(OCP, Ipopt.CallbackFunction(), get_mu) + + # Solve OCP. 
+ if print_progress + println("### Started optimization algorithm") + end + + optimize!(OCP) + time_optimization = time() - optimization_timer + + if print_progress + @printf("### Optimization complete\nRuntime: %.2f s\n", time_optimization) + end + + u_opt = value.(U) + x_opt = reshape(value.(X)', (n_x, H + 1, K)) + y_opt = reshape(value.(Y)', (n_y, H, K)) + J_opt = objective_value(OCP) + solve_successful = is_solved_and_feasible(OCP) + iterations = MOI.get(OCP, MOI.BarrierIterations()) + mu = barrier_param_mu[end] + + return u_opt, x_opt, y_opt, J_opt, solve_successful, iterations, mu +end + +""" + solve_PG_OCP_Ipopt_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, g::Function, R, H, J, h_scenario, h_u, β; J_u=false, x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, solver_opts=nothing, print_progress=true) + +Solve the sample-based optimal control problem using Ipopt and determine a support sub-sample with cardinality s via a greedy constraint removal. +Based on the cardinality s, a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory u_{0:H} is applied to the unknown system is calculated. + +``\\min_{u_{0:H},\\; \\overline{J_H}} \\overline{J_H}`` + +subject to: +```math +\\begin{aligned} +\\forall k, \\forall t \\\\ +x_{t+1}^{[k]} &= f_{\\theta^{[k]}}(x_t^{[k]}, u_t) + v_t^{[k]}, \\\\ +y_{t}^{[k]} &= g_{\\theta^{[k]}}(x_t^{[k]}, u_t) + w_t^{[k]}, \\\\ +J_H^{[k]} &= J_H(u_{0:H}, x_{0:H}^{[k]}, y_{0:H}^{[k]}) \\leq \\overline{J_H}, \\\\ +h(u_{0:H},x_{0:H}^{[k]},y_{0:H}^{[k]}) \\leq 0. 
+\\end{aligned} +``` + +# Arguments +- `PG_samples`: PG samples +- `phi`: basis functions +- `g`: observation function +- `R`: variance of zero-mean Gaussian measurement noise - only used if e_vec is not passed +- `H`: horizon of the OCP +- `J`: function with input arguments (u_1:H, x_1:H, y_1:H) (or (u_1:H) if J_u is set true) that returns the cost to be minimized +- `h_scenario`: function with input arguments (u_1:H, x_1:H, y_1:H) that returns the constraint vector belonging to a scenario; a feasible solution must satisfy h_scenario <= 0 for all scenarios. +- `h_u`: function with input argument u_1:H that returns the constraint vector for u; a feasible solution must satisfy h_u <= 0 +- `β`: confidence parameter +- `J_u`: set to true if cost depends only on inputs u - this accelerates the optimization +- `x_vec_0`: vector with K*n_x elements containing the initial state of all models - if not provided, the initial states are sampled based on the PGS samples +- `v_vec`: array of dimension n_x x H x K that contains the process noise for all models and all timesteps - if not provided, the noise is sampled based on the PGS samples +- `e_vec`: array of dimension n_y x H x K that contains the measurement noise for all models and all timesteps - if not provided, the noise is sampled based on the provided R +- `u_init`: initial guess for the optimal trajectory +- `K_pre_solve`: if K_pre_solve > 0, an initial guess for the optimal trajectory is obtained by solving the OCP with only K_pre_solve < K models +- `solver_opts`: SolverOptions struct containing options of the solver +- `print_progress`: if set to true, the progress is printed +""" +function solve_PG_OCP_Ipopt_greedy_guarantees(PG_samples::Vector{PG_sample}, phi::Function, g::Function, R, H, J, h_scenario, h_u, β; J_u=false, x_vec_0=nothing, v_vec=nothing, e_vec=nothing, u_init=nothing, K_pre_solve=0, solver_opts=nothing, print_progress=true) + # Time first optimization. 
+ first_solve_timer = time() + + # Get number of states, etc. + K = size(PG_samples, 1) + n_u = size(PG_samples[1].u_m1, 1) + n_x = size(PG_samples[1].A, 1) + n_y = size(R, 1) + + # Sample initial state x_vec_0 if not provided. + if x_vec_0 === nothing + x_vec_0 = Array{Float64}(undef, n_x * K) + for k in 1:K + # Get model. + A = PG_samples[k].A + Q = PG_samples[k].Q + f(x, u) = A * phi(x, u) + mvn_v_model = MvNormal(zeros(n_x), Q) + + # Sample state at t=-1. + star = systematic_resampling(PG_samples[k].w_m1, 1) + x_m1 = PG_samples[k].x_m1[:, star] + + # Propagate. + x_vec_0[(k-1)*n_x+1:n_x*k] = f(x_m1, PG_samples[k].u_m1) + rand(mvn_v_model) + end + end + + # Sample process noise array v_vec if not provided. + if v_vec === nothing + v_vec = Array{Float64}(undef, n_x, H, K) + for k in 1:K + Q = PG_samples[k].Q + mvn_v_model = MvNormal(zeros(n_x), Q) + v_vec[:, :, k] = rand(mvn_v_model, H) + end + end + + # Sample measurement noise array e_vec if not provided. + if e_vec === nothing + e_vec = Array{Float64}(undef, n_y, H, K) + mvn_e = MvNormal(zeros(n_y), R) + for k in 1:K + e_vec[:, :, k] = rand(mvn_e, H) + end + end + + num_failed_optimizations = 0 # number of failed optimizations during the computation of the guarantees + + # Solve the OCP once with all constraints to get an initialization for all following optimizations. + println("### Startet optimization of fully constrained problem") + + u_init, mu = solve_PG_OCP_Ipopt(PG_samples, phi, g, R, H, J, h_scenario, h_u; J_u=J_u, x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=K_pre_solve, solver_opts=solver_opts, print_progress=print_progress)[[1, 7]] + + # The OCP is solved again with initialization to obtain the optimal input u_opt. 
+ solver_opts["mu_init"] = mu + println("### Startet optimization of fully constrained problem with initialization") + u_opt, x_opt, y_opt, J_opt, solve_successful, iter = solve_PG_OCP_Ipopt(PG_samples, phi, g, R, H, J, h_scenario, h_u; J_u=J_u, x_vec_0=x_vec_0, v_vec=v_vec, e_vec=e_vec, u_init=u_init, K_pre_solve=K_pre_solve, solver_opts=solver_opts, print_progress=print_progress)[1:6] + + # Determine guarantees. + # If a feasible u_opt is found, probabilistic constraint satisfaction guarantees are derived by greedily removing constraints to determine a support sub-sample S. + if solve_successful + time_first_solve = time() - first_solve_timer + + # Time computation of guarantees. + guarantees_timer = time() + + # Determine support sub-samples and guarantees for the generalization of the resulting input trajectory. + println("### Started search for support sub-sample") + + # Reduce number of iterations - if the number of iterations of the original OCP is exceeded, the solution will likely be different, and the optimization can be stopped. + solver_opts["max_iter"] = 2 * iter + + # Sort scenarios according to the distance to the constraint boundary - removing the scenarios with the largest distance to the constraint boundary first usually yields better results. + h_scenario_max = Array{Float64}(undef, K) # minimum distance of the scenarios to the constraint boundary + for i in 1:K + h_scenario_max[i] = maximum(h_scenario(u_opt, x_opt[:, :, i], y_opt[:, :, i])) + end + scenarios_sorted = sortperm(h_scenario_max; rev=true) # sort scenarios + + # Pre-allocate. + u_opt_temp = Array{Float64}(undef, n_u, H - 1) + active_scenarios = scenarios_sorted + + # Greedily remove constraints and check whether the solution changes to determine a support sub-sample. + for i in 1:K + # Print progress. 
+ @printf("Startet optimization with new constraint set\nIteration: %i/%i\n", i, K) + + # Temporarily remove the constraints corresponding to the PG samples with index i from the constraint set. + temp_scenarios = active_scenarios[active_scenarios.!==scenarios_sorted[i]] + + # Get the initial states of the active scenarios. + x_vec_0_temp = Array{Float64}(undef, n_x * length(temp_scenarios)) + for k in eachindex(temp_scenarios) + x_vec_0_temp[(k-1)*n_x+1:n_x*k] = x_vec_0[(temp_scenarios[k]-1)*n_x+1:n_x*temp_scenarios[k]] + end + + # Solve the OCP with reduced constraint set. + u_opt_temp, J_opt_temp, solve_successful_temp = solve_PG_OCP_Ipopt(PG_samples[temp_scenarios], phi, g, R, H, J, h_scenario, h_u; J_u=J_u, x_vec_0=x_vec_0_temp, v_vec=v_vec[:, :, temp_scenarios], e_vec=e_vec[:, :, temp_scenarios], u_init=u_init, K_pre_solve=0, solver_opts=solver_opts, print_progress=print_progress)[[1, 4, 5]] + + # If the optimization is successful and the solution does not change, permanently remove the constraints corresponding to the PG samples with index i from the constraint set. + # A valid subsample has the same local minimum. However, since the numerical solver does not reach this minimum exactly, a threshold value is used here to check whether the solutions are the same. + if solve_successful_temp && all(abs.(u_opt_temp - u_opt) .< 1e-6) && all(abs.(J_opt_temp - J_opt) .< 1e-6) + active_scenarios = temp_scenarios + elseif !solve_successful_temp + @warn "Optimization with temporarily removed constraints failed. Proceeding with next candidate for a support sub-sample." + num_failed_optimizations += 1 + end + end + + # Determine the cardinality of the support sub-sample. + s = length(active_scenarios) + + # Based on the cardinality of the support sub-sample, determine the parameter ϵ. 
+ # 1-ϵ corresponds to a bound on the probability that the incurred cost exceeds the worst-case cost or that the constraints are violated when the input trajectory u_{0:H} is applied to the unknown system. + epsilon_prob = epsilon(s, K, β) + epsilon_perc = epsilon_prob * 100 + + # Print s, ϵ, and runtime. + time_guarantees = time() - guarantees_timer + @printf("### Support sub sample found\nCardinality of the support sub-sample (s): %i\nMax. constraint violation probability (1-epsilon): %.2f %%\nTime to compute u*: %.2f s\nTime to compute 1-epsilon: %.2f s\n", s, 100 - epsilon_perc, time_first_solve, time_guarantees) + else + # In case the initial problem is infeasible, skip the computation of guarantees. + @warn "No feasible solution found for the initial problem. Skipping computation of guarantees." + time_first_solve = NaN + J_opt = NaN + s = NaN + epsilon_prob = NaN + epsilon_perc = NaN + time_guarantees = NaN + end + return u_opt, x_opt, y_opt, J_opt, s, epsilon_prob, epsilon_perc, time_first_solve, time_guarantees, num_failed_optimizations +end \ No newline at end of file diff --git a/Julia/src/particle_Gibbs.jl b/Julia/src/particle_Gibbs.jl index aba513a..bb628d1 100644 --- a/Julia/src/particle_Gibbs.jl +++ b/Julia/src/particle_Gibbs.jl @@ -1,7 +1,8 @@ """ systematic_resampling(W, N) -Sample `N` indices according to the weights `W`. +Sample `N` indices according to the weights `W`. +This function could be replaced by the function `wsample` from StatsBase.jl but is kept to minimize the differences between the Julia and the MATLAB implementations. 
# Arguments - `W`: vector containing the weights of the particles diff --git a/README.md b/README.md index decb1af..5bb3683 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,15 @@ Two versions of the algorithm are currently available: a [Julia implementation]( ## Julia [![Dev](https://img.shields.io/badge/docs-stable-blue?logo=Julia&logoColor=white)](https://TUM-ITR.github.io/PGopt) -In order to ensure the reproducibility of the results presented in the paper without reliance on proprietary software, a Julia implementation using the [Altro](https://github.com/RoboticExplorationLab/Altro.jl) solver is provided. This version was used for the results presented in the paper and reproduces them exactly. However, this version has some limitations: only cost functions of the form $J_H=\sum\nolimits_{t=0}^H \frac{1}{2} u_t R u_t$, measurement functions of the form $y=x_{1:n_y}$, and output constraints of the form $y_\mathrm{min} \leq y \leq y_\mathrm{max}$ are supported. +In order to ensure the reproducibility of the results presented in the paper without reliance on proprietary software, a Julia implementation that utilizes the solver [Altro](https://github.com/RoboticExplorationLab/Altro.jl) to solve the OCP is provided. This version was used for the results presented in the paper and reproduces them exactly. However, this version has some limitations: only cost functions of the form $J_H=\sum\nolimits_{t=0}^H \frac{1}{2} u_t R u_t$, measurement functions of the form $y=x_{1:n_y}$, and output constraints of the form $y_\mathrm{min} \leq y \leq y_\mathrm{max}$ are supported. + +Besides the Julia implementation that utilizes Altro, there is also an implementation that utilizes the solver [IPOPT](https://coin-or.github.io/Ipopt/). This implementation allows arbitrary cost functions $J_H(u_{1:H},x_{1:H},y_{1:H})$, measurement functions $y=g(x,u)$, and constraints $h(u_{1:H},x_{1:H},y_{1:H})$. 
However, using IPOPT together with the proprietary [HSL Linear Solvers for Julia](https://licences.stfc.ac.uk/product/libhsl) (`HSL_jll.jl`) is recommended. A license (free to academics) is required. Further information can be found in the [PGopt Julia documentation](Julia/README.md). ## MATLAB -The MATLAB implementation is more general than the Julia implementation and allows arbitrary cost functions $J_H(u_{1:H},x_{1:H},y_{1:H})$, measurement functions $y=g(x,u)$, and constraints $h(u_{1:H},x_{1:H},y_{1:H})$. [CasADi](https://web.casadi.org/) and [IPOPT](https://coin-or.github.io/Ipopt/) are used to solve the scenario optimal control problem. In addition, the proprietary [HSL Linear Solvers](https://licences.stfc.ac.uk/product/coin-hsl) are used, which significantly accelerate the optimization. A license for the HSL Linear Solvers (free to academics) is required. +The MATLAB implementation allows arbitrary cost functions $J_H(u_{1:H},x_{1:H},y_{1:H})$, measurement functions $y=g(x,u)$, and constraints $h(u_{1:H},x_{1:H},y_{1:H})$. [CasADi](https://web.casadi.org/) and [IPOPT](https://coin-or.github.io/Ipopt/) are used to solve the scenario optimal control problem. In addition, the proprietary [HSL Linear Solvers](https://licences.stfc.ac.uk/product/coin-hsl) are used, which significantly accelerate the optimization. A license for the HSL Linear Solvers (free to academics) is required. Further information can be found in the [PGopt MATLAB documentation](MATLAB/README.md). @@ -27,9 +29,9 @@ Further information can be found in the [PGopt MATLAB documentation](MATLAB/READ If you found this software useful for your research, consider citing us. 
``` @article{PMCMC_OCP_arXiv_2023, - title={Learning-Based Optimal Control with Performance Guarantees for Unknown Systems with Latent States}, - author={Lefringhausen, Robert and Srithasan, Supitsana and Lederer, Armin and Hirche, Sandra}, - journal={arXiv preprint arXiv:2303.17963}, - year={2023} + title={Learning-Based Optimal Control with Performance Guarantees for Unknown Systems with Latent States}, + author={Lefringhausen, Robert and Srithasan, Supitsana and Lederer, Armin and Hirche, Sandra}, + journal={arXiv preprint arXiv:2303.17963}, + year={2023} } -``` +``` \ No newline at end of file