diff --git a/example/CouplingExample.jl b/example/CouplingExample.jl index 5c4c4c1f..587dd6aa 100644 --- a/example/CouplingExample.jl +++ b/example/CouplingExample.jl @@ -32,13 +32,13 @@ function coupling_example() 0.1 0; 0 0; 0 0.1]) - dyn = Dynamics(A, [B₁, B₂]) + dyn = LinearDynamics(A, [B₁, B₂]) # Costs reflecting the preferences above. Q₁ = zeros(8, 8) Q₁[5, 5] = 1.0 Q₁[7, 7] = 1.0 - c₁ = Cost(Q₁) + c₁ = QuadraticCost(Q₁) add_control_cost!(c₁, 1, 1 * diagm([1, 1])) add_control_cost!(c₁, 2, zeros(2, 2)) @@ -51,7 +51,7 @@ function coupling_example() Q₂[7, 7] = 1.0 Q₂[3, 7] = -1.0 Q₂[7, 3] = -1.0 - c₂ = Cost(Q₂) + c₂ = QuadraticCost(Q₂) add_control_cost!(c₂, 2, 1 * diagm([1, 1])) add_control_cost!(c₂, 1, zeros(2, 2)) diff --git a/src/CostUtils.jl b/src/CostUtils.jl new file mode 100644 index 00000000..f0598564 --- /dev/null +++ b/src/CostUtils.jl @@ -0,0 +1,59 @@ +# Utilities for managing quadratic and nonquadratic dynamics. +abstract type Cost end + +# Every Cost is assumed to have the following functions defined on it: +# - quadraticize_costs(cost, t, x, us) - this function produces a QuadraticCost at time t given the state and controls +# - evaluate(cost, xs, us) - this function evaluates the cost of a trajectory given the states and controls + +# We use this type by making a substruct of it which can then have certain functions defined for it. +abstract type NonQuadraticCost <: Cost end + +# Cost for a single player. +# Form is: x^T_t Q^i x + \sum_j u^{jT}_t R^{ij} u^j_t. +# For simplicity, assuming that Q, R are time-invariant, and that dynamics are +# linear time-invariant, i.e. x_{t+1} = A x_t + \sum_i B^i u^i_t. +mutable struct QuadraticCost <: Cost + Q + Rs +end +QuadraticCost(Q) = QuadraticCost(Q, Dict{Int, Matrix{eltype(Q)}}()) + +# TODO(hamzah) Add better tests for the QuadraticCost struct and associated functions. + +# Method to add R^{ij}s to a Cost struct. +export add_control_cost! +function add_control_cost!(c::QuadraticCost, other_player_idx, Rij) + c.Rs[other_player_idx] = Rij +end + +function quadraticize_costs(cost::QuadraticCost, t, x, us) + return cost +end + +# Evaluate cost on a state/control trajectory. +# - xs[:, time] +# - us[player][:, time] +function evaluate(c::QuadraticCost, xs, us) + horizon = last(size(xs)) + + total = 0.0 + for tt in 1:horizon + total += xs[:, tt]' * c.Q * xs[:, tt] + total += sum(us[jj][:, tt]' * Rij * us[jj][:, tt] for (jj, Rij) in c.Rs) + end + return total +end + + +# TODO: Make the affine cost structure with homogenenized coordinates. +# struct AffineCost <: Cost end +# function quadraticize_costs(cost::AffineCost, t, x, us) +# function evaluate(c::AffineCost, xs, us) +# end + +# Export all the cost types/structs. +export Cost, NonQuadraticCost, QuadraticCost + +# Export all the cost types/structs. +export quadraticize_costs, evaluate + diff --git a/src/DynamicsUtils.jl b/src/DynamicsUtils.jl new file mode 100644 index 00000000..ae1b4171 --- /dev/null +++ b/src/DynamicsUtils.jl @@ -0,0 +1,114 @@ +# Utilities for managing linear and nonlinear dynamics. + +# Every Dynamics is assumed to have the following functions defined on it: +# - linearize_dynamics(dyn, x, us) - this function linearizes the dynamics given the state and controls. +# - propagate_dynamics(cost, t, x, us) - this function propagates the dynamics to the next timestep. +# Every Dynamics struct must have a sys_info field of type SystemInfo. +abstract type Dynamics end + +# A type that every nonlinear dynamics struct (unique per use case) can inherit from. These need to have the same +# functions as the Dynamics type. +abstract type NonlinearDynamics <: Dynamics end + +# TODO(hamzah) Add better tests for the LinearDynamics struct and associated functions. +struct LinearDynamics <: Dynamics + A # state + Bs # controls + sys_info::SystemInfo +end +# Constructor for linear dynamics that auto-generates the system info. +LinearDynamics(A, Bs) = LinearDynamics(A, Bs, SystemInfo(length(Bs), last(size(A)), [last(size(Bs[i])) for i in 1:length(Bs)])) + +function propagate_dynamics(dyn::LinearDynamics, t, x, us) + N = dyn.sys_info.num_agents + x_next = dyn.A * x + for i in 1:N + ui = reshape(us[i], dyn.sys_info.num_us[i], 1) + x_next += dyn.Bs[i] * ui + end + return x_next +end + +function linearize_dynamics(dyn::LinearDynamics, x, us) + return dyn +end + +# Export the types of dynamics. +export Dynamics, NonlinearDynamics, LinearDynamics + +# Export the functionality each Dynamics requires. +export propagate_dynamics, linearize_dynamics + + +# Dimensionality helpers. +function xdim(dyn::Dynamics) + return dyn.sys_info.num_x +end + +function udim(dyn::Dynamics) + return sum(dyn.sys_info.num_us) +end + +function udim(dyn::Dynamics, player_idx) + return dyn.sys_info.num_us[player_idx] +end + +export xdim, udim + + +# TODO(hamzah) Add better tests for the unroll_feedback, unroll_raw_controls functions. +# TODO(hamzah) Abstract the unroll_feedback, unroll_raw_controls functions to not assume linear feedback P. + +# Function to unroll a set of feedback matrices from an initial condition. +# Output is a sequence of states xs[:, time] and controls us[player][:, time]. +export unroll_feedback +function unroll_feedback(dyn::Dynamics, Ps, x₁) + @assert length(x₁) == xdim(dyn) + + N = length(Ps) + @assert N == dyn.sys_info.num_agents + + horizon = last(size(first(Ps))) + + # Populate state/control trajectory. + xs = zeros(xdim(dyn), horizon) + xs[:, 1] = x₁ + us = [zeros(udim(dyn, ii), horizon) for ii in 1:N] + for tt in 2:horizon + for ii in 1:N + us[ii][:, tt - 1] = -Ps[ii][:, :, tt - 1] * xs[:, tt - 1] + end + + us_prev = [us[i][:, tt-1] for i in 1:N] + xs[:, tt] = propagate_dynamics(dyn, tt, xs[:, tt-1], us_prev) + end + + # Controls at final time. + for ii in 1:N + us[ii][:, horizon] = -Ps[ii][:, :, horizon] * xs[:, horizon] + end + + return xs, us +end + +# As above, but replacing feedback matrices `P` with raw control inputs `u`. +export unroll_raw_controls +function unroll_raw_controls(dyn::Dynamics, us, x₁) + @assert length(x₁) == xdim(dyn) + + N = length(us) + @assert N == dyn.sys_info.num_agents + + horizon = last(size(first(us))) + + # Populate state trajectory. + xs = zeros(xdim(dyn), horizon) + xs[:, 1] = x₁ + us = [zeros(udim(dyn, ii), horizon) for ii in 1:N] + for tt in 2:horizon + us_prev = [us[i][:, tt-1] for i in 1:N] + xs[:, tt] = propagate_dynamics(dyn, tt, xs[:, tt-1], us_prev) + end + + return xs +end diff --git a/src/LQNashFeedbackSolver.jl b/src/LQNashFeedbackSolver.jl index 09844df1..8787b2d9 100644 --- a/src/LQNashFeedbackSolver.jl +++ b/src/LQNashFeedbackSolver.jl @@ -1,5 +1,6 @@ # A helper function to compute P for all players at time t. -function compute_P_at_t(dyn_at_t::Dynamics, costs_at_t, Zₜ₊₁) +# TODO: Add the QuadraticCost modifier to this costs_at_t argument. +function compute_P_at_t(dyn_at_t::LinearDynamics, costs_at_t, Zₜ₊₁) num_players = size(costs_at_t)[1] num_states = xdim(dyn_at_t) @@ -35,7 +36,7 @@ end # Returns feedback matrices P[player][:, :, time] export solve_lq_nash_feedback function solve_lq_nash_feedback( - dyn::Dynamics, costs::AbstractArray{Cost}, horizon::Int) + dyn::LinearDynamics, costs::AbstractArray{QuadraticCost}, horizon::Int) num_players = size(costs)[1] diff --git a/src/LQRFeedbackSolver.jl b/src/LQRFeedbackSolver.jl index 9fad4a19..08b14fba 100644 --- a/src/LQRFeedbackSolver.jl +++ b/src/LQRFeedbackSolver.jl @@ -5,17 +5,15 @@ using LinearAlgebra # Shorthand function for LTI dynamics and costs. export solve_lqr_feedback -function solve_lqr_feedback( - dyn::Dynamics, costs::Cost, horizon::Int) +function solve_lqr_feedback(dyn::LinearDynamics, costs::QuadraticCost, horizon::Int) dyns = [dyn for _ in 1:horizon] costs = [costs for _ in 1:horizon] return solve_lqr_feedback(dyns, costs, horizon) end # TODO(hamzah): Add interfaces for cases in which one of the arguments is passed in as a list, but the other is not. - export solve_lqr_feedback -function solve_lqr_feedback(dyns::AbstractArray{Dynamics}, costs::AbstractArray{Cost}, horizon::Int) +function solve_lqr_feedback(dyns::AbstractArray{LinearDynamics}, costs::AbstractArray{QuadraticCost}, horizon::Int) # Ensure the number of dynamics and costs are the same as the horizon. @assert(ndims(dyns) == 1 && size(dyns, 1) == horizon) @@ -52,4 +50,4 @@ function solve_lqr_feedback(dyns::AbstractArray{Dynamics}, costs::AbstractArray{ end return Ps -end \ No newline at end of file +end diff --git a/src/LQStackelbergFeedbackSolver.jl b/src/LQStackelbergFeedbackSolver.jl index f33e4293..4651660b 100644 --- a/src/LQStackelbergFeedbackSolver.jl +++ b/src/LQStackelbergFeedbackSolver.jl @@ -27,7 +27,7 @@ end # Returns feedback matrices P[player][:, :, time] export solve_lq_stackelberg_feedback function solve_lq_stackelberg_feedback( - dyn::Dynamics, costs::AbstractArray{Cost}, horizon::Int, leader_idx::Int) + dyn::LinearDynamics, costs::AbstractArray{QuadraticCost}, horizon::Int, leader_idx::Int) # TODO: Add checks for correct input lengths - they should match the horizon. num_players = size(costs)[1] diff --git a/src/StackelbergControlHypothesesFiltering.jl b/src/StackelbergControlHypothesesFiltering.jl index 59d0650e..eb9f2abc 100644 --- a/src/StackelbergControlHypothesesFiltering.jl +++ b/src/StackelbergControlHypothesesFiltering.jl @@ -1,6 +1,8 @@ module StackelbergControlHypothesesFiltering include("Utils.jl") +include("CostUtils.jl") +include("DynamicsUtils.jl") include("LQNashFeedbackSolver.jl") include("LQStackelbergFeedbackSolver.jl") include("LQRFeedbackSolver.jl") diff --git a/src/Utils.jl b/src/Utils.jl index 6a6aa843..a2c9728c 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -1,110 +1,10 @@ -# Utilities for Assignment 3. You should not need to modify this file. - -# Cost for a single player. -# Form is: x^T_t Q^i x + \sum_j u^{jT}_t R^{ij} u^j_t. -# For simplicity, assuming that Q, R are time-invariant, and that dynamics are -# linear time-invariant, i.e. x_{t+1} = A x_t + \sum_i B^i u^i_t. -mutable struct Cost - Q - Rs # Nonzero R^{ij} for Pi. -end - -Cost(Q) = Cost(Q, Dict{Int, Matrix{eltype(Q)}}()) -export Cost - -# Method to add R^{ij}s to a Cost struct. -export add_control_cost! -function add_control_cost!(c::Cost, other_player_idx, Rij) - c.Rs[other_player_idx] = Rij -end - -# Evaluate cost on a state/control trajectory. -# - xs[:, time] -# - us[player][:, time] -export evaluate -function evaluate(c::Cost, xs, us) - horizon = last(size(xs)) - - total = 0.0 - for tt in 1:horizon - total += xs[:, tt]' * c.Q * xs[:, tt] - total += sum(us[jj][:, tt]' * Rij * us[jj][:, tt] for (jj, Rij) in c.Rs) - end - - return total +# Utilities +struct SystemInfo + num_agents::Int + num_x::Int + num_us::AbstractArray{Int} + num_v::Int end +SystemInfo(num_agents, num_x, num_us) = SystemInfo(num_agents, num_x, num_us, 0) -# Dynamics. -export Dynamics -struct Dynamics - A - Bs -end - -export xdim -function xdim(dyn::Dynamics) - return first(size(dyn.A)) -end - -export udim -function udim(dyn::Dynamics) - return sum(last(size(B)) for B in dyn.Bs) -end - -function udim(dyn::Dynamics, player_idx) - return last(size(dyn.Bs[player_idx])) -end - -# Function to unroll a set of feedback matrices from an initial condition. -# Output is a sequence of states xs[:, time] and controls us[player][:, time]. -export unroll_feedback -function unroll_feedback(dyn::Dynamics, Ps, x₁) - @assert length(x₁) == xdim(dyn) - - N = length(Ps) - @assert N == length(dyn.Bs) - - horizon = last(size(first(Ps))) - - # Populate state/control trajectory. - xs = zeros(xdim(dyn), horizon) - xs[:, 1] = x₁ - us = [zeros(udim(dyn, ii), horizon) for ii in 1:N] - for tt in 2:horizon - for ii in 1:N - us[ii][:, tt - 1] = -Ps[ii][:, :, tt - 1] * xs[:, tt - 1] - end - - xs[:, tt] = dyn.A * xs[:, tt - 1] + sum( - dyn.Bs[ii] * us[ii][:, tt - 1] for ii in 1:N) - end - - # Controls at final time. - for ii in 1:N - us[ii][:, horizon] = -Ps[ii][:, :, horizon] * xs[:, horizon] - end - - return xs, us -end - -# As above, but replacing feedback matrices `P` with raw control inputs `u`. -export unroll_raw_controls -function unroll_raw_controls(dyn::Dynamics, us, x₁) - @assert length(x₁) == xdim(dyn) - - N = length(us) - @assert N == length(dyn.Bs) - - horizon = last(size(first(us))) - - # Populate state trajectory. - xs = zeros(xdim(dyn), horizon) - xs[:, 1] = x₁ - us = [zeros(udim(dyn, ii), horizon) for ii in 1:N] - for tt in 2:horizon - xs[:, tt] = dyn.A * xs[:, tt - 1] + sum( - dyn.Bs[ii] * us[ii][:, tt - 1] for ii in 1:N) - end - - return xs -end +export SystemInfo diff --git a/test/TestLQSolvers.jl b/test/TestLQSolvers.jl index 2dd0f802..5329948e 100644 --- a/test/TestLQSolvers.jl +++ b/test/TestLQSolvers.jl @@ -21,13 +21,13 @@ seed!(0) 0 0 0 1] B₁ = [0 0.1 0 0]' B₂ = [0 0 0 0.1]' - dyn = Dynamics(A, [B₁, B₂]) + dyn = LinearDynamics(A, [B₁, B₂]) Q₁ = [0 0 0 0; 0 0 0 0; 0 0 1. 0; 0 0 0 0] - c₁ = Cost(Q₁) + c₁ = QuadraticCost(Q₁) add_control_cost!(c₁, 1, ones(1, 1)) add_control_cost!(c₁, 2, zeros(1, 1)) @@ -35,7 +35,7 @@ seed!(0) 0 0 0 0; -1 0 1 0; 0 0 0 0] - c₂ = Cost(Q₂) + c₂ = QuadraticCost(Q₂) add_control_cost!(c₂, 2, ones(1, 1)) add_control_cost!(c₂, 1, zeros(1, 1))