diff --git a/Project.toml b/Project.toml index 6b4d02b54..a2c6f4132 100644 --- a/Project.toml +++ b/Project.toml @@ -15,9 +15,12 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RandomFeatures = "36c3bae2-c0c3-419d-b3b4-eebadd35c5e5" ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" @@ -32,8 +35,9 @@ GaussianProcesses = "0.12" MCMCChains = "4.14, 5, 6" PyCall = "1.93" ScikitLearn = "0.6, 0.7" +RandomFeatures = "0.3" StatsBase = "0.33" -julia = "1.6" +julia = "1.6, 1.7, 1.8" [extras] Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/docs/Project.toml b/docs/Project.toml index a500ef4a7..51a4903c7 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,3 @@ [deps] CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" - diff --git a/docs/make.jl b/docs/make.jl index 9e20907e5..cbbe22da9 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,7 +12,11 @@ design = ["AbstractMCMC sampling API" => "API/AbstractMCMC.md"] api = [ "CalibrateEmulateSample" => [ - "Emulators" => ["General Emulator" => "API/Emulators.md", "Gaussian Process" => "API/GaussianProcess.md"], + "Emulators" => [ + "General Interface" => "API/Emulators.md", + "Gaussian Process" => "API/GaussianProcess.md", +# "Random Features" => "API/RandomFeatures.md", + ], "MarkovChainMonteCarlo" => "API/MarkovChainMonteCarlo.md", "Utilities" => "API/Utilities.md", ], diff --git a/docs/src/API/GaussianProcess.md b/docs/src/API/GaussianProcess.md index 79be9e7c0..3b1058352 100644 --- a/docs/src/API/GaussianProcess.md +++ b/docs/src/API/GaussianProcess.md @@ -8,6 +8,7 @@ CurrentModule = CalibrateEmulateSample.Emulators GaussianProcessesPackage PredictionType GaussianProcess -build_models! 
+build_models!(::GaussianProcess{GPJL}, ::PairedDataContainer{FT}) where {FT <: AbstractFloat} optimize_hyperparameters!(::GaussianProcess{GPJL}) +predict(::GaussianProcess{GPJL}, ::AbstractMatrix{FT}) where {FT <: AbstractFloat} ``` \ No newline at end of file diff --git a/docs/src/API/RandomFeatures.md b/docs/src/API/RandomFeatures.md new file mode 100644 index 000000000..b5f5956bc --- /dev/null +++ b/docs/src/API/RandomFeatures.md @@ -0,0 +1,39 @@ +# RandomFeatures + +```@meta +CurrentModule = CalibrateEmulateSample.Emulators +``` + +## Scalar interface + +```@docs +ScalarRandomFeatureInterface +ScalarRandomFeatureInterface(::Int,::Int) +build_models!(::ScalarRandomFeatureInterface, ::PairedDataContainer{FT}) where {FT <: AbstractFloat} +predict(::ScalarRandomFeatureInterface, ::M) where {M <: AbstractMatrix} +``` + +## Vector Interface + +```@docs +VectorRandomFeatureInterface +VectorRandomFeatureInterface(::Int, ::Int, ::Int) +build_models!(::VectorRandomFeatureInterface, ::PairedDataContainer{FT}) where {FT <: AbstractFloat} +predict(::VectorRandomFeatureInterface, ::M) where {M <: AbstractMatrix} +``` + +## Other utilities +```@docs +get_rfms +get_fitted_features +get_batch_sizes +get_n_features +get_input_dim +get_output_dim +get_rng +get_diagonalize_input +get_feature_decomposition +get_optimizer_options +optimize_hyperparameters!(::ScalarRandomFeatureInterface) +optimize_hyperparameters!(::VectorRandomFeatureInterface) +``` diff --git a/examples/Emulator/GaussianProcess/plot_GP.jl b/examples/Emulator/GaussianProcess/plot_GP.jl index 5a815969c..a0ee9751f 100644 --- a/examples/Emulator/GaussianProcess/plot_GP.jl +++ b/examples/Emulator/GaussianProcess/plot_GP.jl @@ -68,9 +68,9 @@ if !isdir(output_directory) end #create the machine learning tools: Gaussian Process -gppackage = GPJL() +gppackage = SKLJL() pred_type = YType() -gaussian_process = GaussianProcess(gppackage, noise_learn = true) +gaussian_process = GaussianProcess(gppackage, noise_learn = false) # Generate training data (x-y pairs, where x ∈ ℝ ᵖ, y ∈ ℝ ᵈ) # x = [x1, x2]: inputs/predictors/features/parameters @@ -92,7 +92,7 @@ gx[2, :] = g2x # Add noise η μ = zeros(d) -Σ = 0.1 * [[0.8, 0.0] [0.0, 0.5]] # d x d +Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d noise_samples = rand(MvNormal(μ, Σ), n) # y = G(x) + η Y = gx .+ noise_samples @@ -182,9 +182,9 @@ println("GP trained") # Plot mean and variance of the predicted observables y1 and y2 # For this, we generate test points on a x1-x2 grid. 
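The API page added above (docs/src/API/RandomFeatures.md) lists the scalar and vector random-feature interfaces without showing how they plug into an Emulator. A minimal, self-contained sketch assembled from the examples introduced later in this patch; the feature count, noise covariance, and test function here are illustrative, not defaults:

```julia
using CalibrateEmulateSample.Emulators
using CalibrateEmulateSample.DataContainers

n, p, d = 100, 2, 2                          # training points, input dim, output dim (illustrative)
X = 2.0 * π * rand(p, n)
Y = vcat((sin.(X[1, :]) .+ cos.(X[2, :]))', (sin.(X[1, :]) .- cos.(X[2, :]))')
Σ = 0.1 * [0.8 0.1; 0.1 0.5]                 # observational noise covariance, as in the examples
iopairs = PairedDataContainer(X, Y, data_are_columns = true)

srfi = ScalarRandomFeatureInterface(200, p, diagonalize_input = true)
# vector alternative: VectorRandomFeatureInterface(200, p, d, diagonalize_output = true)

emulator = Emulator(srfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true)
optimize_hyperparameters!(emulator)          # RF hyperparameters are fitted at build time; call kept for a uniform interface
new_inputs = 2.0 * π * rand(p, 10)           # predict expects input_dim × N_samples
rf_mean, rf_cov = predict(emulator, new_inputs, transform_to_real = true)
```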
-n_pts = 50 -x1 = range(0.0, stop = 2 * π, length = n_pts) -x2 = range(0.0, stop = 2 * π, length = n_pts) +n_pts = 200 +x1 = range(0.0, stop = (4.0 / 5.0) * 2 * π, length = n_pts) +x2 = range(0.0, stop = (4.0 / 5.0) * 2 * π, length = n_pts) X1, X2 = meshgrid(x1, x2) # Input for predict has to be of size N_samples x input_dim inputs = permutedims(hcat(X1[:], X2[:]), (2, 1)) diff --git a/examples/Emulator/RandomFeature/Project.toml b/examples/Emulator/RandomFeature/Project.toml new file mode 100644 index 000000000..9d41c3993 --- /dev/null +++ b/examples/Emulator/RandomFeature/Project.toml @@ -0,0 +1,15 @@ +[deps] +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[compat] +FiniteDiff = "~2.10" +julia = "~1.6" diff --git a/examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl b/examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl new file mode 100644 index 000000000..d202f0391 --- /dev/null +++ b/examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl @@ -0,0 +1,259 @@ +# Reference the in-tree version of CalibrateEmulateSample on Julias load path +include(joinpath(@__DIR__, "..", "..", "ci", "linkfig.jl")) + +# Import modules +using Random +using StableRNGs +using Distributions +using Statistics +using LinearAlgebra +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.ParameterDistributions + +case = "scalar" +println("running case $case") + +diagonalize_input = true + +plot_flag = true +if plot_flag + using Plots + gr(size = (1500, 700)) + Plots.scalefontsizes(1.3) + font = Plots.font("Helvetica", 18) + fontdict = Dict(:guidefont => font, :xtickfont => font, :ytickfont => font, :legendfont => font) + +end + +function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}) where {T} + m, n = length(vy), length(vx) + gx = reshape(repeat(vx, inner = m, outer = 1), m, n) + gy = reshape(repeat(vy, inner = 1, outer = n), m, n) + + return gx, gy +end +rng_seed = 41 +Random.seed!(rng_seed) +output_directory = joinpath(@__DIR__, "output") +if !isdir(output_directory) + mkdir(output_directory) +end + +#problem +n = 100 # number of training points +p = 2 # input dim +d = 2 # output dim +X = 2.0 * π * rand(p, n) +# G(x1, x2) +g1x = sin.(X[1, :]) .+ cos.(X[2, :]) +g2x = sin.(X[1, :]) .- cos.(X[2, :]) +gx = zeros(2, n) +gx[1, :] = g1x +gx[2, :] = g2x + +# Add noise η +μ = zeros(d) +Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d +noise_samples = rand(MvNormal(μ, Σ), n) +# y = G(x) + η +Y = gx .+ noise_samples + +iopairs = PairedDataContainer(X, Y, data_are_columns = true) +@assert get_inputs(iopairs) == X +@assert get_outputs(iopairs) == Y + +#plot training data with and without noise +if plot_flag + p1 = plot( + X[1, :], + X[2, :], + g1x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + figpath = joinpath(output_directory, "RF_observed_y1nonoise.png") + savefig(figpath) + + p2 = plot( + X[1, :], + X[2, :], + g2x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = 
"x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2nonoise.png") + savefig(figpath) + + p3 = plot( + X[1, :], + X[2, :], + Y[1, :], + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y1.png") + savefig(figpath) + + p4 = plot( + X[1, :], + X[2, :], + Y[2, :], + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2.png") + savefig(figpath) + +end + +# setup random features +n_features = 180 + +srfi = ScalarRandomFeatureInterface(n_features, p, diagonalize_input = diagonalize_input) +emulator = Emulator(srfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) +println("build RF with $n training points and $(n_features) random features.") + +optimize_hyperparameters!(emulator) # although RF already optimized + +# Plot mean and variance of the predicted observables y1 and y2 +# For this, we generate test points on a x1-x2 grid. +n_pts = 200 +x1 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) +x2 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) +X1, X2 = meshgrid(x1, x2) +# Input for predict has to be of size N_samples x input_dim +inputs = permutedims(hcat(X1[:], X2[:]), (2, 1)) + +rf_mean, rf_cov = predict(emulator, inputs, transform_to_real = true) +println("end predictions at ", n_pts * n_pts, " points") + + +#plot predictions +for y_i in 1:d + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 2 x 40000 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) # 2 x 40000 + if plot_flag + p5 = plot( + x1, + x2, + mean_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "mean of y" * string(y_i), + zguidefontrotation = 90, + ) + end + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + if plot_flag + p6 = plot( + x1, + x2, + var_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "var of y" * string(y_i), + zguidefontrotation = 90, + ) + + plot(p5, p6, layout = (1, 2), legend = false) + + savefig(joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_predictions.png")) + end +end + +# Plot the true components of G(x1, x2) +g1_true = sin.(inputs[1, :]) .+ cos.(inputs[2, :]) +g1_true_grid = reshape(g1_true, n_pts, n_pts) +if plot_flag + p7 = plot( + x1, + x2, + g1_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) + cos(x2)", + zguidefontrotation = 90, + ) + savefig(joinpath(output_directory, "RF_" * case * "_true_g1.png")) +end + +g2_true = sin.(inputs[1, :]) .- cos.(inputs[2, :]) +g2_true_grid = reshape(g2_true, n_pts, n_pts) +if plot_flag + p8 = plot( + x1, + x2, + g2_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) - cos(x2)", + zguidefontrotation = 90, + ) + g_true_grids = [g1_true_grid, g2_true_grid] + + savefig(joinpath(output_directory, "RF_" * case * "_true_g2.png")) + +end + +# Plot the difference between the truth and the mean of the predictions +for y_i in 1:d + + # Reshape rf_cov to size N_samples x output_dim + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) 
# 40000 x 2 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + # Compute and plot 1/variance * (truth - prediction)^2 + + if plot_flag + zlabel = "1/var * (true_y" * string(y_i) * " - predicted_y" * string(y_i) * ")^2" + + p9 = plot( + x1, + x2, + sqrt.(1.0 ./ var_grid .* (g_true_grids[y_i] .- mean_grid) .^ 2), + st = :surface, + camera = (30, 60), + c = :magma, + zlabel = zlabel, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + savefig(joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_difference_truth_prediction.png")) + end +end diff --git a/examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl b/examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl new file mode 100644 index 000000000..05e4f348f --- /dev/null +++ b/examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl @@ -0,0 +1,286 @@ +# Reference the in-tree version of CalibrateEmulateSample on Julias load path +include(joinpath(@__DIR__, "..", "..", "ci", "linkfig.jl")) + +# Import modules +using Random +using StableRNGs +using Distributions +using Statistics +using LinearAlgebra +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.ParameterDistributions +plot_flag = true +if plot_flag + using Plots + gr(size = (1500, 700)) + Plots.scalefontsizes(1.3) + font = Plots.font("Helvetica", 18) + fontdict = Dict(:guidefont => font, :xtickfont => font, :ytickfont => font, :legendfont => font) + +end + + +function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}) where {T} + m, n = length(vy), length(vx) + gx = reshape(repeat(vx, inner = m, outer = 1), m, n) + gy = reshape(repeat(vy, inner = 1, outer = n), m, n) + + return gx, gy +end + +function main() + + rng_seed = 41 + Random.seed!(rng_seed) + output_directory = joinpath(@__DIR__, "output") + if !isdir(output_directory) + mkdir(output_directory) + end + + cases = ["svd-diag", "svd-nondiag", "nosvd-diag", "nosvd-nondiag"] + case_mask = 1:4 # which cases to do + + #problem + n = 150 # number of training points + p = 2 # input dim + d = 2 # output dim + X = 2.0 * π * rand(p, n) + # G(x1, x2) + g1x = sin.(X[1, :]) .+ cos.(X[2, :]) + g2x = sin.(X[1, :]) .- cos.(X[2, :]) + gx = zeros(2, n) + gx[1, :] = g1x + gx[2, :] = g2x + + # Add noise η + μ = zeros(d) + Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d + noise_samples = rand(MvNormal(μ, Σ), n) + # y = G(x) + η + Y = gx .+ noise_samples + + iopairs = PairedDataContainer(X, Y, data_are_columns = true) + @assert get_inputs(iopairs) == X + @assert get_outputs(iopairs) == Y + + #plot training data with and without noise + if plot_flag + p1 = plot( + X[1, :], + X[2, :], + g1x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + figpath = joinpath(output_directory, "RF_observed_y1nonoise.png") + savefig(figpath) + + p2 = plot( + X[1, :], + X[2, :], + g2x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2nonoise.png") + savefig(figpath) + + p3 = plot( + X[1, :], + X[2, :], + Y[1, :], + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y1.png") + savefig(figpath) + + p4 = plot( + X[1, :], + X[2, :], + Y[2, :], + st = :surface, + camera = (30, 60), + c = :cividis, + 
xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2.png") + savefig(figpath) + + end + + for case in cases[case_mask] + println("running case $case") + + + + + # setup random features + n_features = 200 + + if case == "svd-diag" + vrfi = VectorRandomFeatureInterface(n_features, p, d, diagonalize_output = true) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) + elseif case == "svd-nondiag" + vrfi = VectorRandomFeatureInterface(n_features, p, d) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) + elseif case == "nosvd-diag" + vrfi = VectorRandomFeatureInterface(n_features, p, d, diagonalize_output = true) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true, decorrelate = false) + elseif case == "nosvd-nondiag" + vrfi = VectorRandomFeatureInterface(n_features, p, d) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true, decorrelate = false) + end + println("build RF with $n training points and $(n_features) random features.") + + optimize_hyperparameters!(emulator) # although RF already optimized + + # Plot mean and variance of the predicted observables y1 and y2 + # For this, we generate test points on a x1-x2 grid. + n_pts = 200 + x1 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) + x2 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) + X1, X2 = meshgrid(x1, x2) + # Input for predict has to be of size input_dim x N_samples + inputs = permutedims(hcat(X1[:], X2[:]), (2, 1)) + + rf_mean, rf_cov = predict(emulator, inputs, transform_to_real = true) + println("end predictions at ", n_pts * n_pts, " points") + + + #plot predictions + for y_i in 1:d + + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 2 x 40000 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) # 2 x 40000 + if plot_flag + p5 = plot( + x1, + x2, + mean_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "mean of y" * string(y_i), + zguidefontrotation = 90, + ) + end + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + if plot_flag + p6 = plot( + x1, + x2, + var_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "var of y" * string(y_i), + zguidefontrotation = 90, + ) + + plot(p5, p6, layout = (1, 2), legend = false) + + savefig(joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_predictions.png")) + end + end + + # Plot the true components of G(x1, x2) + g1_true = sin.(inputs[1, :]) .+ cos.(inputs[2, :]) + g1_true_grid = reshape(g1_true, n_pts, n_pts) + if plot_flag + p7 = plot( + x1, + x2, + g1_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) + cos(x2)", + zguidefontrotation = 90, + ) + savefig(joinpath(output_directory, "RF_" * case * "_true_g1.png")) + end + + g2_true = sin.(inputs[1, :]) .- cos.(inputs[2, :]) + g2_true_grid = reshape(g2_true, n_pts, n_pts) + if plot_flag + p8 = plot( + x1, + x2, + g2_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) - cos(x2)", + zguidefontrotation = 90, + ) + g_true_grids = [g1_true_grid, g2_true_grid] + + savefig(joinpath(output_directory, "RF_" * case * "_true_g2.png")) + + end + MSE = 1 / size(rf_mean, 2) * sqrt(sum((rf_mean[1, :] - g1_true) .^ 2 + 
(rf_mean[2, :] - g2_true) .^ 2)) + println("L^2 error of mean and latent truth:", MSE) + + # Plot the difference between the truth and the mean of the predictions + for y_i in 1:d + + # Reshape rf_cov to size N_samples x output_dim + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 40000 x 2 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + # Compute and plot 1/variance * (truth - prediction)^2 + + if plot_flag + zlabel = "1/var * (true_y" * string(y_i) * " - predicted_y" * string(y_i) * ")^2" + + p9 = plot( + x1, + x2, + sqrt.(1.0 ./ var_grid .* (g_true_grids[y_i] .- mean_grid) .^ 2), + st = :surface, + camera = (30, 60), + c = :magma, + zlabel = zlabel, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + savefig( + joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_difference_truth_prediction.png"), + ) + end + end + end +end + +main() diff --git a/examples/GCM/Project.toml b/examples/GCM/Project.toml new file mode 100644 index 000000000..aba51ad3c --- /dev/null +++ b/examples/GCM/Project.toml @@ -0,0 +1,16 @@ +[deps] +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[compat] +FiniteDiff = "~2.10" +julia = "~1.6" diff --git a/examples/GCM/data_from_eki_inflateyonly_100.jld2 b/examples/GCM/data_from_eki_inflateyonly_100.jld2 new file mode 100644 index 000000000..1eae5106d Binary files /dev/null and b/examples/GCM/data_from_eki_inflateyonly_100.jld2 differ diff --git a/examples/GCM/emulate_sample_script.jl b/examples/GCM/emulate_sample_script.jl new file mode 100644 index 000000000..806a3058d --- /dev/null +++ b/examples/GCM/emulate_sample_script.jl @@ -0,0 +1,227 @@ +# Script to run Emulation and Sampling on data from GCM + +# Import modules +using Distributions # probability distributions and associated functions +using LinearAlgebra +using Plots +using Random +using JLD2 +# CES +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.MarkovChainMonteCarlo +using CalibrateEmulateSample.ParameterDistributions +using CalibrateEmulateSample.DataContainers + +@time begin + + rng_seed = 2413798 + Random.seed!(rng_seed) + + # expname = "vrf-nondiag-logdet_newprior" + #emulator_type ∈ ["GPR","ScalarRFR","VectorRFR-svd-diag","VectorRFR-svd-nondiag", "VectorRFR-nondiag"] + # emulator_type = "GPR" + # expname = "gpr_newprior" + + # emulator_type = "ScalarRFR" + # expname = "srf_newprior" + + # emulator_type = "VectorRFR-svd-diag" + # expname = "vrf-svd-diag_newprior" + + # emulator_type = "VectorRFR-svd-nondiag" + # expname = "vrf-svd-nondiag_newprior" + + emulator_type = "VectorRFR-nondiag" + expname = "vrf-nondiag_newprior" + + # Output figure save directory + example_directory = @__DIR__ + println(example_directory) + figure_save_directory = joinpath(example_directory, "output") + data_save_directory = joinpath(example_directory, "output") + if !isdir(figure_save_directory) + 
mkdir(figure_save_directory) + end + if !isdir(data_save_directory) + mkdir(data_save_directory) + end + + # Load data from file + datafile = "data_from_eki_inflateyonly_100.jld2" + inputs = load(datafile)["inputs"] #100 x 2 x 10 + outputs = load(datafile)["outputs"] #100 x 96 x 10 + truth = load(datafile)["truth"] # 96 + obs_noise_cov = load(datafile)["obs_noise_cov"] # 96 x 96 + + #take only first 400 points + iter_mask = 1:4 + #data_mask = 1:32 + # data_mask= 33:64 + # data_mask= 65:96 + data_mask = 1:96 + #data_mask = [5*i for i = 1:Int(floor(96/5))] + + inputs = inputs[:, :, iter_mask] + outputs = outputs[:, data_mask, iter_mask] + obs_noise_cov = obs_noise_cov[data_mask, data_mask] + truth = truth[data_mask] + + # priorfile = "priors.jld2" + # prior = load(priorfile)["prior"] + + # derived quantities + N_ens, input_dim, N_iter = size(inputs) + output_dim = size(outputs, 2) + + stacked_inputs = reshape(permutedims(inputs, (1, 3, 2)), (N_ens * N_iter, input_dim)) + stacked_outputs = reshape(permutedims(outputs, (1, 3, 2)), (N_ens * N_iter, output_dim)) + input_output_pairs = PairedDataContainer(stacked_inputs, stacked_outputs, data_are_columns = false) #data are rows + normalized = true + + # setup random features + eki_options_override = Dict("tikhonov" => 0, "multithread" => "ensemble") #faster than tullio multithread for training + + + if emulator_type == "VectorRFR-svd-nondiag" || emulator_type == "VectorRFR-nondiag" + if emulator_type == "VectorRFR-svd-nondiag" + println("Running Vector RF model - using SVD and assuming non-diagonal variance ") + elseif emulator_type == "VectorRFR-nondiag" + println("Running Vector RF model - without SVD and assuming non-diagonal variance ") + end + + n_features = 80 * Int(floor(5 * sqrt(N_ens * N_iter))) + println("build RF with $(N_ens*N_iter) training points and $(n_features) random features.") + + + mlt = VectorRandomFeatureInterface(n_features, input_dim, output_dim, optimizer_options = eki_options_override) + + elseif emulator_type == "VectorRFR-svd-diag" + + println("Running Vector RF model - using SVD and assuming diagonal variance") + n_features = 20 * Int(floor(5 * sqrt(N_ens * N_iter))) + println("build RF with $(N_ens*N_iter) training points and $(n_features) random features.") + + mlt = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + diagonalize_output = true, + optimizer_options = eki_options_override, + ) + + elseif emulator_type == "ScalarRFR" + println("Running Scalar RF model") + n_features = 5 * Int(floor(5 * sqrt(N_ens * N_iter))) + mlt = ScalarRandomFeatureInterface(n_features, input_dim, optimizer_options = eki_options_override) + + else + emulator_type == "GPR" + println("Running Gaussian Process model") + gppackage = SKLJL() + mlt = GaussianProcess(gppackage, noise_learn = false) + + end + + if emulator_type == "VectorRFR-nondiag" + emulator = Emulator( + mlt, + input_output_pairs; + obs_noise_cov = obs_noise_cov, + normalize_inputs = normalized, + decorrelate = false, + ) + else + emulator = Emulator(mlt, input_output_pairs; obs_noise_cov = obs_noise_cov, normalize_inputs = normalized) + + end + optimize_hyperparameters!(emulator) + + # + # save the emulator! 
+ # + @save joinpath(data_save_directory, "emulator_" * expname * ".jld2") emulator + + # + # predict at some validation points + # + validate_id = ["phys", "mean", "rand"] + + for vid in validate_id + if vid == "phys" + new_input = [log(0.7 / 0.3) log(7200)]' # physical parameter value (at truth) + elseif vid == "mean" + new_input = [log(0.5 / (1 - 0.5)) log(43200)]' # mean-of-prior parameter value ("near-ish" truth) + elseif vid == "rand" + new_input = [log(0.31735951644387783 / (1 - 0.31735951644387783)) log(90632.50269636544)]' # random parameter value ("far" from truth + end + + pred_mean, pred_cov = predict(emulator, new_input, transform_to_real = true) + pred_sd = sqrt.([max(10 * eps(), pred_cov[1][i, i]) for i in 1:size(pred_cov[1], 1)]) + + + # NB pred_cov is a vector of matrices + tobj = load("truthobj_" * vid * "param.jld2")["truthobj"] + t_mean = tobj.mean[data_mask] + t_cov = tobj.cov[data_mask, data_mask] + + println("prediction error at truth for " * vid * " case:") + println(" mean: ", norm(t_mean - pred_mean)) + println(" cov: ", norm(t_cov - pred_cov[1])) + + save( + joinpath(data_save_directory, vid * "_" * expname * "_results.jld2"), + "pred_mean", + pred_mean, + "pred_cov", + pred_cov, + "pred_sd", + pred_sd, + ) + save(joinpath(data_save_directory, vid * "_" * expname * "_truth.jld2"), "true_mean", t_mean, "true_cov", t_cov) + end + + plot_input = [log(0.7 / 0.3) log(7200)]' # physical parameter value (at truth) + plot_mean, plot_cov = predict(emulator, plot_input, transform_to_real = true) + plot_sd = sqrt.([max(10 * eps(), plot_cov[1][i, i]) for i in 1:size(plot_cov[1], 1)]) + + + ids = [1:32, 33:64, 65:96] + plotnames = ["rh", "pr", "ext"] + + for (id, pn) in zip(ids, plotnames) + if data_mask == 1:96 + plt = plot( + collect(id), + plot_mean[id], + show = true, + ribbon = [2 * plot_sd[id]; 2 * plot_sd[id]], + linewidth = 5, + size = (600, 600), + label = "", + ) + figpath = joinpath(figure_save_directory, "predict_" * expname * "_" * pn * "_at_truth.png") + savefig(figpath) + println("plot saved at " * figpath) + else + if data_mask == id + plot_mask = 1:length(data_mask) + plt = plot( + collect(id), + plot_mean[plot_mask], + show = true, + ribbon = [2 * plot_sd[plot_mask]; 2 * plot_sd[plot_mask]], + linewidth = 5, + size = (600, 600), + label = "", + ) + figpath = joinpath(figure_save_directory, "predict_" * expname * "_" * pn * "_at_truth.png") + savefig(figpath) + println("plot saved at " * figpath) + end + end + + end + + + +end # for @time diff --git a/examples/GCM/priors.jld2 b/examples/GCM/priors.jld2 new file mode 100644 index 000000000..860634326 Binary files /dev/null and b/examples/GCM/priors.jld2 differ diff --git a/examples/GCM/sbatch_emulate_sample_jl b/examples/GCM/sbatch_emulate_sample_jl new file mode 100644 index 000000000..efbdb391c --- /dev/null +++ b/examples/GCM/sbatch_emulate_sample_jl @@ -0,0 +1,36 @@ +#!/bin/bash +#Submit this script with: sbatch thefilename +#SBATCH --time=6:00:00 # walltime +#SBATCH --ntasks-per-node=1 # number of processor cores (i.e. tasks) +#SBATCH --cpus-per-task=28 +#SBATCH --mem-per-cpu=6000 +#SBATCH -J "emulate_sample" # job name +#SBATCH --output=output/out_err/slurm_%j.out +#SBATCH --error=output/out_err/slurm_%j.err + +#general +set -euo pipefail #kill job if anything fails\ +#set -x # + +#modules (not automatically loaded with session) +module load julia/1.8.5 + +#julia package management + +export JULIA_PROJECT=@. 
+#precompiling is now done manually before +#julia -e 'using Pkg; Pkg.instantiate(); Pkg.API.precompile()' + +#run code +start=$(date +%s) + + +julia --project -t 28 emulate_sample_script.jl + +end=$(date +%s) +runtime=$((end-start)) +echo "********************" +echo "run time: $runtime" +echo "********************" + + diff --git a/examples/GCM/truthobj_meanparam.jld2 b/examples/GCM/truthobj_meanparam.jld2 new file mode 100644 index 000000000..cef3b8f9c Binary files /dev/null and b/examples/GCM/truthobj_meanparam.jld2 differ diff --git a/examples/GCM/truthobj_physparam.jld2 b/examples/GCM/truthobj_physparam.jld2 new file mode 100644 index 000000000..b2ab8e962 Binary files /dev/null and b/examples/GCM/truthobj_physparam.jld2 differ diff --git a/examples/GCM/truthobj_randparam.jld2 b/examples/GCM/truthobj_randparam.jld2 new file mode 100644 index 000000000..248ce29f1 Binary files /dev/null and b/examples/GCM/truthobj_randparam.jld2 differ diff --git a/examples/Lorenz/GModel.jl b/examples/Lorenz/GModel.jl index fd2820f00..4829fe9c8 100644 --- a/examples/Lorenz/GModel.jl +++ b/examples/Lorenz/GModel.jl @@ -7,11 +7,14 @@ export lorenz_forward include("GModel_common.jl") function run_ensembles(settings, lorenz_params, nd, N_ens) - g_ens = zeros(nd, N_ens) - for i in 1:N_ens + nthreads = Threads.nthreads() + g_ens = zeros(nthreads, nd, N_ens) + Threads.@threads for i in 1:N_ens + tid = Threads.threadid() # run the model with the current parameters, i.e., map θ to G(θ) - g_ens[:, i] = lorenz_forward(settings, lorenz_params[i]) + g_ens[tid, :, i] = lorenz_forward(settings, lorenz_params[i]) end + g_ens = dropdims(sum(g_ens, dims = 1), dims = 1) return g_ens end diff --git a/examples/Lorenz/calibrate.jl b/examples/Lorenz/calibrate.jl index 79038276a..b337a653d 100644 --- a/examples/Lorenz/calibrate.jl +++ b/examples/Lorenz/calibrate.jl @@ -39,7 +39,7 @@ function main() dynamics = 2 # Transient is 2 # Statistics integration length # This has to be less than 360 and 360 must be divisible by Ts_days - Ts_days = 90.0 # Integration length in days + Ts_days = 30.0 # Integration length in days # Stats type, which statistics to construct from the L96 system # 4 is a linear fit over a batch of length Ts_days # 5 is the mean over a batch of length Ts_days @@ -150,8 +150,9 @@ function main() else println("Using truth values to compute covariance") n_samples = 20 - yt = zeros(length(gt), n_samples) - for i in 1:n_samples + nthreads = Threads.nthreads() + yt = zeros(nthreads, length(gt), n_samples) + Threads.@threads for i in 1:n_samples lorenz_settings_local = GModel.LSettings( dynamics, stats_type, @@ -167,8 +168,10 @@ function main() ω_fixed, ω_true, ) - yt[:, i] = GModel.run_G_ensemble(params_true, lorenz_settings_local) + tid = Threads.threadid() + yt[tid, :, i] = GModel.run_G_ensemble(params_true, lorenz_settings_local) end + yt = dropdims(sum(yt, dims = 1), dims = 1) # Variance of truth data #Γy = convert(Array, Diagonal(dropdims(mean((yt.-mean(yt,dims=1)).^2,dims=1),dims=1))) # Covariance of truth data @@ -190,7 +193,7 @@ function main() # EKP parameters N_ens = 20 # number of ensemble members - N_iter = 5 # number of EKI iterations + N_iter = 6 # number of EKI iterations # initial parameters: N_params x N_ens initial_params = EKP.construct_initial_ensemble(priors, N_ens; rng_seed = rng_seed) diff --git a/examples/Lorenz/emulate_sample.jl b/examples/Lorenz/emulate_sample.jl index 46e2286ba..48a304f6c 100644 --- a/examples/Lorenz/emulate_sample.jl +++ b/examples/Lorenz/emulate_sample.jl @@ -1,6 +1,5 @@ 
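The threaded ensemble evaluation added above in GModel.jl (and mirrored in calibrate.jl's truth-sample loop) writes into a buffer with a leading thread dimension and sums it away afterwards. A standalone sketch of that pattern, where `forward_map` is a hypothetical stand-in for `lorenz_forward` / `run_G_ensemble`:

```julia
# Minimal sketch of the buffer-per-thread pattern used above (assumes Julia started with -t N).
function run_ensembles_threaded(forward_map, params, nd, N_ens)
    nthreads = Threads.nthreads()
    g_ens = zeros(nthreads, nd, N_ens)
    Threads.@threads for i in 1:N_ens
        tid = Threads.threadid()
        # each column i is written by exactly one task; the extra leading dimension
        # keeps writes from different threads on disjoint slices
        g_ens[tid, :, i] = forward_map(params[i])
    end
    # all but one slice per column are zero, so summing over the thread dimension
    # recovers the usual nd × N_ens ensemble array
    return dropdims(sum(g_ens, dims = 1), dims = 1)
end

# usage sketch: G = run_ensembles_threaded(my_model, [Θ[:, i] for i in 1:size(Θ, 2)], nd, size(Θ, 2))
```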
# Import modules include(joinpath(@__DIR__, "..", "ci", "linkfig.jl")) -include(joinpath(@__DIR__, "GModel.jl")) # Contains Lorenz 96 source code # Import modules using Distributions # probability distributions and associated functions @@ -35,180 +34,211 @@ end function main() - ##### - # Should be loaded: - τc = 5.0 - F_true = 8.0 # Mean F - A_true = 2.5 # Transient F amplitude - ω_true = 2.0 * π / (360.0 / τc) # Frequency of the transient F - #### - - exp_name = "Lorenz_histogram_F$(F_true)_A$(A_true)-w$(ω_true)" - rng_seed = 44009 - Random.seed!(rng_seed) - # loading relevant data - homedir = pwd() - println(homedir) - figure_save_directory = joinpath(homedir, "output/") - data_save_directory = joinpath(homedir, "output/") - data_save_file = joinpath(data_save_directory, "calibrate_results.jld2") - - if !isfile(data_save_file) - throw( - ErrorException( - "data file $data_save_file not found. \n First run: \n > julia --project calibrate.jl \n and store results $data_save_file", - ), - ) - end - ekiobj = load(data_save_file)["eki"] - priors = load(data_save_file)["priors"] - truth_sample = load(data_save_file)["truth_sample"] - truth_sample_mean = load(data_save_file)["truth_sample_mean"] - truth_params_constrained = load(data_save_file)["truth_input_constrained"] #true parameters in constrained space - truth_params = transform_constrained_to_unconstrained(priors, truth_params_constrained) - Γy = ekiobj.obs_noise_cov - ### - ### Emulate: Gaussian Process Regression - ### - - # Emulate-sample settings - standardize = true - retained_svd_frac = 0.95 - - gppackage = Emulators.GPJL() - pred_type = Emulators.YType() - gauss_proc = GaussianProcess( - gppackage; - kernel = nothing, # use default squared exponential kernel - prediction_type = pred_type, - noise_learn = false, - ) - - # Standardize the output data - # Use median over all data since all data are the same type - truth_sample_norm = vcat(truth_sample...) - norm_factor = get_standardizing_factors(truth_sample_norm) - println(size(norm_factor)) - #norm_factor = vcat(norm_factor...) 
- norm_factor = fill(norm_factor, size(truth_sample)) - println("Standardization factors") - println(norm_factor) - - # Get training points from the EKP iteration number in the second input term - N_iter = 5 - input_output_pairs = Utilities.get_training_points(ekiobj, N_iter) - # Save data - @save joinpath(data_save_directory, "input_output_pairs.jld2") input_output_pairs - - normalized = true - emulator = Emulator( - gauss_proc, - input_output_pairs; - obs_noise_cov = Γy, - normalize_inputs = normalized, - standardize_outputs = standardize, - standardize_outputs_factors = norm_factor, - retained_svd_frac = retained_svd_frac, - ) - optimize_hyperparameters!(emulator) - - # Check how well the Gaussian Process regression predicts on the - # true parameters - #if retained_svd_frac==1.0 - y_mean, y_var = Emulators.predict(emulator, reshape(truth_params, :, 1), transform_to_real = true) - - println("GP prediction on true parameters: ") - println(vec(y_mean)) - println(" GP variance") - println(diag(y_var[1], 0)) - println("true data: ") - println(truth_sample_mean) # same, regardless of norm_factor - println("GP MSE: ") - println(mean((truth_sample_mean - vec(y_mean)) .^ 2)) - - #end - ### - ### Sample: Markov Chain Monte Carlo - ### - # initial values - u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) - println("initial parameters: ", u0) - - # First let's run a short chain to determine a good step size - truth_sample = truth_sample - mcmc = MCMCWrapper(RWMHSampling(), truth_sample, priors, emulator; init_params = u0) - new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) - - # Now begin the actual MCMC - println("Begin MCMC - with step size ", new_step) - chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) - posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) - - post_mean = mean(posterior) - post_cov = cov(posterior) - println("post_mean") - println(post_mean) - println("post_cov") - println(post_cov) - println("D util") - println(det(inv(post_cov))) - println(" ") - - - # Plot the posteriors together with the priors and the true parameter values (in the constrained space) - n_params = length(truth_params) - save_directory = joinpath(figure_save_directory) - - posterior_distribution_unconstrained_dict = get_distribution(posterior) #as it is a Samples, the distribution are samples - posterior_samples_unconstrained_dict = get_distribution(posterior) #as it is a Samples, the distribution are samples - param_names = get_name(posterior) - posterior_samples_unconstrained_arr = vcat([posterior_samples_unconstrained_dict[n] for n in param_names]...) 
- posterior_samples = transform_unconstrained_to_constrained(priors, posterior_samples_unconstrained_arr) - - for idx in 1:n_params - if idx == 1 - param = "F" - xbounds = [F_true - 1.0, F_true + 1.0] - xs = collect(xbounds[1]:((xbounds[2] - xbounds[1]) / 100):xbounds[2]) - elseif idx == 2 - param = "A" - xbounds = [A_true - 1.0, A_true + 1.0] - xs = collect(xbounds[1]:((xbounds[2] - xbounds[1]) / 100):xbounds[2]) - elseif idx == 3 - param = "ω" - xs = collect((ω_true - 0.2):0.01:(ω_true + 0.2)) - xbounds = [xs[1], xs[end]] - else - throw("not implemented") + cases = [ + "GP", # diagonalize, train scalar GP, assume diag inputs + "RF-scalar-diagin", # diagonalize, train scalar RF, assume diag inputs (most comparable to GP) + "RF-scalar", # diagonalize, train scalar RF, don't asume diag inputs + "RF-vector-svd-diag", + "RF-vector-svd-nondiag", + "RF-vector-nosvd-diag", + "RF-vector-nosvd-nondiag", + ] + + #### CHOOSE YOUR CASE: + mask = 2:7 + for case in cases[mask] + + + #case = cases[7] + println("case: ", case) + + overrides = Dict("train_fraction" => 0.6, "verbose" => true) + + # Should be loaded: + τc = 5.0 + F_true = 8.0 # Mean F + A_true = 2.5 # Transient F amplitude + ω_true = 2.0 * π / (360.0 / τc) # Frequency of the transient F + #### + + exp_name = "Lorenz_histogram_F$(F_true)_A$(A_true)-w$(ω_true)" + rng_seed = 44011 + rng = Random.MersenneTwister(rng_seed) + + # loading relevant data + homedir = pwd() + println(homedir) + figure_save_directory = joinpath(homedir, "output/") + data_save_directory = joinpath(homedir, "output/") + data_save_file = joinpath(data_save_directory, "calibrate_results.jld2") + + if !isfile(data_save_file) + throw( + ErrorException( + "data file $data_save_file not found. \n First run: \n > julia --project calibrate.jl \n and store results $data_save_file", + ), + ) end - label = "true " * param - - histogram(posterior_samples[idx, :], bins = 100, normed = true, fill = :slategray, lab = "posterior") - prior_plot = get_distribution(mcmc.prior) + ekiobj = load(data_save_file)["eki"] + priors = load(data_save_file)["priors"] + truth_sample = load(data_save_file)["truth_sample"] + truth_sample_mean = load(data_save_file)["truth_sample_mean"] + truth_params_constrained = load(data_save_file)["truth_input_constrained"] #true parameters in constrained space + truth_params = transform_constrained_to_unconstrained(priors, truth_params_constrained) + Γy = ekiobj.obs_noise_cov + + n_params = length(truth_params) # "input dim" + output_dim = size(Γy, 1) + ### + ### Emulate: Gaussian Process Regression + ### + + # Emulate-sample settings + # choice of machine-learning tool + if case == "GP" + gppackage = Emulators.GPJL() + pred_type = Emulators.YType() + mlt = GaussianProcess( + gppackage; + kernel = nothing, # use default squared exponential kernel + prediction_type = pred_type, + noise_learn = false, + ) + elseif case ∈ ["RF-scalar", "RF-scalar-diagin"] + n_features = 300 + diagonalize_input = case == "RF-scalar-diagin" ? true : false + mlt = ScalarRandomFeatureInterface( + n_features, + n_params, + rng = rng, + diagonalize_input = diagonalize_input, + optimizer_options = overrides, + ) + elseif case ∈ ["RF-vector-svd-diag", "RF-vector-nosvd-diag", "RF-vector-svd-nondiag", "RF-vector-nosvd-nondiag"] + # do we want to assume that the outputs are decorrelated in the machine-learning problem? + diagonalize_output = case ∈ ["RF-vector-svd-diag", "RF-vector-nosvd-diag"] ? 
true : false + n_features = 300 + mlt = VectorRandomFeatureInterface( + n_features, + n_params, + output_dim, + rng = rng, + diagonalize_output = diagonalize_output, # assume outputs are decorrelated + optimizer_options = overrides, + ) + end - # This requires StatsPlots - xs_rows = permutedims(repeat(xs, 1, size(posterior_samples, 1)), (2, 1)) # This hack will enable us to apply the transformation using the full prior - xs_rows_unconstrained = transform_constrained_to_unconstrained(priors, xs_rows) - plot!( - xs, - pdf.(prior_plot[param_names[idx]], xs_rows_unconstrained[idx, :]), - w = 2.6, - color = :blue, - lab = "prior", + # Standardize the output data + # Use median over all data since all data are the same type + truth_sample_norm = vcat(truth_sample...) + norm_factor = get_standardizing_factors(truth_sample_norm) + println(size(norm_factor)) + #norm_factor = vcat(norm_factor...) + norm_factor = fill(norm_factor, size(truth_sample)) + println("Standardization factors") + println(norm_factor) + + # Get training points from the EKP iteration number in the second input term + N_iter = 6 # note we have 6 iterations stored + input_output_pairs = Utilities.get_training_points(ekiobj, N_iter - 1) # 1:N-1 + input_output_pairs_test = Utilities.get_training_points(ekiobj, N_iter:N_iter) # just "next" iteration used for testing + # Save data + @save joinpath(data_save_directory, "input_output_pairs.jld2") input_output_pairs + + standardize = true + retained_svd_frac = 1.0 + normalized = true + # do we want to use SVD to decorrelate outputs + decorrelate = case ∈ ["RF-vector-nosvd-diag", "RF-vector-nosvd-nondiag"] ? false : true + + emulator = Emulator( + mlt, + input_output_pairs; + obs_noise_cov = Γy, + normalize_inputs = normalized, + standardize_outputs = standardize, + standardize_outputs_factors = norm_factor, + retained_svd_frac = retained_svd_frac, + decorrelate = decorrelate, ) - #plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab="prior") - plot!([truth_params[idx]], seriestype = "vline", w = 2.6, lab = label) - plot!(xlims = xbounds) + optimize_hyperparameters!(emulator) + + # Check how well the Gaussian Process regression predicts on the + # true parameters + #if retained_svd_frac==1.0 + y_mean, y_var = Emulators.predict(emulator, reshape(truth_params, :, 1), transform_to_real = true) + y_mean_test, y_var_test = + Emulators.predict(emulator, get_inputs(input_output_pairs_test), transform_to_real = true) + + println("ML prediction on true parameters: ") + println(vec(y_mean)) + println("true data: ") + println(truth_sample_mean) # same, regardless of norm_factor + println(" ML predicted variance") + println(diag(y_var[1], 0)) + println("ML MSE (truth): ") + println(mean((truth_sample_mean - vec(y_mean)) .^ 2)) + println("ML MSE (next ensemble): ") + println(mean((get_outputs(input_output_pairs_test) - y_mean_test) .^ 2)) + + #end + ### + ### Sample: Markov Chain Monte Carlo + ### + # initial values + u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) + println("initial parameters: ", u0) + + # First let's run a short chain to determine a good step size + truth_sample = truth_sample + mcmc = MCMCWrapper(RWMHSampling(), truth_sample, priors, emulator; init_params = u0) + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + + # Now begin the actual MCMC + println("Begin MCMC - with step size ", new_step) + chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) + posterior = 
MarkovChainMonteCarlo.get_posterior(mcmc, chain) + + post_mean = mean(posterior) + post_cov = cov(posterior) + println("post_mean") + println(post_mean) + println("post_cov") + println(post_cov) + println("D util") + println(det(inv(post_cov))) + println(" ") + + constrained_truth_params = transform_unconstrained_to_constrained(posterior, truth_params) + param_names = get_name(posterior) + + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + constrained_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + + gr(dpi = 300, size = (300, 300)) + p = cornerplot(permutedims(constrained_posterior_samples, (2, 1)), label = param_names, compact = true) + plot!(p.subplots[1], [constrained_truth_params[1]], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash) # vline on top histogram + plot!(p.subplots[3], [constrained_truth_params[2]], seriestype = "hline", w = 1.5, c = :steelblue, ls = :dash) # hline on right histogram + plot!(p.subplots[2], [constrained_truth_params[1]], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash) # v & h line on scatter. + plot!(p.subplots[2], [constrained_truth_params[2]], seriestype = "hline", w = 1.5, c = :steelblue, ls = :dash) + figpath = joinpath(figure_save_directory, "posterior_2d-" * case * ".png") + savefig(figpath) - title!(param) - figpath = joinpath(figure_save_directory, "posterior_$(param)_" * exp_name * ".png") - savefig(figpath) - linkfig(figpath) + # Save data + save( + joinpath(data_save_directory, "posterior.jld2"), + "posterior", + posterior, + "input_output_pairs", + input_output_pairs, + "truth_params", + truth_params, + ) end - end main() diff --git a/src/CalibrateEmulateSample.jl b/src/CalibrateEmulateSample.jl index 9f227d0ee..f5956b175 100644 --- a/src/CalibrateEmulateSample.jl +++ b/src/CalibrateEmulateSample.jl @@ -14,13 +14,14 @@ import EnsembleKalmanProcesses: EnsembleKalmanProcesses, ParameterDistributions, export EnsembleKalmanProcesses, ParameterDistributions, Observations, DataContainers -# No internal deps, heavy external deps -#include("GaussianProcessEmulator.jl") -include("Emulator.jl") # Internal deps, light external deps include("Utilities.jl") +# No internal deps, heavy external deps +#include("GaussianProcessEmulator.jl") +include("Emulator.jl") + # Internal deps, light external deps include("MarkovChainMonteCarlo.jl") diff --git a/src/Emulator.jl b/src/Emulator.jl index e4fa76aa8..6f4d2572d 100644 --- a/src/Emulator.jl +++ b/src/Emulator.jl @@ -7,9 +7,11 @@ using Statistics using Distributions using LinearAlgebra using DocStringExtensions +using Random export Emulator +export build_models! export optimize_hyperparameters! export predict @@ -24,7 +26,7 @@ abstract type MachineLearningTool end # include the different <: ML models include("GaussianProcess.jl") #for GaussianProcess -# include("RandomFeature.jl") +include("RandomFeature.jl") # include("NeuralNetwork.jl") # etc. 
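The include list above is the extension point for new emulators: each included file supplies a subtype of `MachineLearningTool` together with methods for the exported verbs (`build_models!`, `optimize_hyperparameters!`, `predict`). A hedged sketch of that contract with a hypothetical tool name; the actual requirements are defined by GaussianProcess.jl and the new RandomFeature.jl:

```julia
# Sketch of the MachineLearningTool contract, inferred from the exports above; names are illustrative.
struct MyTool <: MachineLearningTool
    fitted_models::Vector{Any}          # storage for fitted models (illustrative)
end

# called by the Emulator constructor on the processed (normalized/decorrelated) training pairs
function build_models!(tool::MyTool, training_pairs::PairedDataContainer)
    # fit to get_inputs(training_pairs) / get_outputs(training_pairs) and store the result
    return nothing
end

# called by the user after construction; may be a no-op if fitting already optimizes hyperparameters
optimize_hyperparameters!(tool::MyTool) = nothing

# return (mean, variance-like object) at new_inputs of size input_dim × N_samples
function predict(tool::MyTool, new_inputs::AbstractMatrix)
    # evaluate tool.fitted_models at each column of new_inputs
end
```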
@@ -75,6 +77,8 @@ struct Emulator{FT <: AbstractFloat} retained_svd_frac::FT end +get_machine_learning_tool(emulator::Emulator) = emulator.machine_learning_tool + # Constructor for the Emulator Object function Emulator( machine_learning_tool::MachineLearningTool, @@ -83,6 +87,7 @@ function Emulator( normalize_inputs::Bool = true, standardize_outputs::Bool = false, standardize_outputs_factors::Union{AbstractVector{FT}, Nothing} = nothing, + decorrelate::Bool = true, retained_svd_frac::FT = 1.0, ) where {FT <: AbstractFloat} @@ -118,24 +123,35 @@ function Emulator( training_outputs = get_outputs(input_output_pairs) end - # [3.] Decorrelating the outputs [always performed] - - #Transform data if obs_noise_cov available - # (if obs_noise_cov==nothing, transformed_data is equal to data) - decorrelated_training_outputs, decomposition = - svd_transform(training_outputs, obs_noise_cov, retained_svd_frac = retained_svd_frac) + # [3.] Decorrelating the outputs, not performed for vector RF + if decorrelate + #Transform data if obs_noise_cov available + # (if obs_noise_cov==nothing, transformed_data is equal to data) + decorrelated_training_outputs, decomposition = + svd_transform(training_outputs, obs_noise_cov, retained_svd_frac = retained_svd_frac) + + # write new pairs structure + if retained_svd_frac < 1.0 + #note this changes the dimension of the outputs + training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) + input_dim, output_dim = size(training_pairs, 1) + else + training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) + end - # write new pairs structure - if retained_svd_frac < 1.0 - #note this changes the dimension of the outputs - training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) - input_dim, output_dim = size(training_pairs, 1) + # [4.] build an emulator + build_models!(machine_learning_tool, training_pairs) else - training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) + if decorrelate || !isa(machine_learning_tool, VectorRandomFeatureInterface) + throw(ArgumentError("$machine_learning_tool is incompatible with option Emulator(...,decorrelate = false)")) + end + decomposition = nothing + training_pairs = PairedDataContainer(training_inputs, training_outputs) + + # [4.] build an emulator - providing the noise covariance as a Tikhonov regularizer + build_models!(machine_learning_tool, training_pairs, regularization_matrix = obs_noise_cov) end - # [4.] build an emulator - build_models!(machine_learning_tool, training_pairs) return Emulator{FT}( machine_learning_tool, @@ -170,6 +186,7 @@ function predict( emulator::Emulator{FT}, new_inputs::AbstractMatrix{FT}; transform_to_real = false, + vector_rf_unstandardize = true, ) where {FT <: AbstractFloat} # Check if the size of new_inputs is consistent with the GP model's input # dimension. @@ -177,7 +194,7 @@ function predict( N_samples = size(new_inputs, 2) size(new_inputs, 1) == input_dim || - throw(ArgumentError("GP object and input observations do not have consistent dimensions")) + throw(ArgumentError("Emulator object and input observations do not have consistent dimensions")) # [1.] normalize normalized_new_inputs = normalize(emulator, new_inputs) @@ -186,28 +203,68 @@ function predict( ds_outputs, ds_output_var = predict(emulator.machine_learning_tool, normalized_new_inputs) # [3.] 
transform back to real coordinates or remain in decorrelated coordinates - if transform_to_real && emulator.decomposition === nothing - throw(ArgumentError("""Need SVD decomposition to transform back to original space, - but GaussianProcess.decomposition == nothing. - Try setting transform_to_real=false""")) - elseif transform_to_real && emulator.decomposition !== nothing - #transform back to real coords - cov becomes dense - s_outputs, s_output_cov = svd_reverse_transform_mean_cov(ds_outputs, ds_output_var, emulator.decomposition) - if output_dim == 1 - s_output_cov = [s_output_cov[i][1] for i in 1:N_samples] + if !isa(get_machine_learning_tool(emulator), VectorRandomFeatureInterface) + if transform_to_real && emulator.decomposition === nothing + throw(ArgumentError("""Need SVD decomposition to transform back to original space, + but GaussianProcess.decomposition == nothing. + Try setting transform_to_real=false""")) + elseif transform_to_real && emulator.decomposition !== nothing + + #transform back to real coords - cov becomes dense + s_outputs, s_output_cov = svd_reverse_transform_mean_cov(ds_outputs, ds_output_var, emulator.decomposition) + if output_dim == 1 + s_output_cov = reshape([s_output_cov[i][1] for i in 1:N_samples], 1, :) + end + + # [4.] unstandardize + return reverse_standardize(emulator, s_outputs, s_output_cov) + else + # remain in decorrelated, standardized coordinates (cov remains diagonal) + # Convert to vector of matrices to match the format + # when transform_to_real=true + ds_output_diagvar = vec([Diagonal(ds_output_var[:, j]) for j in 1:N_samples]) + if output_dim == 1 + ds_output_diagvar = reshape([ds_output_diagvar[i][1] for i in 1:N_samples], 1, :) + end + return ds_outputs, ds_output_diagvar end - # [4.] unstandardize - return reverse_standardize(emulator, s_outputs, s_output_cov) else - # remain in decorrelated, standardized coordinates (cov remains diagonal) - # Convert to vector of matrices to match the format - # when transform_to_real=true - ds_output_diagvar = vec([Diagonal(ds_output_var[:, j]) for j in 1:N_samples]) - if output_dim == 1 - ds_output_diagvar = [ds_output_diagvar[i][1] for i in 1:N_samples] + # create a vector of covariance matrices + ds_output_covvec = vec([ds_output_var[:, :, j] for j in 1:N_samples]) + + if emulator.decomposition === nothing # without applying SVD + if vector_rf_unstandardize + # [4.] unstandardize + return reverse_standardize(emulator, ds_outputs, ds_output_covvec) + else + return ds_outputs, ds_output_covvec + end + + else #if we applied SVD + if transform_to_real # ...and want to return coordinates + s_outputs, s_output_cov = + svd_reverse_transform_mean_cov(ds_outputs, ds_output_covvec, emulator.decomposition) + if output_dim == 1 + s_output_cov = reshape([s_output_cov[i][1] for i in 1:N_samples], 1, :) + end + # [4.] unstandardize + return reverse_standardize(emulator, s_outputs, s_output_cov) + else #... 
and want to stay in decorrelated standardized coordinates + diag_out_flag = get_diagonalize_output(get_machine_learning_tool(emulator)) + if diag_out_flag + ds_output_diagvar = vec([Diagonal(ds_output_covvec[j]) for j in 1:N_samples]) # extracts diagonal + if output_dim == 1 + ds_output_diagvar = ([ds_output_covvec[i][1, 1] for i in 1:N_samples], 1, :) # extracts value + end + return ds_outputs, ds_output_diagvar + else + return ds_outputs, ds_output_covvec + end + end end - return ds_outputs, ds_output_diagvar + end + end # Normalization, Standardization, and Decorrelation @@ -244,10 +301,14 @@ $(DocStringExtensions.TYPEDSIGNATURES) Standardize with a vector of factors (size equal to output dimension). """ function standardize( - outputs::AbstractVecOrMat{FT}, - output_covs::Vector{<:Union{AbstractMatrix{FT}, UniformScaling{FT}}}, - factors::AbstractVector{FT}, -) where {FT <: AbstractFloat} + outputs::VorM, + output_covs::VofMUS, + factors::V, +) where { + VorM <: AbstractVecOrMat, + VofMUS <: Union{AbstractVector{<:AbstractMatrix}, AbstractVector{<:UniformScaling}}, + V <: AbstractVector, +} # Case where `output_covs` is a Vector of covariance matrices n = length(factors) outputs = outputs ./ factors @@ -273,10 +334,15 @@ function standardize( end function standardize( - outputs::AbstractVecOrMat{FT}, - output_cov::Union{AbstractMatrix{FT}, UniformScaling{FT}}, - factors::AbstractVector{FT}, -) where {FT <: AbstractFloat} + outputs::VorM, + output_cov::MorVorUS, + factors::V, +) where { + VorM <: AbstractVecOrMat, + MorVorUS <: Union{AbstractMatrix, AbstractVector{<:AbstractFloat}, UniformScaling}, # must be vector of floats or could cause inf. loop with other method definition. + V <: AbstractVector, +} + # Case where `output_cov` is a single covariance matrix stdized_out, stdized_covs = standardize(outputs, [output_cov], factors) return stdized_out, stdized_covs[1] @@ -291,9 +357,13 @@ dimension). `output_cov` is a Vector of covariance matrices, such as is returned """ function reverse_standardize( emulator::Emulator{FT}, - outputs::AbstractVecOrMat{FT}, - output_covs::Union{AbstractMatrix{FT}, Vector{<:AbstractMatrix{FT}}}, -) where {FT <: AbstractFloat} + outputs::VorM, + output_covs::VorMtwo, +) where { + VorM <: AbstractVecOrMat, + VorMtwo <: AbstractVecOrMat, # can be different type + FT <: AbstractFloat, +} if emulator.standardize_outputs return standardize(outputs, output_covs, 1.0 ./ emulator.standardize_outputs_factors) else @@ -375,6 +445,7 @@ $(DocStringExtensions.TYPEDSIGNATURES) Transform the mean and covariance back to the original (correlated) coordinate system - `μ` - predicted mean; size *output\\_dim* × *N\\_predicted\\_points*. - `σ2` - predicted variance; size *output\\_dim* × *N\\_predicted\\_points*. + - predicted covariance; size *N\\_predicted\\_points* array of size *output\\_dim* × *output\\_dim*. - `decomposition` - SVD decomposition of *obs\\_noise\\_cov*. Returns the transformed mean (size *output\\_dim* × *N\\_predicted\\_points*) and variance. @@ -386,12 +457,20 @@ each element is a matrix of size *output\\_dim* × *output\\_dim*. 
""" function svd_reverse_transform_mean_cov( μ::AbstractMatrix{FT}, - σ2::AbstractMatrix{FT}, + σ2::AbstractVector,# vector of output_dim x output_dim matrices decomposition::SVD, ) where {FT <: AbstractFloat} - @assert size(μ) == size(σ2) - output_dim, N_predicted_points = size(σ2) + N_predicted_points = length(σ2) + if !(eltype(σ2) <: AbstractMatrix) + throw( + ArgumentError( + "input σ2 must be a vector of eltype <: AbstractMatrix, instead eltype(σ2) = ", + string(eltype(σ2)), + ), + ) + end + # We created meanvGP = D_inv * Vt * mean_v so meanv = V * D * meanvGP sqrt_singular_values = Diagonal(sqrt.(decomposition.S)) transformed_μ = decomposition.V * sqrt_singular_values * μ @@ -400,13 +479,27 @@ function svd_reverse_transform_mean_cov( # Back transformation for j in 1:N_predicted_points - σ2_j = decomposition.V * sqrt_singular_values * Diagonal(σ2[:, j]) * sqrt_singular_values * decomposition.Vt + σ2_j = decomposition.V * sqrt_singular_values * σ2[j] * sqrt_singular_values * decomposition.Vt transformed_σ2[j] = Symmetric(σ2_j) end return transformed_μ, transformed_σ2 + end +function svd_reverse_transform_mean_cov( + μ::AbstractMatrix{FT}, + σ2::AbstractMatrix{FT}, + decomposition::SVD, +) where {FT <: AbstractFloat} + @assert size(μ) == size(σ2) + output_dim, N_predicted_points = size(σ2) + σ2_as_cov = vec([Diagonal(σ2[:, j]) for j in 1:N_predicted_points]) + + return svd_reverse_transform_mean_cov(μ, σ2_as_cov, decomposition) +end + + end diff --git a/src/MarkovChainMonteCarlo.jl b/src/MarkovChainMonteCarlo.jl index 1abb1134b..2e376b32e 100644 --- a/src/MarkovChainMonteCarlo.jl +++ b/src/MarkovChainMonteCarlo.jl @@ -165,7 +165,10 @@ function EmulatorPosteriorModel( # Vector of N_samples covariance matrices. For MH, N_samples is always 1, so we # have to reshape()/re-cast input/output; simpler to do here than add a # predict() method. - g, g_cov = Emulators.predict(em, reshape(θ, :, 1), transform_to_real = false) + g, g_cov = + Emulators.predict(em, reshape(θ, :, 1), transform_to_real = false, vector_rf_unstandardize = false) + #TODO vector_rf will always unstandardize, but other methods will not, so we require this additional flag. + if isa(g_cov[1], Real) return logpdf(MvNormal(obs_sample, g_cov[1] * I), vec(g)) + get_logpdf(prior, θ) else diff --git a/src/RandomFeature.jl b/src/RandomFeature.jl new file mode 100644 index 000000000..3465d2c0d --- /dev/null +++ b/src/RandomFeature.jl @@ -0,0 +1,661 @@ + +using RandomFeatures +const RF = RandomFeatures +using EnsembleKalmanProcesses +const EKP = EnsembleKalmanProcesses +using ..ParameterDistributions +using ..Utilities +using StableRNGs +using Random + +abstract type RandomFeatureInterface <: MachineLearningTool end + +abstract type MultithreadType end +struct TullioThreading <: MultithreadType end +struct EnsembleThreading <: MultithreadType end + +#common functions +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Converts a flat array into a cholesky factor. 
+""" +function flat_to_chol(x::V) where {V <: AbstractVector} + choldim = Int(floor(sqrt(2 * length(x)))) + cholmat = zeros(choldim, choldim) + for i in 1:choldim + for j in 1:i + cholmat[i, j] = x[sum(0:(i - 1)) + j] + end + end + return cholmat +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds Covariance matrices from a flat array of hyperparameters +""" +function hyperparameters_from_flat( + x::VV, + input_dim::Int, + output_dim::Int, + diagonalize_input::Bool, + diagonalize_output::Bool, +) where {VV <: AbstractVector} + # if dim = 1 then parameters are a 1x1 matrix + # if we diagonalize then parameters are diagonal entries + constant + # if we don't diagonalize then parameters are a cholesky product + constant on diagonal. + + #input space + if input_dim == 1 + in_max = 1 + U = x[in_max] * ones(1, 1) + elseif diagonalize_input + in_max = input_dim + 2 + U = convert(Matrix, x[in_max - 1] * (Diagonal(x[1:(in_max - 2)]) + x[in_max] * I)) + elseif !diagonalize_input + in_max = Int(input_dim * (input_dim + 1) / 2) + 2 + cholU = flat_to_chol(x[1:(in_max - 2)]) + U = x[in_max - 1] * (cholU * permutedims(cholU, (2, 1)) + x[in_max] * I) + end + + # output_space + out_min = in_max + 1 + if output_dim == 1 + V = x[end] * ones(1, 1) + elseif diagonalize_output + V = convert(Matrix, x[end - 1] * (Diagonal(x[out_min:(end - 2)]) + x[end] * I)) + elseif !diagonalize_output + cholV = flat_to_chol(x[out_min:(end - 2)]) + V = x[end - 1] * (cholV * permutedims(cholV, (2, 1)) + x[end] * I) + end + + # sometimes this process does not give a (numerically) symmetric matrix + U = 0.5 * (U + permutedims(U, (2, 1))) + V = 0.5 * (V + permutedims(V, (2, 1))) + + return U, V + +end + +function hyperparameters_from_flat(x::VV, input_dim::Int, diagonalize_input::Bool) where {VV <: AbstractVector} + output_dim = 1 + diagonalize_output = false + U, V = hyperparameters_from_flat(x, input_dim, output_dim, diagonalize_input, diagonalize_output) + # Note that the scalar setting has a slight inconsistency from the 1-D output vector case + # We correct here by rescaling U using the single hyperparameter in V + return V[1, 1] * U +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds a prior over the hyperparameters (i.e. the cholesky/diagaonal or individaul entries of the input/output covariances). +For example, the case where the input covariance ``U = γ_1 * (LL^T + γ_2 I)``, +we set priors for the entries of the lower triangular matrix ``L`` as normal, and constant scalings ``γ_i`` as log-normal to retain positivity. 
+""" +function build_default_prior(input_dim::Int, output_dim::Int, diagonalize_input::Bool, diagonalize_output::Bool) + n_hp_in = n_hyperparameters_cov(input_dim, diagonalize_input) + n_hp_out = n_hyperparameters_cov(output_dim, diagonalize_output) + + # if dim = 1 , positive constant + # elseif diagonalized, positive on diagonal and positive scalings + # else chol factor, and positive scalings + if input_dim > 1 + if diagonalize_input + param_diag = constrained_gaussian("input_prior_diagonal", 1.0, 1.0, 0.0, Inf, repeats = n_hp_in - 2) + param_scaling = constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + input_prior = combine_distributions([param_diag, param_scaling]) + else + param_chol = constrained_gaussian("input_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_hp_in - 2) + param_scaling = constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + input_prior = combine_distributions([param_chol, param_scaling]) + end + else + input_prior = constrained_gaussian("input_prior", 1.0, 1.0, 0.0, Inf) + end + + if output_dim > 1 + if diagonalize_output + param_diag = constrained_gaussian("output_prior_diagonal", 1.0, 1.0, 0.0, Inf, repeats = n_hp_out - 2) + param_scaling = constrained_gaussian("output_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + output_prior = combine_distributions([param_diag, param_scaling]) + else + param_chol = constrained_gaussian("output_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_hp_out - 2) + param_scaling = constrained_gaussian("output_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + output_prior = combine_distributions([param_chol, param_scaling]) + end + else + output_prior = constrained_gaussian("output_prior", 1.0, 1.0, 0.0, Inf) + end + + return combine_distributions([input_prior, output_prior]) + +end + +function build_default_prior(input_dim::Int, diagonalize_input::Bool) + #scalar case consistent with vector case + output_dim = 1 + diagonalize_output = false + return build_default_prior(input_dim, output_dim, diagonalize_input, diagonalize_output) +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +The number of hyperparameters required to characterize covariance (determined from `hyperparameters_from_flat`). +""" +function n_hyperparameters_cov(dim::Int, diagonalize::Bool) + n_hp = 0 + n_hp += diagonalize ? dim : Int(dim * (dim + 1) / 2) # inputs: diagonal or cholesky + n_hp += (dim > 1) ? 2 : 0 # add two more for constant diagonal in each case. + return n_hp +end + + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Calculate number of hyperparameters required to create the default prior in the given input/output space (determined from `hyperparameters_from_flat`) +""" +function calculate_n_hyperparameters(input_dim::Int, output_dim::Int, diagonalize_input::Bool, diagonalize_output::Bool) + n_hp_in = n_hyperparameters_cov(input_dim, diagonalize_input) + n_hp_out = n_hyperparameters_cov(output_dim, diagonalize_output) + return n_hp_in + n_hp_out +end + +function calculate_n_hyperparameters(input_dim::Int, diagonalize_input::Bool) + #scalar case consistent with vector case + output_dim = 1 + diagonalize_output = false + return calculate_n_hyperparameters(input_dim, output_dim, diagonalize_input, diagonalize_output) +end + + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Calculates key quantities (mean, covariance, and coefficients) of the objective function to be optimized for hyperparameter training. Buffers: mean_store, cov_store, buffer are provided to aid memory management. 
+""" +function calculate_mean_cov_and_coeffs( + rfi::RFI, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + decomp_type::S, + mean_store::M, + cov_store::A, + buffer::A, + multithread_type::MT, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, + M <: AbstractMatrix{<:AbstractFloat}, + A <: AbstractArray{<:AbstractFloat, 3}, + MT <: MultithreadType, +} + + # split data into train/test + itrain = get_inputs(io_pairs)[:, 1:n_train] + otrain = get_outputs(io_pairs)[:, 1:n_train] + io_train_cost = PairedDataContainer(itrain, otrain) + itest = get_inputs(io_pairs)[:, (n_train + 1):end] + otest = get_outputs(io_pairs)[:, (n_train + 1):end] + input_dim = size(itrain, 1) + output_dim = size(otrain, 1) + + # build and fit the RF + rfm = RFM_from_hyperparameters( + rfi, + rng, + l, + regularization, + n_features, + batch_sizes, + input_dim, + output_dim, + multithread_type, + ) + fitted_features = RF.Methods.fit(rfm, io_train_cost, decomposition_type = decomp_type) + + #we want to calc 1/var(y-mean)^2 + lambda/m * coeffs^2 in the end + thread_opt = isa(multithread_type, TullioThreading) + RF.Methods.predict!( + rfm, + fitted_features, + DataContainer(itest), + mean_store, + cov_store, + buffer, + tullio_threading = thread_opt, + ) + # sizes (output_dim x n_test), (output_dim x output_dim x n_test) + scaled_coeffs = sqrt(1 / n_features) * RF.Methods.get_coeffs(fitted_features) + + # we add the additional complexity term + # TODO only cholesky and SVD available + if decomp_type == "cholesky" + chol_fac = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).L + complexity = 2 * sum(log(chol_fac[i, i]) for i in 1:size(chol_fac, 1)) + else + svd_singval = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).S + complexity = sum(log, svd_singval) # note this is log(abs(det)) + end + return scaled_coeffs, complexity + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Estimation of objective function (marginal log-likelihood) variability for optimization of hyperparameters. Calculated empirically by calling many samples of the objective at a "reasonable" value. 
`multithread_type` is used as dispatch, either `ensemble` (thread over the empirical samples) or `tullio` (serial samples, and thread the linear algebra within each sample) +""" +function estimate_mean_and_coeffnorm_covariance( + rfi::RFI, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + n_samples::Int, + decomp_type::S, + multithread_type::TullioThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + + output_dim = size(get_outputs(io_pairs), 1) + + means = zeros(output_dim, n_samples, n_test) + mean_of_covs = zeros(output_dim, output_dim, n_test) + moc_tmp = similar(mean_of_covs) + mtmp = zeros(output_dim, n_test) + buffer = zeros(n_test, output_dim, n_features) + complexity = zeros(1, n_samples) + coeffl2norm = zeros(1, n_samples) + println("estimate cov with " * string(n_samples * repeats) * " iterations...") + + for i in ProgressBar(1:n_samples) + for j in 1:repeats + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rng, + l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + # cplxty 1 + + # update vbles needed for cov + means[:, i, :] .+= mtmp ./ repeats + coeffl2norm[1, i] += sqrt(sum(abs2, c)) / repeats + complexity[1, i] += cplxty / repeats + + # update vbles needed for mean + @. mean_of_covs += moc_tmp / (repeats * n_samples) + + end + end + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + + approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, n_samples) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(approx_σ2) + println("approx_σ2 not posdef") + approx_σ2 = posdef_correct(approx_σ2) + end + + Γ = cov(vcat(blockmeans, coeffl2norm, complexity), dims = 2) + + + return Γ, approx_σ2 + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Evaluate an objective function (marginal log-likelihood) over an ensemble of hyperparameter values (used for EKI). 
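+Returns the stacked forward-map evaluations `vcat(blockmeans, coeffl2norm, complexity)` (one column per ensemble member) and the block covariance matrix assembled from the ensemble-averaged predictive covariances.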
`multithread_type` is used as dispatch, either `ensemble` (thread over the ensemble members) or `tullio` (serial samples, and thread the linear algebra within each sample) +""" +function calculate_ensemble_mean_and_coeffnorm( + rfi::RFI, + rng::RNG, + lvecormat::VorM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + decomp_type::S, + multithread_type::TullioThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + VorM <: AbstractVecOrMat, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + if isa(lvecormat, AbstractVector) + lmat = reshape(lvecormat, 1, :) + else + lmat = lvecormat + end + N_ens = size(lmat, 2) + output_dim = size(get_outputs(io_pairs), 1) + + means = zeros(output_dim, N_ens, n_test) + mean_of_covs = zeros(output_dim, output_dim, n_test) + buffer = zeros(n_test, output_dim, n_features) + complexity = zeros(1, N_ens) + coeffl2norm = zeros(1, N_ens) + moc_tmp = similar(mean_of_covs) + mtmp = zeros(output_dim, n_test) + + println("calculating " * string(N_ens * repeats) * " ensemble members...") + + for i in ProgressBar(1:N_ens) + for j in collect(1:repeats) + l = lmat[:, i] + + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rng, + l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + means[:, i, :] += mtmp ./ repeats + @. mean_of_covs += moc_tmp / (repeats * N_ens) + coeffl2norm[1, i] += sqrt(sum(c .^ 2)) / repeats + complexity[1, i] += cplxty / repeats + end + end + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + blockcovmat = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, N_ens) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + blockcovmat[id, id] = mean_of_covs[i, :, :] + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(blockcovmat) + println("blockcovmat not posdef") + blockcovmat = posdef_correct(blockcovmat) + end + + return vcat(blockmeans, coeffl2norm, complexity), blockcovmat + +end + + +function estimate_mean_and_coeffnorm_covariance( + rfi::RFI, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + n_samples::Int, + decomp_type::S, + multithread_type::EnsembleThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + + output_dim = size(get_outputs(io_pairs), 1) + + println("estimate cov with " * string(n_samples * repeats) * " iterations...") + + nthreads = Threads.nthreads() + rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 + rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] + + c_list = [zeros(1, n_samples) for i in 1:nthreads] + cp_list = [zeros(1, n_samples) for i in 1:nthreads] + m_list = [zeros(output_dim, n_samples, n_test) for i in 1:nthreads] + moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + mtmp_list = 
[zeros(output_dim, n_test) for i in 1:nthreads] + buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads] + + + Threads.@threads for i in ProgressBar(1:n_samples) + tid = Threads.threadid() + rngtmp = rng_list[tid] + mtmp = mtmp_list[tid] + moc_tmp = moc_tmp_list[tid] + buffer = buffer_list[tid] + for j in 1:repeats + + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rngtmp, + l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + + + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + # cplxty 1 + + # update vbles needed for cov + # update vbles needed for cov + m_list[tid][:, i, :] += mtmp ./ repeats + c_list[tid][1, i] += sqrt(sum(abs2, c)) / repeats + cp_list[tid][1, i] += cplxty / repeats + + # update vbles needed for mean + @. moc_list[tid] += moc_tmp ./ (repeats * n_samples) + + end + end + + #put back together after threading + mean_of_covs = sum(moc_list) + means = sum(m_list) + coeffl2norm = sum(c_list) + complexity = sum(cp_list) + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + + approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, n_samples) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(approx_σ2) + println("approx_σ2 not posdef") + approx_σ2 = posdef_correct(approx_σ2) + end + + Γ = cov(vcat(blockmeans, coeffl2norm, complexity), dims = 2) + return Γ, approx_σ2 + +end + +function calculate_ensemble_mean_and_coeffnorm( + rfi::RFI, + rng::RNG, + lvecormat::VorM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + decomp_type::S, + multithread_type::EnsembleThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + VorM <: AbstractVecOrMat, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + if isa(lvecormat, AbstractVector) + lmat = reshape(lvecormat, 1, :) + else + lmat = lvecormat + end + N_ens = size(lmat, 2) + output_dim = size(get_outputs(io_pairs), 1) + + + + println("calculating " * string(N_ens * repeats) * " ensemble members...") + + nthreads = Threads.nthreads() + + means = zeros(output_dim, N_ens, n_test) + complexity = zeros(1, N_ens) + coeffl2norm = zeros(1, N_ens) + + c_list = [zeros(1, N_ens) for i in 1:nthreads] + cp_list = [zeros(1, N_ens) for i in 1:nthreads] + m_list = [zeros(output_dim, N_ens, n_test) for i in 1:nthreads] + moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads] + moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + mtmp_list = [zeros(output_dim, n_test) for i in 1:nthreads] + rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 + rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] + Threads.@threads for i in ProgressBar(1:N_ens) + tid = Threads.threadid() + rngtmp = rng_list[tid] + mtmp = mtmp_list[tid] + moc_tmp = moc_tmp_list[tid] + buffer = buffer_list[tid] + l = lmat[:, i] + for j in collect(1:repeats) + + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rngtmp, 
+ l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + m_list[tid][:, i, :] += mtmp ./ repeats + @. moc_list[tid] += moc_tmp ./ (repeats * N_ens) + c_list[tid][1, i] += sqrt(sum(c .^ 2)) / repeats + cp_list[tid][1, i] += cplxty / repeats + + end + end + + # put back together after threading + mean_of_covs = sum(moc_list) + means = sum(m_list) + coeffl2norm = sum(c_list) + complexity = sum(cp_list) + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + blockcovmat = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, N_ens) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + blockcovmat[id, id] = mean_of_covs[i, :, :] + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(blockcovmat) + println("blockcovmat not posdef") + blockcovmat = posdef_correct(blockcovmat) + end + + return vcat(blockmeans, coeffl2norm, complexity), blockcovmat + +end + +include("ScalarRandomFeature.jl") +include("VectorRandomFeature.jl") diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl new file mode 100644 index 000000000..dbc26d11b --- /dev/null +++ b/src/ScalarRandomFeature.jl @@ -0,0 +1,437 @@ +export ScalarRandomFeatureInterface +# getters already exported in VRFI + +""" +$(DocStringExtensions.TYPEDEF) + +Structure holding the Scalar Random Feature models. + +# Fields +$(DocStringExtensions.TYPEDFIELDS) + +""" +struct ScalarRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG} <: RandomFeatureInterface + "vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" + rfms::Vector{RF.Methods.RandomFeatureMethod} + "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" + fitted_features::Vector{RF.Methods.Fit} + "batch sizes" + batch_sizes::Union{Dict{S, Int}, Nothing} + "n_features" + n_features::Union{Int, Nothing} + "input dimension" + input_dim::Int + "choice of random number generator" + rng::RNG + "Assume inputs are decorrelated for ML" + diagonalize_input::Bool + "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" + feature_decomposition::S + "dictionary of options for hyperparameter optimizer" + optimizer_options::Dict{S} +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the rfms field +""" +get_rfms(srfi::ScalarRandomFeatureInterface) = srfi.rfms + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the fitted_features field +""" +get_fitted_features(srfi::ScalarRandomFeatureInterface) = srfi.fitted_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets batch_sizes the field +""" +get_batch_sizes(srfi::ScalarRandomFeatureInterface) = srfi.batch_sizes + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the n_features field +""" +get_n_features(srfi::ScalarRandomFeatureInterface) = srfi.n_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the input_dim field +""" +get_input_dim(srfi::ScalarRandomFeatureInterface) = srfi.input_dim + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the rng field +""" +get_rng(srfi::ScalarRandomFeatureInterface) = srfi.rng + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the diagonalize_input field +""" +get_diagonalize_input(srfi::ScalarRandomFeatureInterface) = srfi.diagonalize_input + +""" 
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+gets the feature_decomposition field
+"""
+get_feature_decomposition(srfi::ScalarRandomFeatureInterface) = srfi.feature_decomposition
+
+"""
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+gets the optimizer_options field
+"""
+get_optimizer_options(srfi::ScalarRandomFeatureInterface) = srfi.optimizer_options
+
+
+"""
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package, for multi-input and single- (or decorrelated-)output emulators.
+ - `n_features` - the number of random features
+ - `input_dim` - the dimension of the input space
+ - `diagonalize_input = false` - whether to assume independence in input space (Note: when set to `true`, this tool directly approximates the behaviour of the scalar-valued GaussianProcess with an ARD kernel)
+ - `batch_sizes = nothing` - Dictionary of batch sizes passed to the `RandomFeatures.jl` object (see definition there)
+ - `rng = Random.GLOBAL_RNG` - random number generator
+ - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available
+ - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in the `ScalarRandomFeatureInterface` constructor):
+    - "prior": the prior for the hyperparameter optimization
+    - "n_ensemble": number of ensemble members
+    - "n_iteration": number of EKI iterations
+    - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 1.0)
+    - "tikhonov": Tikhonov regularization parameter if > 0
+    - "inflation": additive inflation ∈ [0,1] with 0 being no inflation
+    - "train_fraction": e.g. 0.8 (default) means an 80:20 train-test split
+    - "multithread": how to multithread.
"ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra + - "verbose" => false, verbose optimizer statements +""" +function ScalarRandomFeatureInterface( + n_features::Int, + input_dim::Int; + diagonalize_input::Bool = false, + batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, + rng::RNG = Random.GLOBAL_RNG, + feature_decomposition::S = "cholesky", + optimizer_options::Union{Dict{S}, Nothing} = nothing, +) where {S <: AbstractString, RNG <: AbstractRNG} + # Initialize vector for GP models + rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) + fitted_features = Vector{RF.Methods.Fit}(undef, 0) + + + n_hp = calculate_n_hyperparameters(input_dim, diagonalize_input) + prior = build_default_prior(input_dim, diagonalize_input) + + # default optimizer settings + optimizer_opts = Dict( + "prior" => prior, #the hyperparameter_prior + "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble + "n_iteration" => 5, # number of eki iterations + "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme + "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation + "train_fraction" => 0.8, # 80:20 train - test split + "multithread" => "ensemble", # instead of "tullio" + "verbose" => false, # verbose optimizer statements + ) + + if !isnothing(optimizer_options) + for key in keys(optimizer_options) + optimizer_opts[key] = optimizer_options[key] + end + end + + opt_tmp = Dict() + for key in keys(optimizer_opts) + if key != "prior" + opt_tmp[key] = optimizer_opts[key] + end + end + @info("hyperparameter optimization with EKI configured with $opt_tmp") + + return ScalarRandomFeatureInterface{S, RNG}( + rfms, + fitted_features, + batch_sizes, + n_features, + input_dim, + rng, + diagonalize_input, + feature_decomposition, + optimizer_opts, + ) +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds the random feature method from hyperparameters. We use cosine activation functions and a MatrixVariateNormal(M,U,V) distribution (from `Distributions.jl`) with mean M=0, and input covariance U built from cholesky factorization or diagonal components based on the diagonalize_input flag. 
+""" +function RFM_from_hyperparameters( + srfi::ScalarRandomFeatureInterface, + rng::RNG, + l::ForVM, + regularization::MorUSorD, # just a 1x1 matrix though + n_features::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + input_dim::Int, + multithread_type::MT; +) where { + RNG <: AbstractRNG, + ForVM <: Union{Real, AbstractVecOrMat}, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, + S <: AbstractString, + MT <: MultithreadType, +} + diagonalize_input = get_diagonalize_input(srfi) + + M = zeros(input_dim) #scalar output + U = hyperparameters_from_flat(l, input_dim, diagonalize_input) + if !isposdef(U) + println("U not posdef - correcting") + U = posdef_correct(U) + end + + dist = MvNormal(M, U) + pd = ParameterDistribution( + Dict( + "distribution" => Parameterized(dist), + "constraint" => repeat([no_constraint()], input_dim), + "name" => "xi", + ), + ) + feature_sampler = RF.Samplers.FeatureSampler(pd, rng = rng) + # Learn hyperparameters for different feature types + + sf = RF.Features.ScalarFourierFeature(n_features, feature_sampler) + thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio + if isnothing(batch_sizes) + return RF.Methods.RandomFeatureMethod(sf, regularization = regularization, tullio_threading = thread_opt) + else + return RF.Methods.RandomFeatureMethod( + sf, + regularization = regularization, + batch_sizes = batch_sizes, + tullio_threading = thread_opt, + ) + end +end + +#removes vector-only input arguments +RFM_from_hyperparameters( + srfi::ScalarRandomFeatureInterface, + rng::RNG, + l::ForVM, + regularization::MorUS, # just a 1x1 matrix though + n_features::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + input_dim::Int, + output_dim::Int, + multithread_type::MT; +) where { + RNG <: AbstractRNG, + ForVM <: Union{Real, AbstractVecOrMat}, + MorUS <: Union{Matrix, UniformScaling}, + S <: AbstractString, + MT <: MultithreadType, +} = RFM_from_hyperparameters(srfi, rng, l, regularization, n_features, batch_sizes, input_dim, multithread_type) + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds the random feature method from hyperparameters. We use cosine activation functions and a Multivariate Normal distribution (from `Distributions.jl`) with mean M=0, and input covariance U built from cholesky factorization or diagonal components based on the diagonalize_input flag. +""" +function build_models!( + srfi::ScalarRandomFeatureInterface, + input_output_pairs::PairedDataContainer{FT}, +) where {FT <: AbstractFloat} + # get inputs and outputs + input_values = get_inputs(input_output_pairs) + output_values = get_outputs(input_output_pairs) + n_rfms, n_data = size(output_values) + noise_sd = 1.0 + + input_dim = size(input_values, 1) + + rfms = get_rfms(srfi) + fitted_features = get_fitted_features(srfi) + n_features = get_n_features(srfi) + batch_sizes = get_batch_sizes(srfi) + rng = get_rng(srfi) + decomp_type = get_feature_decomposition(srfi) + optimizer_options = get_optimizer_options(srfi) + + #regularization = I = 1.0 in scalar case + regularization = I + # Optimize features with EKP for each output dim + # [1.] Split data into test/train 80/20 + train_fraction = optimizer_options["train_fraction"] + n_train = Int(floor(train_fraction * n_data)) + n_test = n_data - n_train + n_features_opt = min(n_features, Int(floor(2 * n_test))) #we take a specified number of features for optimization. 
+ idx_shuffle = randperm(rng, n_data) + + @info ( + "hyperparameter learning for $n_rfms models using $n_train training points, $n_test validation points and $n_features_opt features" + ) + for i in 1:n_rfms + + + io_pairs_opt = PairedDataContainer( + input_values[:, idx_shuffle], + reshape(output_values[i, idx_shuffle], 1, size(output_values, 2)), + ) + + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + multithread_type = EnsembleThreading() + elseif multithread == "tullio" + multithread_type = TullioThreading() + else + throw( + ArgumentError( + "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", + ), + ) + end + # [2.] Estimate covariance at mean value + prior = optimizer_options["prior"] + μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) + + cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] + n_cov_samples_min = n_test + 2 + n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 1.0))) + + internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( + srfi, + rng, + μ_hp, + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + n_cov_samples, + decomp_type, + multithread_type, + ) + + Γ = internal_Γ + Γ[1:n_test, 1:n_test] += approx_σ2 + Γ[(n_test + 1):end, (n_test + 1):end] += I + + # [3.] set up EKP optimization + n_ensemble = optimizer_options["n_ensemble"] + n_iteration = optimizer_options["n_iteration"] + opt_verbose_flag = optimizer_options["verbose"] + initial_params = construct_initial_ensemble(rng, prior, n_ensemble) + min_complexity = log(det(regularization)) + data = vcat(get_outputs(io_pairs_opt)[(n_train + 1):end], 0.0, min_complexity) + ekiobj = EKP.EnsembleKalmanProcess(initial_params, data, Γ, Inversion(), rng = rng, verbose = opt_verbose_flag) + err = zeros(n_iteration) + + # [4.] optimize with EKP + for i in 1:n_iteration + + #get parameters: + lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) + + g_ens, _ = calculate_ensemble_mean_and_coeffnorm( + srfi, + rng, + lvec, + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + decomp_type, + multithread_type, + ) + inflation = optimizer_options["inflation"] + if inflation > 0 + EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + else + EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation + end + err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) + + end + + # [5.] extract optimal hyperparameters + hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] + + io_pairs_i = PairedDataContainer(input_values, reshape(output_values[i, :], 1, size(output_values, 2))) + # Now, fit new RF model with the optimized hyperparameters + rfm_i = RFM_from_hyperparameters( + srfi, + rng, + hp_optimal, + regularization, + n_features, + batch_sizes, + input_dim, + multithread_type, + ) + fitted_features_i = RF.Methods.fit(rfm_i, io_pairs_i, decomposition_type = decomp_type) #fit features + + push!(rfms, rfm_i) + push!(fitted_features, fitted_features_i) + end + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Empty method, as optimization takes place within the build_models stage +""" +function optimize_hyperparameters!(srfi::ScalarRandomFeatureInterface, args...; kwargs...) + @info("Random Features already trained. 
continuing...") +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. +""" +function predict(srfi::ScalarRandomFeatureInterface, new_inputs::MM) where {MM <: AbstractMatrix} + M = length(get_rfms(srfi)) + N_samples = size(new_inputs, 2) + # Predicts columns of inputs: input_dim × N_samples + μ = zeros(M, N_samples) + σ2 = zeros(M, N_samples) + optimizer_options = get_optimizer_options(srfi) + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + tullio_threading = false + elseif multithread == "tullio" + tullio_threading = true + end + + for i in 1:M + μ[i, :], σ2[i, :] = RF.Methods.predict( + get_rfms(srfi)[i], + get_fitted_features(srfi)[i], + DataContainer(new_inputs), + tullio_threading = tullio_threading, + ) + end + + # add the noise contribution from the regularization + σ2[:, :] = σ2[:, :] .+ 1.0 + + return μ, σ2 +end diff --git a/src/Utilities.jl b/src/Utilities.jl index eb354842e..e1a625892 100644 --- a/src/Utilities.jl +++ b/src/Utilities.jl @@ -14,7 +14,7 @@ export get_training_points export get_obs_sample export orig2zscore export zscore2orig - +export posdef_correct """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -22,13 +22,20 @@ Extract the training points needed to train the Gaussian process regression. - `ekp` - EnsembleKalmanProcess holding the parameters and the data that were produced during the Ensemble Kalman (EK) process. -- `N_train_iterations` - Number of EK layers/iterations to train on. +- `train_iterations` - Number (or indices) EK layers/iterations to train on. """ -function get_training_points(ekp::EnsembleKalmanProcess{FT, IT, P}, N_train_iterations::IT) where {FT, IT, P} - - # Note u[end] does not have an equivalent g - iter_range = (get_N_iterations(ekp) - N_train_iterations + 1):get_N_iterations(ekp) +function get_training_points( + ekp::EnsembleKalmanProcess{FT, IT, P}, + train_iterations::Union{IT, AbstractVector{IT}}, +) where {FT, IT, P} + + if !isa(train_iterations, AbstractVector) + # Note u[end] does not have an equivalent g + iter_range = (get_N_iterations(ekp) - train_iterations + 1):get_N_iterations(ekp) + else + iter_range = train_iterations + end u_tp = [] g_tp = [] @@ -114,4 +121,30 @@ function zscore2orig(Z::AbstractMatrix{FT}, mean::AbstractVector{FT}, std::Abstr end +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Makes square matrix `mat` positive definite, by symmetrizing and bounding the minimum eigenvalue below by `tol` +""" +function posdef_correct(mat::AbstractMatrix; tol::Real = 1e8 * eps()) + if !issymmetric(mat) + out = 0.5 * (mat + permutedims(mat, (2, 1))) #symmetrize + if isposdef(out) + # very often, small numerical errors cause asymmetry, so cheaper to add this branch + return out + end + else + out = mat + end + + nugget = abs(minimum(eigvals(out))) + for i in 1:size(out, 1) + out[i, i] += nugget + tol #add to diag + end + return out +end + + + + end # module diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl new file mode 100644 index 000000000..20eb9f2de --- /dev/null +++ b/src/VectorRandomFeature.jl @@ -0,0 +1,567 @@ +using ProgressBars + +export VectorRandomFeatureInterface +export get_rfms, + get_fitted_features, + get_batch_sizes, + get_n_features, + get_input_dim, + get_output_dim, + get_rng, + get_diagonalize_input, + get_diagonalize_output, + get_optimizer_options + +""" +$(DocStringExtensions.TYPEDEF) + +Structure 
holding the Vector Random Feature models. + +# Fields +$(DocStringExtensions.TYPEDFIELDS) + +""" +struct VectorRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG} <: RandomFeatureInterface + "A vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" + rfms::Vector{RF.Methods.RandomFeatureMethod} + "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" + fitted_features::Vector{RF.Methods.Fit} + "batch sizes" + batch_sizes::Union{Dict{S, Int}, Nothing} + "number of features" + n_features::Union{Int, Nothing} + "input dimension" + input_dim::Int + "output_dimension" + output_dim::Int + "rng" + rng::RNG + "regularization" + regularization::Vector{Union{Matrix, UniformScaling, Diagonal}} + "Assume inputs are decorrelated for ML" + diagonalize_input::Bool + "Assume outputs are decorrelated for ML. Note: emulator(..., decorrelate=true) by default" + diagonalize_output::Bool + "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" + feature_decomposition::S + "dictionary of options for hyperparameter optimizer" + optimizer_options::Dict +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the rfms field +""" +get_rfms(vrfi::VectorRandomFeatureInterface) = vrfi.rfms + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the fitted_features field +""" +get_fitted_features(vrfi::VectorRandomFeatureInterface) = vrfi.fitted_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the batch_sizes field +""" +get_batch_sizes(vrfi::VectorRandomFeatureInterface) = vrfi.batch_sizes + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the n_features field +""" +get_n_features(vrfi::VectorRandomFeatureInterface) = vrfi.n_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the input_dim field +""" +get_input_dim(vrfi::VectorRandomFeatureInterface) = vrfi.input_dim + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the output_dim field +""" +get_output_dim(vrfi::VectorRandomFeatureInterface) = vrfi.output_dim + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the rng field +""" +get_rng(vrfi::VectorRandomFeatureInterface) = vrfi.rng + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the regularization field +""" +get_regularization(vrfi::VectorRandomFeatureInterface) = vrfi.regularization + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the diagonalize_input field +""" +get_diagonalize_input(vrfi::VectorRandomFeatureInterface) = vrfi.diagonalize_input + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the diagonalize_output field +""" +get_diagonalize_output(vrfi::VectorRandomFeatureInterface) = vrfi.diagonalize_output + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the feature_decomposition field +""" +get_feature_decomposition(vrfi::VectorRandomFeatureInterface) = vrfi.feature_decomposition + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the optimizer_options field +""" +get_optimizer_options(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer_options + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and multi-output emulators. 
+ - `n_features` - the number of random features + - `input_dim` - the dimension of the input space + - `output_dim` - the dimension of the output space + - `diagonalize_input = false` - whether to assume independence in input space + - `diagonalize_output::Bool = false` - whether to assume independence in output space (even if set true, this is not the same as a list of scalar-valued models, as hyperparameters will be shared between models in this case) + - `batch_sizes = nothing` - Dictionary of batch sizes passed `RandomFeatures.jl` object (see definition there) + - `rng = Random.GLOBAL_RNG` - random number generator + - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available + - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in `VectorRandomFeatureInterface` constructor): + - "prior": the prior for the hyperparameter optimization + - "n_ensemble": number of ensemble members + - "n_iteration": number of eki iterations + - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 1.0) + - "tikhonov": tikhonov regularization parameter if > 0 + - "inflation": additive inflation ∈ [0,1] with 0 being no inflation + - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split + - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra + - "verbose" => false, verbose optimizer statements + +""" +function VectorRandomFeatureInterface( + n_features::Int, + input_dim::Int, + output_dim::Int; + diagonalize_input::Bool = false, + diagonalize_output::Bool = false, + batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, + rng::RNG = Random.GLOBAL_RNG, + feature_decomposition::S = "cholesky", + optimizer_options::Union{Dict{S}, Nothing} = nothing, +) where {S <: AbstractString, RNG <: AbstractRNG} + + # Initialize vector for RF model + rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) + fitted_features = Vector{RF.Methods.Fit}(undef, 0) + regularization = Vector{Union{Matrix, UniformScaling, Nothing}}(undef, 0) + + #Optimization Defaults + n_hp = calculate_n_hyperparameters(input_dim, output_dim, diagonalize_input, diagonalize_output) + prior = build_default_prior(input_dim, output_dim, diagonalize_input, diagonalize_output) + + optimizer_opts = Dict( + "prior" => prior, #the hyperparameter_prior + "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble + "n_iteration" => 5, # number of eki iterations + "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme + "tikhonov" => 0, # tikhonov regularization parameter if >0 + "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation + "train_fraction" => 0.8, # 80:20 train - test split + "multithread" => "ensemble", # instead of "tullio" + "verbose" => false, # verbose optimizer statements + ) + + if !isnothing(optimizer_options) + for key in keys(optimizer_options) + optimizer_opts[key] = optimizer_options[key] + end + end + + opt_tmp = Dict() + for key in keys(optimizer_opts) + if key != "prior" + opt_tmp[key] = optimizer_opts[key] + end + end + @info("hyperparameter optimization with EKI configured with $opt_tmp") + + return VectorRandomFeatureInterface{S, RNG}( + rfms, + fitted_features, + batch_sizes, + n_features, + input_dim, + output_dim, + rng, + regularization, + diagonalize_input, + 
diagonalize_output, + feature_decomposition, + optimizer_opts, + ) +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds the random feature method from hyperparameters. We use cosine activation functions and a Matrixvariate Normal distribution with mean M=0, and input(output) covariance U(V) built from cholesky factorization or diagonal components based on the diagonalize_input(diagonalize_output) flag. +""" +function RFM_from_hyperparameters( + vrfi::VectorRandomFeatureInterface, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + input_dim::Int, + output_dim::Int, + multithread_type::MT; +) where { + S <: AbstractString, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, + MT <: MultithreadType, +} + diagonalize_input = get_diagonalize_input(vrfi) + diagonalize_output = get_diagonalize_output(vrfi) + + M = zeros(input_dim, output_dim) # n x p mean + U, V = hyperparameters_from_flat(l, input_dim, output_dim, diagonalize_input, diagonalize_output) + + if !isposdef(U) + println("U not posdef - correcting") + U = posdef_correct(U) + end + if !isposdef(V) + println("V not posdef - correcting") + V = posdef_correct(V) + end + + dist = MatrixNormal(M, U, V) + pd = ParameterDistribution( + Dict( + "distribution" => Parameterized(dist), + "constraint" => repeat([no_constraint()], input_dim * output_dim), + "name" => "xi", + ), + ) + feature_sampler = RF.Samplers.FeatureSampler(pd, output_dim, rng = rng) + vff = RF.Features.VectorFourierFeature(n_features, output_dim, feature_sampler) + + thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio + if isnothing(batch_sizes) + return RF.Methods.RandomFeatureMethod(vff, regularization = regularization, tullio_threading = thread_opt) + else + return RF.Methods.RandomFeatureMethod( + vff, + regularization = regularization, + batch_sizes = batch_sizes, + tullio_threading = thread_opt, + ) + end +end + + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Build Vector Random Feature model for the input-output pairs subject to regularization, and optimizes the hyperparameters with EKP. 
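+The keyword `regularization_matrix` plays the role of (a function of) the observational noise covariance: when it is `nothing` (e.g. after scaled-SVD decorrelation) the regularization defaults to `I`, and otherwise it is reduced to the scalar multiple of the identity `det(reg_mat)^(1/output_dim) * I`.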
+""" +function build_models!( + vrfi::VectorRandomFeatureInterface, + input_output_pairs::PairedDataContainer{FT}; + regularization_matrix::Union{M, Nothing} = nothing, +) where {FT <: AbstractFloat, M <: AbstractMatrix} + + # get inputs and outputs + input_values = get_inputs(input_output_pairs) + output_values = get_outputs(input_output_pairs) + n_rfms, n_data = size(output_values) + + input_dim = size(input_values, 1) + output_dim = size(output_values, 1) + # there are less hyperparameters when we diagonalize + diagonalize_input = get_diagonalize_input(vrfi) + diagonalize_output = get_diagonalize_output(vrfi) + n_hp = calculate_n_hyperparameters(input_dim, output_dim, diagonalize_input, diagonalize_output) + + rfms = get_rfms(vrfi) + fitted_features = get_fitted_features(vrfi) + n_features = get_n_features(vrfi) + batch_sizes = get_batch_sizes(vrfi) + decomp_type = get_feature_decomposition(vrfi) + optimizer_options = get_optimizer_options(vrfi) + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + multithread_type = EnsembleThreading() + elseif multithread == "tullio" + multithread_type = TullioThreading() + else + throw( + ArgumentError( + "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", + ), + ) + end + prior = optimizer_options["prior"] + rng = get_rng(vrfi) + + if ndims(prior) > n_hp + # comes from having a truncated output_dimension + # TODO not really a truncation here, resetting to default + @info "truncating hyperparameter space to default in first $n_hp dimensions, due to truncation of output space" + prior = constrained_gaussian("cholesky_factors", 1.0, 1.0, 0.0, Inf, repeats = n_hp) + + end + + # Optimize feature cholesky factors with EKP + # [1.] Split data into test/train (e.g. 80/20) + train_fraction = optimizer_options["train_fraction"] + n_train = Int(floor(train_fraction * n_data)) # 20% split + n_test = n_data - n_train + if diagonalize_output + n_features_opt = min(n_features, Int(floor(4 * n_train))) #we take a specified number of features for optimization. MAGIC NUMBER + else + # note the n_features_opt should NOT exceed output_dim * n_train or the regularization is replaced with a diagonal approx. + n_features_opt = max(min(n_features, Int(floor(4 * sqrt(n_train * output_dim)))), Int(floor(1.9 * n_train)))#we take a specified number of features for optimization. MAGIC NUMBER + end + @info ( + "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features" + ) + + + # regularization_matrix = nothing when we use scaled SVD to decorrelate the space, + # in this setting, noise = I + if regularization_matrix === nothing + regularization = I + else + reg_mat = regularization_matrix + if !isposdef(regularization_matrix) + reg_mat = posdef_correct(regularization_matrix) + println("RF regularization matrix is not positive definite, correcting") + + else + # think of the regularization_matrix as the observational noise covariance, or a related quantity + regularization = exp((1 / output_dim) * sum(log.(eigvals(reg_mat)))) * I #i.e. det(M)^{1/output_dim} I + + #regularization = reg_mat #using the full p.d. tikhonov exp. 
EXPENSIVE, and challenge get complexity terms + end + end + + + idx_shuffle = randperm(rng, n_data) + + io_pairs_opt = PairedDataContainer( + input_values[:, idx_shuffle], + reshape(output_values[:, idx_shuffle], :, size(output_values, 2)), + ) + + # [2.] Estimate covariance at mean value + μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) + cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] + + if diagonalize_output + n_cov_samples_min = n_test + 2 # diagonal case + else + n_cov_samples_min = (n_test * output_dim + 2) # min required for + end + n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 1.0))) + + internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( + vrfi, + rng, + μ_hp, # take mean values + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + n_cov_samples, + decomp_type, + multithread_type, + ) + + tikhonov_opt_val = optimizer_options["tikhonov"] + if tikhonov_opt_val == 0 + # Build the covariance + Γ = internal_Γ + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += approx_σ2 + Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I + + #in diag case we have data logdet = λ^m, in non diag case we have logdet(Λ^) to match the different reg matrices. + min_complexity = + isa(regularization, UniformScaling) ? n_features_opt * log(regularization.λ) : + n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) + + data = vcat(reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, min_complexity) #flatten data + + elseif tikhonov_opt_val > 0 + # augment the state to add tikhonov + outsize = size(internal_Γ, 1) + Γ = zeros(outsize + n_hp, outsize + n_hp) + Γ[1:outsize, 1:outsize] = internal_Γ + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += approx_σ2 + Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I + + Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) + + #TODO the min complexity here is not the correct object in the non-diagonal case + min_complexity = + isa(regularization, UniformScaling) ? n_features_opt * log(regularization.λ) : + n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) + + data = vcat( + reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), + 0.0, + min_complexity, + zeros(size(Γ, 1) - outsize, 1), + ) #flatten data with additional zeros + else + throw( + ArgumentError( + "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val", + ), + ) + end + + # [3.] set up EKP optimization + n_ensemble = optimizer_options["n_ensemble"] # minimal ensemble size n_hp, + n_iteration = optimizer_options["n_iteration"] + opt_verbose_flag = optimizer_options["verbose"] + + if !isposdef(Γ) + Γ = posdef_correct(Γ) + end + + initial_params = construct_initial_ensemble(rng, prior, n_ensemble) + + ekiobj = EKP.EnsembleKalmanProcess(initial_params, data[:], Γ, Inversion(), rng = rng, verbose = opt_verbose_flag) + err = zeros(n_iteration) + + # [4.] 
optimize with EKP + for i in 1:n_iteration + #get parameters: + lvec = get_ϕ_final(prior, ekiobj) + + g_ens, _ = calculate_ensemble_mean_and_coeffnorm( + vrfi, + rng, + lvec, + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + decomp_type, + multithread_type, + ) + if tikhonov_opt_val > 0 + # augment with the computational parameters (u not ϕ) + uvecormat = get_u_final(ekiobj) + if isa(uvecormat, AbstractVector) + umat = reshape(uvecormat, 1, :) + else + umat = uvecormat + end + + g_ens = vcat(g_ens, umat) + end + inflation = optimizer_options["inflation"] + if inflation > 0 + EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + else + EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation + end + err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) + + end + + # [5.] extract optimal hyperparameters + hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] + # Now, fit new RF model with the optimized hyperparameters + + rfm_tmp = RFM_from_hyperparameters( + vrfi, + rng, + hp_optimal, + regularization, + n_features, + batch_sizes, + input_dim, + output_dim, + multithread_type, + ) + fitted_features_tmp = fit(rfm_tmp, input_output_pairs, decomposition_type = decomp_type) + + + push!(rfms, rfm_tmp) + push!(fitted_features, fitted_features_tmp) + push!(get_regularization(vrfi), regularization) + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Empty method, as optimization takes place within the build_models stage +""" +function optimize_hyperparameters!(vrfi::VectorRandomFeatureInterface, args...; kwargs...) + @info("Random Features already trained. continuing...") +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. +""" +function predict(vrfi::VectorRandomFeatureInterface, new_inputs::M) where {M <: AbstractMatrix} + input_dim = get_input_dim(vrfi) + output_dim = get_output_dim(vrfi) + rfm = get_rfms(vrfi)[1] + ff = get_fitted_features(vrfi)[1] + optimizer_options = get_optimizer_options(vrfi) + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + tullio_threading = false + elseif multithread == "tullio" + tullio_threading = true + end + + N_samples = size(new_inputs, 2) + # Predicts columns of inputs: input_dim × N_samples + μ, σ2 = RF.Methods.predict(rfm, ff, DataContainer(new_inputs), tullio_threading = tullio_threading) + # sizes (output_dim x n_test), (output_dim x output_dim x n_test) + # add the noise contribution from the regularization + # note this is because we are predicting the data here, not the latent function. 
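+    # The stored regularization (a scalar multiple of I, or a matrix) is the estimate of the
+    # observational noise: each predicted covariance slice is symmetrized, the regularization
+    # is added, and the result is corrected to be positive definite if necessary.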
+    lambda = get_regularization(vrfi)[1]
+    for i in 1:N_samples
+        σ2[:, :, i] = 0.5 * (σ2[:, :, i] + permutedims(σ2[:, :, i], (2, 1))) + lambda
+
+        if !isposdef(σ2[:, :, i])
+            σ2[:, :, i] = posdef_correct(σ2[:, :, i])
+        end
+    end
+
+    return μ, σ2
+end
diff --git a/test/GaussianProcess/runtests.jl b/test/GaussianProcess/runtests.jl
index 53624cc56..5935f834a 100644
--- a/test/GaussianProcess/runtests.jl
+++ b/test/GaussianProcess/runtests.jl
@@ -214,7 +214,7 @@ using CalibrateEmulateSample.DataContainers
     @test μ4_noise_from_Σ[:, 3] ≈ [0.0, 0.0] atol = tol_mu
    @test μ4_noise_from_Σ[:, 4] ≈ [0.0, -2.0] atol = tol_mu

-    # check match between the means and variances (should be similar at least
+    # check match between the variances (should be similar at least)

     @test all(isapprox.(σ4²_noise_from_Σ, σ4²_noise_learnt, rtol = 2 * tol_mu))
diff --git a/test/RandomFeature/runtests.jl b/test/RandomFeature/runtests.jl
new file mode 100644
index 000000000..e1792514f
--- /dev/null
+++ b/test/RandomFeature/runtests.jl
@@ -0,0 +1,306 @@
+using Test
+using Random
+using RandomFeatures
+using LinearAlgebra, Distributions
+
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.DataContainers
+using CalibrateEmulateSample.ParameterDistributions
+using RandomFeatures
+
+seed = 10101010
+rng = Random.MersenneTwister(seed)
+
+@testset "RandomFeatures" begin
+
+    @testset "ScalarRandomFeatureInterface" begin
+
+        input_dim = 2
+        n_features = 200
+        batch_sizes = Dict("train" => 100, "test" => 100, "feature" => 100)
+        diagonalize_input = false
+        # build interface
+        n_input_hp = Int(input_dim * (input_dim + 1) / 2) + 2 # see calculate_n_hyperparameters for details
+        n_output_hp = 1
+
+        # prior built from:
+        # input: γ₁*(Cholesky_factor + γ₂ * I)
+        # output: γ₃
+        # where γᵢ positive
+        prior = combine_distributions([
+            constrained_gaussian("input_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_input_hp - 2),
+            constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2),
+            constrained_gaussian("output_prior", 1.0, 1.0, 0.0, Inf),
+        ])
+
+        optimizer_options = Dict(
+            "prior" => prior,
+            "n_ensemble" => max(ndims(prior) + 1, 10),
+            "n_iteration" => 5,
+            "cov_sample_multiplier" => 10.0,
+            "inflation" => 1e-4,
+            "train_fraction" => 0.8,
+            "multithread" => "ensemble",
+            "verbose" => false,
+        )
+
+        srfi = ScalarRandomFeatureInterface(
+            n_features,
+            input_dim,
+            batch_sizes = batch_sizes,
+            rng = rng,
+            diagonalize_input = diagonalize_input,
+            optimizer_options = optimizer_options,
+        )
+
+        @test isa(get_rfms(srfi), Vector{RandomFeatures.Methods.RandomFeatureMethod})
+        @test isa(get_fitted_features(srfi), Vector{RandomFeatures.Methods.Fit})
+        @test get_batch_sizes(srfi) == batch_sizes
+        @test get_n_features(srfi) == n_features
+        @test get_input_dim(srfi) == input_dim
+        @test get_rng(srfi) == rng
+        @test get_diagonalize_input(srfi) == diagonalize_input
+        @test get_optimizer_options(srfi) == optimizer_options
+
+        # check defaults
+        srfi2 = ScalarRandomFeatureInterface(n_features, input_dim)
+        @test get_batch_sizes(srfi2) === nothing
+        @test get_rng(srfi2) == Random.GLOBAL_RNG
+        @test get_diagonalize_input(srfi2) == false
+        @test get_optimizer_options(srfi2) == optimizer_options # we just set the defaults above
+
+    end
+
+    @testset "VectorRandomFeatureInterface" begin
+
+        rng = Random.MersenneTwister(seed)
+
+        input_dim = 2
+        output_dim = 3
+        n_features = 200
+        batch_sizes = Dict("train" => 100, "test" => 100, "feature" => 100)
+        diagonalize_input = true
+        diagonalize_output = false
+
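+        # hyperparameter counts: a diagonalized input covariance has input_dim entries (+ 2 scalings); a full output covariance has output_dim * (output_dim + 1) / 2 entries (+ 2 scalings)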
+        n_input_hp = Int(input_dim) + 2
+        n_output_hp = Int(output_dim * (output_dim + 1) / 2) + 2
+
+        # prior built from:
+        # input: γ₁*(Cholesky_factor_in + γ₂ * I)
+        # output: γ₃*(Cholesky_factor_out + γ₄ * I)
+        # where γᵢ positive
+        prior = combine_distributions([
+            constrained_gaussian("input_prior_diagonal", 1.0, 1.0, 0.0, Inf, repeats = n_input_hp - 2),
+            constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2),
+            constrained_gaussian("output_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_output_hp - 2),
+            constrained_gaussian("output_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2),
+        ])
+
+        optimizer_options = Dict(
+            "prior" => prior,
+            "n_ensemble" => max(ndims(prior) + 1, 10),
+            "n_iteration" => 5,
+            "tikhonov" => 0,
+            "cov_sample_multiplier" => 10.0,
+            "inflation" => 1e-4,
+            "train_fraction" => 0.8,
+            "multithread" => "ensemble",
+            "verbose" => false,
+        )
+
+        # build interfaces
+        vrfi = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            rng = rng,
+            batch_sizes = batch_sizes,
+            diagonalize_input = diagonalize_input,
+            diagonalize_output = diagonalize_output,
+            optimizer_options = optimizer_options,
+        )
+
+        @test isa(get_rfms(vrfi), Vector{RandomFeatures.Methods.RandomFeatureMethod})
+        @test isa(get_fitted_features(vrfi), Vector{RandomFeatures.Methods.Fit})
+        @test get_batch_sizes(vrfi) == batch_sizes
+        @test get_n_features(vrfi) == n_features
+        @test get_input_dim(vrfi) == input_dim
+        @test get_output_dim(vrfi) == output_dim
+        @test get_rng(vrfi) == rng
+        @test get_diagonalize_input(vrfi) == diagonalize_input
+        @test get_diagonalize_output(vrfi) == diagonalize_output
+        @test get_optimizer_options(vrfi) == optimizer_options
+
+
+        # check defaults
+        vrfi2 = VectorRandomFeatureInterface(n_features, input_dim, output_dim, diagonalize_input = diagonalize_input)
+
+        @test get_batch_sizes(vrfi2) === nothing
+        @test get_rng(vrfi2) == Random.GLOBAL_RNG
+        @test get_diagonalize_input(vrfi2) == true
+        @test get_diagonalize_output(vrfi2) == false
+        @test get_optimizer_options(vrfi2) == optimizer_options # we just set the defaults above
+
+    end
+
+    @testset "RF within Emulator: 1D -> 1D" begin
+
+        # Training data
+        input_dim = 1
+        output_dim = 1
+        n = 40 # number of training points
+        x = reshape(2.0 * π * rand(n), 1, n) # unif(0,2π) predictors/features: 1 x n
+        obs_noise_cov = 0.05^2 * I
+        y = reshape(sin.(x) + 0.05 * randn(n)', 1, n) # predictands/targets: 1 x n
+        iopairs = PairedDataContainer(x, y, data_are_columns = true)
+
+        ntest = 40
+        new_inputs = reshape(2.0 * π * rand(ntest), 1, ntest)
+        new_outputs = sin.(new_inputs)
+
+        # RF parameters
+        n_features = 100
+        diagonalize_input = true
+        # Scalar RF options to mimic squared-exp ARD kernel
+        srfi = ScalarRandomFeatureInterface(n_features, input_dim, diagonalize_input = diagonalize_input, rng = rng)
+
+        # Vector RF options to mimic squared-exp ARD kernel (in 1D)
+        vrfi = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            diagonalize_input = diagonalize_input,
+            rng = rng,
+        )
+
+        # build emulators
+        em_srfi = Emulator(srfi, iopairs, obs_noise_cov = obs_noise_cov)
+        em_vrfi = Emulator(vrfi, iopairs, obs_noise_cov = obs_noise_cov)
+
+        # just see if it prints something
+        @test_logs (:info,) Emulators.optimize_hyperparameters!(em_srfi)
+        @test_logs (:info,) Emulators.optimize_hyperparameters!(em_vrfi)
+
+        # predict and test at the new inputs
+        tol_μ = 0.1 * ntest
+        μs, σs² = Emulators.predict(em_srfi, new_inputs, transform_to_real = true)
+        @test size(μs) == (1, ntest)
+        @test size(σs²) == (1, ntest)
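+        # the emulator mean should track sin(x), and the predicted (data) variance should be close to the added noise variance 0.05^2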
+        @test isapprox.(norm(μs - new_outputs), 0, atol = tol_μ)
+        @test all(isapprox.(vec(σs²), 0.05^2 * ones(ntest), atol = 1e-2))
+
+        μv, σv² = Emulators.predict(em_vrfi, new_inputs, transform_to_real = true)
+        @test size(μv) == (1, ntest)
+        @test size(σv²) == (1, ntest)
+        @test isapprox.(norm(μv - new_outputs), 0, atol = tol_μ)
+        @test all(isapprox.(vec(σv²), 0.05^2 * ones(ntest), atol = 1e-2))
+
+    end
+    @testset "RF within Emulator: 2D -> 2D" begin
+        # Generate training data
+        n = 100 # number of training points
+
+        input_dim = 2 # input dim
+        output_dim = 2 # output dim
+        X = 2.0 * π * rand(input_dim, n) # [0,2π]x[0,2π]
+
+        # G(x1, x2)
+        g1x = sin.(X[1, :]) .+ cos.(X[2, :])
+        g2x = sin.(X[1, :]) .- cos.(X[2, :])
+        gx = zeros(2, n)
+        gx[1, :] = g1x
+        gx[2, :] = g2x
+
+        # Add noise η
+        μ = zeros(output_dim)
+        Σ = 0.05 * [[0.5, 0.2] [0.2, 0.5]] # d x d
+        noise_samples = rand(MvNormal(μ, Σ), n)
+
+        # y = G(x) + η
+        Y = gx .+ noise_samples
+
+        iopairs = PairedDataContainer(X, Y, data_are_columns = true)
+
+
+        # RF parameters
+        n_features = 150
+
+        # Test a few options for RF
+        # 1) scalar + diag in
+        # 2) scalar
+        # 3) vector + diag out
+        # 4) vector
+
+        srfi_diagin = ScalarRandomFeatureInterface(n_features, input_dim, diagonalize_input = true, rng = rng)
+        srfi = ScalarRandomFeatureInterface(n_features, input_dim, diagonalize_input = false, rng = rng)
+
+        vrfi_diagout = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            diagonalize_input = false,
+            diagonalize_output = true,
+            optimizer_options = Dict("train_fraction" => 0.7),
+            rng = rng,
+        )
+
+        vrfi = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            diagonalize_input = false,
+            diagonalize_output = false,
+            optimizer_options = Dict("train_fraction" => 0.7),
+            rng = rng,
+        )
+
+        # build emulators
+        # svd: scalar + diag in, scalar, vector + diag out, vector
+        # no-svd: vector
+        em_srfi_svd_diagin = Emulator(srfi_diagin, iopairs, obs_noise_cov = Σ)
+        em_srfi_svd = Emulator(srfi, iopairs, obs_noise_cov = Σ)
+
+        em_vrfi_svd_diagout = Emulator(vrfi_diagout, iopairs, obs_noise_cov = Σ)
+        em_vrfi_svd = Emulator(vrfi, iopairs, obs_noise_cov = Σ)
+
+        em_vrfi = Emulator(vrfi, iopairs, obs_noise_cov = Σ, decorrelate = false)
+
+        # TODO: truncated SVD option for vector (involves resizing prior)
+
+        ntest = 100
+        new_inputs = 2.0 * π * rand(input_dim, ntest) # [0,2π]x[0,2π]
+
+        outx = zeros(2, ntest)
+        outx[1, :] = sin.(new_inputs[1, :]) .+ cos.(new_inputs[2, :])
+        outx[2, :] = sin.(new_inputs[1, :]) .- cos.(new_inputs[2, :])
+
+        μ_ssd, σ²_ssd = Emulators.predict(em_srfi_svd_diagin, new_inputs, transform_to_real = true)
+        μ_ss, σ²_ss = Emulators.predict(em_srfi_svd, new_inputs, transform_to_real = true)
+        μ_vsd, σ²_vsd = Emulators.predict(em_vrfi_svd_diagout, new_inputs, transform_to_real = true)
+        μ_vs, σ²_vs = Emulators.predict(em_vrfi_svd, new_inputs, transform_to_real = true)
+        μ_v, σ²_v = Emulators.predict(em_vrfi, new_inputs)
+
+        tol_μ = 0.1 * ntest * output_dim
+        @test isapprox.(norm(μ_ssd - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_ss - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_vsd - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_vs - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_v - outx), 0, atol = 4 * tol_μ) # decorrelate = false is a cruder approximation, so allow a looser tolerance
+
+        # An example with the other threading option
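+        # the "tullio" option enables threading inside the RandomFeatures tensor computations instead of over the ensemble evaluations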
Dict("train_fraction" => 0.7, "multithread" => "tullio"), + rng = rng, + ) + em_vrfi_svd_tul = Emulator(vrfi_tul, iopairs, obs_noise_cov = Σ) + μ_vs_tul, σ²_vs_tul = Emulators.predict(em_vrfi_svd_tul, new_inputs, transform_to_real = true) + @test isapprox.(norm(μ_vs_tul - outx), 0, atol = tol_μ) + + end + +end diff --git a/test/Utilities/runtests.jl b/test/Utilities/runtests.jl index bec9283ef..54e1ad7c9 100644 --- a/test/Utilities/runtests.jl +++ b/test/Utilities/runtests.jl @@ -60,4 +60,17 @@ using CalibrateEmulateSample.DataContainers @test get_inputs(training_points) ≈ initial_ensemble @test get_outputs(training_points) ≈ g_ens + #positive definiteness + mat = reshape(collect(-3:(-3 + 10^2 - 1)), 10, 10) + tol = 1e12 * eps() + @test !isposdef(mat) + + pdmat = posdef_correct(mat) + @test isposdef(pdmat) + @test minimum(eigvals(pdmat)) >= (1 - 1e-4) * 1e8 * eps() #eigvals is approximate so need a bit of give here + + pdmat2 = posdef_correct(mat, tol = tol) + @test isposdef(pdmat2) + @test minimum(eigvals(pdmat2)) >= (1 - 1e-4) * tol + end diff --git a/test/runtests.jl b/test/runtests.jl index c48b2aa9d..eeda04c7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,7 +24,7 @@ end end end - for submodule in ["Emulator", "GaussianProcess", "MarkovChainMonteCarlo", "Utilities"] + for submodule in ["Emulator", "GaussianProcess", "RandomFeature", "MarkovChainMonteCarlo", "Utilities"] if all_tests || has_submodule(submodule) || "CalibrateEmulateSample" in ARGS include_test(submodule) end