From 393e90face155d8cf35b91eeb08c977e847ac746 Mon Sep 17 00:00:00 2001
From: Melanie
Date: Tue, 22 Dec 2020 17:51:41 -0600
Subject: [PATCH] Update examples to work with the latest CES code

---
 examples/Cloudy/Cloudy_example.jl      | 177 +++++++++++++++----
 src/ParameterDistribution.jl           |   4 +-
 test/ParameterDistribution/runtests.jl |   6 +-
 3 files changed, 110 insertions(+), 77 deletions(-)

diff --git a/examples/Cloudy/Cloudy_example.jl b/examples/Cloudy/Cloudy_example.jl
index 8549b47b7..674332d52 100644
--- a/examples/Cloudy/Cloudy_example.jl
+++ b/examples/Cloudy/Cloudy_example.jl
@@ -1,4 +1,4 @@
-# Import Cloudy modules
+# This example requires Cloudy to be installed.
 using Pkg; Pkg.add(PackageSpec(name="Cloudy", version="0.1.0"))
 using Cloudy
 const PDistributions = Cloudy.ParticleDistributions
@@ -17,9 +17,43 @@ using CalibrateEmulateSample.EKP
 using CalibrateEmulateSample.GPEmulator
 using CalibrateEmulateSample.MCMC
 using CalibrateEmulateSample.Observations
-using CalibrateEmulateSample.GModel
 using CalibrateEmulateSample.Utilities
-using CalibrateEmulateSample.Priors
+using CalibrateEmulateSample.ParameterDistributionStorage
+
+# Import the module that runs Cloudy
+include("GModel.jl")
+using .GModel
+
+################################################################################
+#                                                                              #
+#                  Cloudy Calibrate-Emulate-Sample Example                     #
+#                                                                              #
+#                                                                              #
+#     This example uses Cloudy, a microphysics model that simulates the        #
+#     coalescence of cloud droplets into bigger drops, to demonstrate how      #
+#     the full Calibrate-Emulate-Sample pipeline can be used for Bayesian      #
+#     learning and uncertainty quantification of parameters, given some        #
+#     observations.                                                            #
+#                                                                              #
+#     Specifically, this example shows how to learn parameters of the          #
+#     initial cloud droplet mass distribution, given observations of some      #
+#     moments of that mass distribution at a later time, after some of the     #
+#     droplets have collided and become bigger drops.                          #
+#                                                                              #
+#     In this example, Cloudy is used in a "perfect model" (aka "known         #
+#     truth") setting, which means that the "observations" are generated by    #
+#     Cloudy itself, by running it with the true parameter values. In more     #
+#     realistic applications, the observations will come from some external    #
+#     measurement system.                                                      #
+#                                                                              #
+#     The purpose is to show how to do parameter learning using                #
+#     Calibrate-Emulate-Sample in a simple (and highly artificial) setting.    #
+#                                                                              #
+#     For more information on Cloudy, see                                      #
+#     https://github.com/CliMA/Cloudy.jl.git                                   #
+#                                                                              #
+################################################################################
+
 
 rng_seed = 41
 Random.seed!(rng_seed)
@@ -34,34 +68,38 @@ Random.seed!(rng_seed)
 param_names = ["N0", "θ", "k"]
 n_param = length(param_names)
 
-N0_true = 300.0
-θ_true = 1.5597
-k_true = 0.0817
+N0_true = 300.0  # number of particles (scaling factor for Gamma distribution)
+θ_true = 1.5597  # scale parameter of Gamma distribution
+k_true = 0.0817  # shape parameter of Gamma distribution
 params_true = [N0_true, θ_true, k_true]
 # Note that dist_true is a Cloudy distribution, not a Distributions.jl
 # distribution
 dist_true = PDistributions.Gamma(N0_true, θ_true, k_true)
 
 
-# Assume lognormal priors for all three parameters
-# Note: For the model G (=Cloudy) to run, N0 needs to be nonnegative, and θ
-# and k need to be positive. The EK update can result in violations of
-# these constraints - therefore, we perform CES in log space, i.e., we try
-# to find the logarithms of the true parameters (and of course, the actual
-# parameters can then simply be obtained by exponentiating the final results).
-function logmean_and_logstd(μ, σ)
-    σ_log = sqrt(log(1.0 + σ^2/μ^2))
-    μ_log = log(μ / (sqrt(1.0 + σ^2/μ^2)))
-    return μ_log, σ_log
-end
-logmean_N0, logstd_N0 = logmean_and_logstd(280., 40.)
-logmean_θ, logstd_θ = logmean_and_logstd(3.0, 1.5)
-logmean_k, logstd_k = logmean_and_logstd(0.5, 0.5)
+###
+### Define priors for the parameters we want to learn
+###
+
+# Define constraints
+lbound_N0 = 0.4 * N0_true
+lbound_θ = 1.0e-1
+lbound_k = 1.0e-4
+c1 = bounded_below(lbound_N0)
+c2 = bounded_below(lbound_θ)
+c3 = bounded_below(lbound_k)
+constraints = [[c1], [c2], [c3]]
+
+# We choose to use normal distributions to represent the prior distributions of
+# the parameters in the transformed (unconstrained) space.
+d1 = Parameterized(Normal(0.0, 1.0))
+d2 = Parameterized(Normal(0.0, 1.0))
+d3 = Parameterized(Normal(0.0, 1.0))
+distributions = [d1, d2, d3]
 
 
-priors = [Priors.Prior(Normal(logmean_N0, logstd_N0), "N0"),    # prior on N0
-          Priors.Prior(Normal(logmean_θ, logstd_θ), "θ"),    # prior on θ
-          Priors.Prior(Normal(logmean_k, logstd_k), "k")]    # prior on k
+param_names = ["N0", "θ", "k"]
+priors = ParameterDistribution(distributions, constraints, param_names)
 
 ###
 ### Define the data from which we want to learn the parameters
@@ -77,12 +115,12 @@ n_moments = length(moments)
 ###
 
 # Collision-coalescence kernel to be used in Cloudy
-coalescence_coeff = 1/3.14/4
+coalescence_coeff = 1/3.14/4/100
 kernel_func = x -> coalescence_coeff
 kernel = Cloudy.KernelTensors.CoalescenceTensor(kernel_func, 0, 100.0)
 
 # Time period over which to run Cloudy
-tspan = (0., 0.5)
+tspan = (0., 1.0)
 
 
 ###
@@ -96,8 +134,12 @@ gt = GModel.run_G(params_true, g_settings_true, PDistributions.update_params,
                   PDistributions.moment, Cloudy.Sources.get_int_coalescence)
 n_samples = 100
 yt = zeros(n_samples, length(gt))
-noise_level = 0.05
-Γy = noise_level * convert(Array, Diagonal(gt))
+# In a perfect model setting, the "observational noise" represents the internal
+# model variability. Since Cloudy is a purely deterministic model, there is no
+# straightforward way of coming up with a covariance structure for this internal
+# model variability. We decide to use a diagonal covariance, with entries
+# (variances) largely proportional to their corresponding data values, gt.
+Γy = convert(Array, Diagonal([13.0, 1.2, 2.7]))
 μ = zeros(length(gt))
 
 # Add noise
@@ -112,15 +154,12 @@ truth = Observations.Obs(yt, Γy, data_names)
 ### Calibrate: Ensemble Kalman Inversion
 ###
 
-log_transform(a::AbstractArray) = log.(a)
-exp_transform(a::AbstractArray) = exp.(a)
-
 N_ens = 50  # number of ensemble members
 N_iter = 5  # number of EKI iterations
 # initial parameters: N_ens x N_params
-initial_params = EKP.construct_initial_ensemble(N_ens, priors; rng_seed=6)
-ekiobj = EKP.EKObj(initial_params, param_names, truth.mean,
-                   truth.obs_noise_cov, Inversion(), Δt=1.0)
+initial_params = EKP.construct_initial_ensemble(priors, N_ens; rng_seed=6)
+ekiobj = EKP.EKObj(initial_params, truth.mean, truth.obs_noise_cov,
+                   Inversion(), Δt=0.3)
 
 # Initialize a ParticleDistribution with dummy parameters. The parameters
 # will then be set in run_G_ensemble
@@ -130,9 +169,8 @@ g_settings = GModel.GSettings(kernel, dist_type, moments, tspan)
 
 # EKI iterations
 for i in 1:N_iter
-    # Note that the parameters are exp-transformed for use as input
-    # to Cloudy
-    params_i = deepcopy(exp_transform(ekiobj.u[end]))
+    params_i = mapslices(x -> transform_unconstrained_to_constrained(priors, x),
+                         ekiobj.u[end]; dims=2)
     g_ens = GModel.run_G_ensemble(params_i, g_settings,
                                   PDistributions.update_params,
                                   PDistributions.moment,
@@ -141,11 +179,13 @@ for i in 1:N_iter
 end
 
 # EKI results: Has the ensemble collapsed toward the truth?
-println("True parameters: ")
-println(params_true)
+transformed_params_true = transform_constrained_to_unconstrained(priors,
+                                                                 params_true)
+println("True parameters (transformed): ")
+println(transformed_params_true)
 
 println("\nEKI results:")
-println(mean(deepcopy(exp_transform(ekiobj.u[end])), dims=1))
+println(mean(ekiobj.u[end], dims=1))
 
 
 ###
@@ -164,9 +204,8 @@ kern1 = SE(len1, 1.0)
 len2 = zeros(3)
 kern2 = Mat52Ard(len2, 0.0)
 white = Noise(log(2.0))
-# # construct kernel
 GPkernel = kern1 + kern2 + white
-# Get training points
+# Get training points 
 u_tp, g_tp = Utilities.extract_GP_tp(ekiobj, N_iter)
 normalized = true
 gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,
@@ -175,7 +214,8 @@ gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,
 
 # Check how well the Gaussian Process regression predicts on the
 # true parameters
-y_mean, y_var = GPEmulator.predict(gpobj, reshape(log.(params_true), 1, :),
+y_mean, y_var = GPEmulator.predict(gpobj,
+                                   reshape(transformed_params_true, 1, :),
                                    transform_to_real=true)
 
 println("GP prediction on true parameters: ")
@@ -192,7 +232,7 @@ println(truth.mean)
 
 u0 = vec(mean(u_tp, dims=1))
 println("initial parameters: ", u0)
-# MCMC parameters
+# MCMC settings
 mcmc_alg = "rwm" # random walk Metropolis
 
 # First let's run a short chain to determine a good step size
@@ -207,51 +247,44 @@ new_step = MCMC.find_mcmc_step!(mcmc_test, gpobj)
 # Now begin the actual MCMC
 println("Begin MCMC - with step size ", new_step)
 u0 = vec(mean(u_tp, dims=1))
-
-# reset parameters
 burnin = 1000
 max_iter = 100000
-
-mcmc = MCMC.MCMCObj(yt_sample, Γy, priors, new_step, u0, max_iter,
-                    mcmc_alg, burnin, svdflag=true)
+mcmc = MCMC.MCMCObj(yt_sample, Γy, priors, new_step, u0, max_iter, mcmc_alg,
+                    burnin, svdflag=true)
 
 MCMC.sample_posterior!(mcmc, gpobj, max_iter)
 
-posterior = MCMC.get_posterior(mcmc)
+posterior = MCMC.get_posterior(mcmc) 
 
-post_mean = mean(posterior, dims=1)
-post_cov = cov(posterior, dims=1)
-println("post_mean")
+post_mean = get_mean(posterior)
+post_cov = get_cov(posterior)
+println("posterior mean")
 println(post_mean)
-println("post_cov")
+println("posterior covariance")
 println(post_cov)
-println("D util")
-println(det(inv(post_cov)))
-println(" ")
 
 # Plot the posteriors together with the priors and the true parameter values
-true_values = [log(N0_true) log(θ_true) log(k_true)]
-n_params = length(true_values)
+# (in the transformed/unconstrained space)
+n_params = length(get_name(posterior))
 for idx in 1:n_params
     if idx == 1
-        param = "N0"
-        xs = collect(4.5:0.01:6.5)
+        xs = collect(range(5.0, stop=5.5, length=1000))
     elseif idx == 2
-        param = "Theta"
-        xs = collect(-1.0:0.01:2.5)
+        xs = collect(range(-1.0, stop=1.0, length=1000))
     elseif idx == 3
-        param = "k"
-        xs = collect(-4.0:0.01:1.0)
+        xs = collect(range(-3.0, stop=-1.0, length=1000))
     else
        throw("not implemented")
    end
 
-    label = "true " * param
-    histogram(posterior[:, idx], bins=100, normed=true, fill=:slategray,
-              lab="posterior")
-    plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab="prior")
-    plot!([true_values[idx]], seriestype="vline", w=2.6, lab=label)
-
-    title!(param)
-    StatsPlots.savefig("posterior_"*param*".png")
+    label = "true " * param_names[idx]
+    posterior_samples = dropdims(get_distribution(posterior)[param_names[idx]],
+                                 dims=1)
+    histogram(posterior_samples, bins=100, normed=true, fill=:slategray,
+              thickness_scaling=2.0, lab="posterior", legend=:outertopright)
+    prior_dist = get_distribution(mcmc.prior)[param_names[idx]]
+    plot!(xs, prior_dist, w=2.6, color=:blue, lab="prior")
+    plot!([transformed_params_true[idx]], seriestype="vline", w=2.6, lab=label)
+    title!(param_names[idx])
+    StatsPlots.savefig("posterior_" * param_names[idx] * ".png")
 end
diff --git a/src/ParameterDistribution.jl b/src/ParameterDistribution.jl
index e4a567b25..1789d8872 100644
--- a/src/ParameterDistribution.jl
+++ b/src/ParameterDistribution.jl
@@ -252,14 +252,14 @@ end
 """
     function get_distribution(pd::ParameterDistribution)
 
-Returns a `Dict` of `ParameterDistribution` distributions by name, (unless sample type)
+Returns a `Dict` of `ParameterDistribution` distributions, with the parameter names as dictionary keys. For parameters represented by `Samples`, the samples are returned as a 2D (parameter_dimension x n_samples) array
 
 """
 function get_distribution(pd::ParameterDistribution)
     return Dict{String,Any}(pd.names[i] => get_distribution(d) for (i,d) in enumerate(pd.distributions))
 end
 function get_distribution(d::Samples)
-    return "Contains samples only"
+    return d.distribution_samples
 end
 function get_distribution(d::Parameterized)
     return d.distribution
diff --git a/test/ParameterDistribution/runtests.jl b/test/ParameterDistribution/runtests.jl
index a4780d613..0d8223b65 100644
--- a/test/ParameterDistribution/runtests.jl
+++ b/test/ParameterDistribution/runtests.jl
@@ -131,12 +131,12 @@ using CalibrateEmulateSample.ParameterDistributionStorage
     # Tests for get_distribution
    @test get_distribution(d1) == MvNormal(4,0.1)
    @test get_distribution(u1)[name1] == MvNormal(4,0.1)
-    @test typeof(get_distribution(d2)) <: String
-    @test typeof(get_distribution(u2)[name2]) <: String
+    @test typeof(get_distribution(d2)) == Array{Int64, 2}
+    @test typeof(get_distribution(u2)[name2]) == Array{Int64, 2}
 
    d = get_distribution(u)
    @test d[name1] == MvNormal(4,0.1)
-    @test typeof(d[name2]) <: String
+    @test typeof(d[name2]) == Array{Int64, 2}
 
    # Test for get_all_constraints
    @test get_all_constraints(u) == cat([c1,c2]...,dims=1)
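
For readers new to the ParameterDistributionStorage interface that the updated example relies on, the short sketch below shows how its pieces fit together: a prior is built from a distribution in the unconstrained space, a constraint, and a name; points are mapped between the unconstrained and constrained (physical) spaces; and get_distribution looks distributions up by parameter name. This is a minimal illustration, not part of the patch: the single parameter "θ", the lower bound 0.1, and the test value 0.3 are assumptions chosen for the sketch, and only calls that already appear in the diff above are used.

# Minimal sketch; assumes CalibrateEmulateSample (with ParameterDistributionStorage)
# and Distributions are installed, as in the example above.
using Distributions
using CalibrateEmulateSample.ParameterDistributionStorage

# One parameter with a hypothetical lower bound of 0.1 and a standard normal
# prior in the unconstrained space, mirroring how `priors` is built in
# Cloudy_example.jl.
constraint = [bounded_below(0.1)]
distribution = Parameterized(Normal(0.0, 1.0))
prior = ParameterDistribution([distribution], [constraint], ["θ"])

# Map an unconstrained (computational) value into the constrained (physical)
# space and back again.
u = [0.3]                                             # illustrative value
θ = transform_unconstrained_to_constrained(prior, u)  # respects θ > 0.1
u_roundtrip = transform_constrained_to_unconstrained(prior, θ)

# get_distribution returns a Dict keyed by parameter name; for Parameterized
# parameters the value is the Distributions.jl object (see the updated
# docstring above).
println(get_distribution(prior)["θ"])

This is the same round trip the example performs with mapslices inside the EKI loop and when it plots the posterior and the true parameter values in the transformed (unconstrained) space.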