From 393e90face155d8cf35b91eeb08c977e847ac746 Mon Sep 17 00:00:00 2001
From: Melanie
Date: Tue, 22 Dec 2020 17:51:41 -0600
Subject: [PATCH] Update examples to work with the latest CES code

---
 examples/Cloudy/Cloudy_example.jl      | 177 +++++++++++++++----
 src/ParameterDistribution.jl           |   4 +-
 test/ParameterDistribution/runtests.jl |   6 +-
 3 files changed, 110 insertions(+), 77 deletions(-)

diff --git a/examples/Cloudy/Cloudy_example.jl b/examples/Cloudy/Cloudy_example.jl
index 8549b47b7..674332d52 100644
--- a/examples/Cloudy/Cloudy_example.jl
+++ b/examples/Cloudy/Cloudy_example.jl
@@ -1,4 +1,4 @@
-# Import Cloudy modules
+# This example requires Cloudy to be installed.
 using Pkg; Pkg.add(PackageSpec(name="Cloudy", version="0.1.0"))
 using Cloudy
 const PDistributions = Cloudy.ParticleDistributions
@@ -17,9 +17,43 @@ using CalibrateEmulateSample.EKP
 using CalibrateEmulateSample.GPEmulator
 using CalibrateEmulateSample.MCMC
 using CalibrateEmulateSample.Observations
-using CalibrateEmulateSample.GModel
 using CalibrateEmulateSample.Utilities
-using CalibrateEmulateSample.Priors
+using CalibrateEmulateSample.ParameterDistributionStorage
+
+# Import the module that runs Cloudy
+include("GModel.jl")
+using .GModel
+
+################################################################################
+#                                                                              #
+#                  Cloudy Calibrate-Emulate-Sample Example                     #
+#                                                                              #
+#                                                                              #
+#     This example uses Cloudy, a microphysics model that simulates the        #
+#     coalescence of cloud droplets into bigger drops, to demonstrate how      #
+#     the full Calibrate-Emulate-Sample pipeline can be used for Bayesian      #
+#     learning and uncertainty quantification of parameters, given some        #
+#     observations.                                                            #
+#                                                                              #
+#     Specifically, this example shows how to learn parameters of the          #
+#     initial cloud droplet mass distribution, given observations of some      #
+#     moments of that mass distribution at a later time, after some of the     #
+#     droplets have collided and become bigger drops.                          #
+#                                                                              #
+#     In this example, Cloudy is used in a "perfect model" (aka "known         #
+#     truth") setting, which means that the "observations" are generated by    #
+#     Cloudy itself, by running it with the true parameter values. In more     #
+#     realistic applications, the observations will come from some external    #
+#     measurement system.                                                      #
+#                                                                              #
+#     The purpose is to show how to do parameter learning using                #
+#     Calibrate-Emulate-Sample in a simple (and highly artificial) setting.    #
+#                                                                              #
+#     For more information on Cloudy, see                                      #
+#     https://github.com/CliMA/Cloudy.jl.git                                   #
+#                                                                              #
+################################################################################
+
 
 rng_seed = 41
 Random.seed!(rng_seed)
@@ -34,34 +68,38 @@ Random.seed!(rng_seed)
 param_names = ["N0", "θ", "k"]
 n_param = length(param_names)
 
-N0_true = 300.0
-θ_true = 1.5597
-k_true = 0.0817
+N0_true = 300.0  # number of particles (scaling factor for Gamma distribution)
+θ_true = 1.5597  # scale parameter of Gamma distribution
+k_true = 0.0817  # shape parameter of Gamma distribution
 params_true = [N0_true, θ_true, k_true]
 # Note that dist_true is a Cloudy distribution, not a Distributions.jl
 # distribution
 dist_true = PDistributions.Gamma(N0_true, θ_true, k_true)
 
 
-# Assume lognormal priors for all three parameters
-# Note: For the model G (=Cloudy) to run, N0 needs to be nonnegative, and θ
-# and k need to be positive. The EK update can result in violations of
-# these constraints - therefore, we perform CES in log space, i.e., we try
-# to find the logarithms of the true parameters (and of course, the actual
-# parameters can then simply be obtained by exponentiating the final results).
-function logmean_and_logstd(μ, σ)
-    σ_log = sqrt(log(1.0 + σ^2/μ^2))
-    μ_log = log(μ / (sqrt(1.0 + σ^2/μ^2)))
-    return μ_log, σ_log
-end
-logmean_N0, logstd_N0 = logmean_and_logstd(280., 40.)
-logmean_θ, logstd_θ = logmean_and_logstd(3.0, 1.5)
-logmean_k, logstd_k = logmean_and_logstd(0.5, 0.5)
+###
+### Define priors for the parameters we want to learn
+###
+
+# Define constraints
+lbound_N0 = 0.4 * N0_true
+lbound_θ = 1.0e-1
+lbound_k = 1.0e-4
+c1 = bounded_below(lbound_N0)
+c2 = bounded_below(lbound_θ)
+c3 = bounded_below(lbound_k)
+constraints = [[c1], [c2], [c3]]
+
+# We choose to use normal distributions to represent the prior distributions of
+# the parameters in the transformed (unconstrained) space.
+d1 = Parameterized(Normal(0.0, 1.0))
+d2 = Parameterized(Normal(0.0, 1.0))
+d3 = Parameterized(Normal(0.0, 1.0))
+distributions = [d1, d2, d3]
 
 
-priors = [Priors.Prior(Normal(logmean_N0, logstd_N0), "N0"),    # prior on N0
-          Priors.Prior(Normal(logmean_θ, logstd_θ), "θ"),    # prior on θ
-          Priors.Prior(Normal(logmean_k, logstd_k), "k")]    # prior on k
+param_names = ["N0", "θ", "k"]
+priors = ParameterDistribution(distributions, constraints, param_names)
 
 ###
 ### Define the data from which we want to learn the parameters
@@ -77,12 +115,12 @@ n_moments = length(moments)
 ###
 
 # Collision-coalescence kernel to be used in Cloudy
-coalescence_coeff = 1/3.14/4
+coalescence_coeff = 1/3.14/4/100
 kernel_func = x -> coalescence_coeff
 kernel = Cloudy.KernelTensors.CoalescenceTensor(kernel_func, 0, 100.0)
 
 # Time period over which to run Cloudy
-tspan = (0., 0.5)
+tspan = (0., 1.0)
 
 
 ###
@@ -96,8 +134,12 @@ gt = GModel.run_G(params_true, g_settings_true, PDistributions.update_params,
                   PDistributions.moment, Cloudy.Sources.get_int_coalescence)
 n_samples = 100
 yt = zeros(n_samples, length(gt))
-noise_level = 0.05
-Γy = noise_level * convert(Array, Diagonal(gt))
+# In a perfect model setting, the "observational noise" represents the internal
+# model variability. Since Cloudy is a purely deterministic model, there is no
+# straightforward way of coming up with a covariance structure for this internal
+# model variability. We decide to use a diagonal covariance, with entries
+# (variances) largely proportional to their corresponding data values, gt.
+Γy = convert(Array, Diagonal([13.0, 1.2, 2.7]))
 μ = zeros(length(gt))
 
 # Add noise
@@ -112,15 +154,12 @@ truth = Observations.Obs(yt, Γy, data_names)
 ### Calibrate: Ensemble Kalman Inversion
 ###
 
-log_transform(a::AbstractArray) = log.(a)
-exp_transform(a::AbstractArray) = exp.(a)
-
 N_ens = 50  # number of ensemble members
 N_iter = 5  # number of EKI iterations
 # initial parameters: N_ens x N_params
-initial_params = EKP.construct_initial_ensemble(N_ens, priors; rng_seed=6)
-ekiobj = EKP.EKObj(initial_params, param_names, truth.mean,
-                   truth.obs_noise_cov, Inversion(), Δt=1.0)
+initial_params = EKP.construct_initial_ensemble(priors, N_ens; rng_seed=6)
+ekiobj = EKP.EKObj(initial_params, truth.mean, truth.obs_noise_cov,
+                   Inversion(), Δt=0.3)
 
 # Initialize a ParticleDistribution with dummy parameters. The parameters
 # will then be set in run_G_ensemble
@@ -130,9 +169,8 @@ g_settings = GModel.GSettings(kernel, dist_type, moments, tspan)
 
 # EKI iterations
 for i in 1:N_iter
-    # Note that the parameters are exp-transformed for use as input
-    # to Cloudy
-    params_i = deepcopy(exp_transform(ekiobj.u[end]))
+    params_i = mapslices(x -> transform_unconstrained_to_constrained(priors, x),
+                         ekiobj.u[end]; dims=2)
     g_ens = GModel.run_G_ensemble(params_i, g_settings,
                                   PDistributions.update_params,
                                   PDistributions.moment,
@@ -141,11 +179,13 @@ for i in 1:N_iter
 end
 
 # EKI results: Has the ensemble collapsed toward the truth?
-println("True parameters: ")
-println(params_true)
+transformed_params_true = transform_constrained_to_unconstrained(priors,
+                                                                 params_true)
+println("True parameters (transformed): ")
+println(transformed_params_true)
 
 println("\nEKI results:")
-println(mean(deepcopy(exp_transform(ekiobj.u[end])), dims=1))
+println(mean(ekiobj.u[end], dims=1))
 
 
 ###
@@ -164,9 +204,8 @@ kern1 = SE(len1, 1.0)
 len2 = zeros(3)
 kern2 = Mat52Ard(len2, 0.0)
 white = Noise(log(2.0))
-# # construct kernel
 GPkernel = kern1 + kern2 + white
-# Get training points
+# Get training points 
 u_tp, g_tp = Utilities.extract_GP_tp(ekiobj, N_iter)
 normalized = true
 gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,
@@ -175,7 +214,8 @@ gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,
 
 # Check how well the Gaussian Process regression predicts on the
 # true parameters
-y_mean, y_var = GPEmulator.predict(gpobj, reshape(log.(params_true), 1, :),
+y_mean, y_var = GPEmulator.predict(gpobj,
+                                   reshape(transformed_params_true, 1, :),
                                    transform_to_real=true)
 
 println("GP prediction on true parameters: ")
@@ -192,7 +232,7 @@ println(truth.mean)
 
 u0 = vec(mean(u_tp, dims=1))
 println("initial parameters: ", u0)
-# MCMC parameters
+# MCMC settings
 mcmc_alg = "rwm" # random walk Metropolis
 
 # First let's run a short chain to determine a good step size
@@ -207,51 +247,44 @@ new_step = MCMC.find_mcmc_step!(mcmc_test, gpobj)
 # Now begin the actual MCMC
 println("Begin MCMC - with step size ", new_step)
 u0 = vec(mean(u_tp, dims=1))
-
-# reset parameters
 burnin = 1000
 max_iter = 100000
-
-mcmc = MCMC.MCMCObj(yt_sample, Γy, priors, new_step, u0, max_iter,
-                    mcmc_alg, burnin, svdflag=true)
+mcmc = MCMC.MCMCObj(yt_sample, Γy, priors, new_step, u0, max_iter, mcmc_alg,
+                    burnin, svdflag=true)
 
 MCMC.sample_posterior!(mcmc, gpobj, max_iter)
 
-posterior = MCMC.get_posterior(mcmc)
+posterior = MCMC.get_posterior(mcmc) 
 
-post_mean = mean(posterior, dims=1)
-post_cov = cov(posterior, dims=1)
-println("post_mean")
+post_mean = get_mean(posterior)
+post_cov = get_cov(posterior)
+println("posterior mean")
 println(post_mean)
-println("post_cov")
+println("posterior covariance")
 println(post_cov)
-println("D util")
-println(det(inv(post_cov)))
-println(" ")
 
 # Plot the posteriors together with the priors and the true parameter values
-true_values = [log(N0_true) log(θ_true) log(k_true)]
-n_params = length(true_values)
+# (in the transformed/unconstrained space)
+n_params = length(get_name(posterior))
 for idx in 1:n_params
     if idx == 1
-        param = "N0"
-        xs = collect(4.5:0.01:6.5)
+        xs = collect(range(5.0, stop=5.5, length=1000))
     elseif idx == 2
-        param = "Theta"
-        xs = collect(-1.0:0.01:2.5)
+        xs = collect(range(-1.0, stop=1.0, length=1000))
     elseif idx == 3
-        param = "k"
-        xs = collect(-4.0:0.01:1.0)
+        xs = collect(range(-3.0, stop=-1.0, length=1000))
     else
        throw("not implemented")
    end
 
-    label = "true " * param
-    histogram(posterior[:, idx], bins=100, normed=true, fill=:slategray,
-              lab="posterior")
-    plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab="prior")
-    plot!([true_values[idx]], seriestype="vline", w=2.6, lab=label)
-
-    title!(param)
-    StatsPlots.savefig("posterior_"*param*".png")
+    label = "true " * param_names[idx]
+    posterior_samples = dropdims(get_distribution(posterior)[param_names[idx]],
+                                 dims=1)
+    histogram(posterior_samples, bins=100, normed=true, fill=:slategray,
+              thickness_scaling=2.0, lab="posterior", legend=:outertopright)
+    prior_dist = get_distribution(mcmc.prior)[param_names[idx]]
+    plot!(xs, prior_dist, w=2.6, color=:blue, lab="prior")
+    plot!([transformed_params_true[idx]], seriestype="vline", w=2.6, lab=label)
+    title!(param_names[idx])
+    StatsPlots.savefig("posterior_" * param_names[idx] * ".png")
 end
diff --git a/src/ParameterDistribution.jl b/src/ParameterDistribution.jl
index e4a567b25..1789d8872 100644
--- a/src/ParameterDistribution.jl
+++ b/src/ParameterDistribution.jl
@@ -252,14 +252,14 @@ end
 """
     function get_distribution(pd::ParameterDistribution)
 
-Returns a `Dict` of `ParameterDistribution` distributions by name, (unless sample type)
+Returns a `Dict` of `ParameterDistribution` distributions, with the parameter names as dictionary keys. For parameters represented by `Samples`, the samples are returned as a 2D (parameter_dimension x n_samples) array
 
 """
 function get_distribution(pd::ParameterDistribution)
     return Dict{String,Any}(pd.names[i] => get_distribution(d) for (i,d) in enumerate(pd.distributions))
 end
 function get_distribution(d::Samples)
-    return "Contains samples only"
+    return d.distribution_samples
 end
 function get_distribution(d::Parameterized)
     return d.distribution
diff --git a/test/ParameterDistribution/runtests.jl b/test/ParameterDistribution/runtests.jl
index a4780d613..0d8223b65 100644
--- a/test/ParameterDistribution/runtests.jl
+++ b/test/ParameterDistribution/runtests.jl
@@ -131,12 +131,12 @@ using CalibrateEmulateSample.ParameterDistributionStorage
     # Tests for get_distribution
    @test get_distribution(d1) == MvNormal(4,0.1)
    @test get_distribution(u1)[name1] == MvNormal(4,0.1)
-    @test typeof(get_distribution(d2)) <: String
-    @test typeof(get_distribution(u2)[name2]) <: String
+    @test typeof(get_distribution(d2)) == Array{Int64, 2}
+    @test typeof(get_distribution(u2)[name2]) == Array{Int64, 2}
 
    d = get_distribution(u)
    @test d[name1] == MvNormal(4,0.1)
-    @test typeof(d[name2]) <: String
+    @test typeof(d[name2]) == Array{Int64, 2}
 
    # Test for get_all_constraints
    @test get_all_constraints(u) == cat([c1,c2]...,dims=1)
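
For readers new to the ParameterDistributionStorage interface that the updated example relies on, the short sketch below shows how its pieces fit together: a prior is built from a distribution in the unconstrained space, a constraint, and a name; points are mapped between the unconstrained and constrained (physical) spaces; and get_distribution looks distributions up by parameter name. This is a minimal illustration, not part of the patch: the single parameter "θ", the lower bound 0.1, and the test value 0.3 are assumptions chosen for the sketch, and only calls that already appear in the diff above are used.

# Minimal sketch; assumes CalibrateEmulateSample (with ParameterDistributionStorage)
# and Distributions are installed, as in the example above.
using Distributions
using CalibrateEmulateSample.ParameterDistributionStorage

# One parameter with a hypothetical lower bound of 0.1 and a standard normal
# prior in the unconstrained space, mirroring how `priors` is built in
# Cloudy_example.jl.
constraint = [bounded_below(0.1)]
distribution = Parameterized(Normal(0.0, 1.0))
prior = ParameterDistribution([distribution], [constraint], ["θ"])

# Map an unconstrained (computational) value into the constrained (physical)
# space and back again.
u = [0.3]                                             # illustrative value
θ = transform_unconstrained_to_constrained(prior, u)  # respects θ > 0.1
u_roundtrip = transform_constrained_to_unconstrained(prior, θ)

# get_distribution returns a Dict keyed by parameter name; for Parameterized
# parameters the value is the Distributions.jl object (see the updated
# docstring above).
println(get_distribution(prior)["θ"])

This is the same round trip the example performs with mapslices inside the EKI loop and when it plots the posterior and the true parameter values in the transformed (unconstrained) space.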