Update Cloudy example to work with latest (up to PR #89) CES code
Melanie committed Dec 18, 2020
1 parent 35eef88 commit 90b6e86
Showing 2 changed files with 94 additions and 59 deletions.
149 changes: 92 additions & 57 deletions examples/Cloudy/Cloudy_example.jl
@@ -1,4 +1,4 @@
# Import Cloudy modules
# This example requires Cloudy to be installed.
using Pkg; Pkg.add(PackageSpec(name="Cloudy", version="0.1.0"))
using Cloudy
const PDistributions = Cloudy.ParticleDistributions
@@ -17,9 +17,44 @@ using CalibrateEmulateSample.EKP
using CalibrateEmulateSample.GPEmulator
using CalibrateEmulateSample.MCMC
using CalibrateEmulateSample.Observations
using CalibrateEmulateSample.GModel
using CalibrateEmulateSample.Utilities
using CalibrateEmulateSample.Priors
using CalibrateEmulateSample.ParameterDistributionStorage

# Import the module that runs Cloudy
include("GModel.jl")
using .GModel

################################################################################
#
#                  Cloudy Calibrate-Emulate-Sample Example
#
# This example uses Cloudy, a microphysics model that simulates the
# coalescence of cloud droplets into bigger drops, to demonstrate how the
# full Calibrate-Emulate-Sample pipeline can be used for Bayesian learning
# and uncertainty quantification of parameters, given some observations.
#
# Specifically, this example shows how to learn parameters of the initial
# cloud droplet mass distribution, given observations of some moments of
# that mass distribution at a later time, after some of the droplets have
# collided and become bigger drops.
#
# In this example, Cloudy is used in a "perfect model" (aka "known truth")
# setting, which means that the "observations" are generated by Cloudy
# itself, by running it with the true parameter values. In more realistic
# applications, the observations will come from some external measurement
# system.
#
# The focus here is on the "how", not on the results, i.e., the purpose is
# to show how to do parameter learning using Calibrate-Emulate-Sample for a
# simple (and highly artificial) problem.
#
# For more information on Cloudy, see
# https://github.com/CliMA/Cloudy.jl.git
#
################################################################################


rng_seed = 41
Random.seed!(rng_seed)
@@ -34,34 +69,38 @@ Random.seed!(rng_seed)
param_names = ["N0", "θ", "k"]
n_param = length(param_names)

N0_true = 300.0
θ_true = 1.5597
k_true = 0.0817
N0_true = 300.0 # number of particles (scaling factor for Gamma distribution)
θ_true = 1.5597 # scale parameter of Gamma distribution
k_true = 0.0817 # shape parameter of Gamma distribution
params_true = [N0_true, θ_true, k_true]
# Note that dist_true is a Cloudy distribution, not a Distributions.jl
# distribution
dist_true = PDistributions.Gamma(N0_true, θ_true, k_true)
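# For reference: assuming Cloudy's Gamma particle distribution follows the
# standard number-density parameterization
#     n(x) = N0 * x^(k-1) * exp(-x/θ) / (θ^k * Γ(k)),
# its p-th moment has the closed form
#     M_p = N0 * θ^p * Γ(k+p) / Γ(k),
# so e.g. M_0 = N0 (total droplet number) and M_1 = N0 * k * θ (total mass).
# This is why a handful of observed moments carries information about all
# three parameters (N0, θ, k).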

# Assume lognormal priors for all three parameters
# Note: For the model G (=Cloudy) to run, N0 needs to be nonnegative, and θ
# and k need to be positive. The EK update can result in violations of
# these constraints - therefore, we perform CES in log space, i.e., we try
# to find the logarithms of the true parameters (and of course, the actual
# parameters can then simply be obtained by exponentiating the final results).
function logmean_and_logstd(μ, σ)
σ_log = sqrt(log(1.0 + σ^2/μ^2))
μ_log = log(μ / (sqrt(1.0 + σ^2/μ^2)))
return μ_log, σ_log
end

logmean_N0, logstd_N0 = logmean_and_logstd(280., 40.)
logmean_θ, logstd_θ = logmean_and_logstd(3.0, 1.5)
logmean_k, logstd_k = logmean_and_logstd(0.5, 0.5)
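# A quick, approximate sanity check of the moment matching above (values
# rounded): logmean_and_logstd(280., 40.) ≈ (5.62, 0.142), and a lognormal
# with these log-space parameters indeed reproduces the requested real-space
# mean and standard deviation:
#     exp(5.62 + 0.142^2/2) ≈ 280,    sqrt(exp(0.142^2) - 1) * 280 ≈ 40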
###
### Define priors for the parameters we want to learn
###

priors = [Priors.Prior(Normal(logmean_N0, logstd_N0), "N0"), # prior on N0
Priors.Prior(Normal(logmean_θ, logstd_θ), "θ"), # prior on θ
Priors.Prior(Normal(logmean_k, logstd_k), "k")] # prior on k
# Define constraints
lbound_N0 = 1.0e-1 * N0_true
lbound_θ = 1.0e-4
lbound_k = 1.0e-4
c1 = bounded_below(lbound_N0)
c2 = bounded_below(lbound_θ)
c3 = bounded_below(lbound_k)
constraints = [[c1], [c2], [c3]]

# We choose to use normal distributions to represent the prior distributions of
# the parameters in the transformed (unconstrained) space.
d1 = Parameterized(Normal(0.0, 0.1))
d2 = Parameterized(Normal(0.0, 0.1))
d3 = Parameterized(Normal(0.0, 0.1))
distributions = [d1, d2, d3]

names = ["N0", "θ", "k"]

priors = ParameterDistribution(distributions, constraints, names)
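
# For intuition only: a bounded_below(lb) constraint maps an unconstrained
# value u (where the Normal(0, 0.1) priors above live) to a value guaranteed
# to lie above lb. A shifted exponential is the usual choice for this kind of
# mapping; the sketch below is illustrative, not necessarily the exact
# transform the package applies -- the authoritative mapping is
# transform_unconstrained_to_constrained(priors, u), used further down.
to_constrained_sketch(u, lb) = lb + exp(u)   # always > lb
# e.g. a typical prior draw u ≈ 0.0 for θ maps to roughly lbound_θ + 1.0 ≈ 1.0,
# i.e. safely inside the physically admissible (positive) range.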

###
### Define the data from which we want to learn the parameters
@@ -77,7 +116,7 @@ n_moments = length(moments)
###

# Collision-coalescence kernel to be used in Cloudy
coalescence_coeff = 1/3.14/4
coalescence_coeff = 1/3.14/4/1000
kernel_func = x -> coalescence_coeff
kernel = Cloudy.KernelTensors.CoalescenceTensor(kernel_func, 0, 100.0)

@@ -112,15 +151,12 @@ truth = Observations.Obs(yt, Γy, data_names)
### Calibrate: Ensemble Kalman Inversion
###

log_transform(a::AbstractArray) = log.(a)
exp_transform(a::AbstractArray) = exp.(a)

N_ens = 50 # number of ensemble members
N_iter = 5 # number of EKI iterations
# initial parameters: N_ens x N_params
initial_params = EKP.construct_initial_ensemble(N_ens, priors; rng_seed=6)
ekiobj = EKP.EKObj(initial_params, param_names, truth.mean,
truth.obs_noise_cov, Inversion(), Δt=1.0)
initial_params = EKP.construct_initial_ensemble(priors, N_ens; rng_seed=6)
ekiobj = EKP.EKObj(initial_params, truth.mean, truth.obs_noise_cov,
Inversion(), Δt=1.0)
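
# For intuition, a bare-bones (perturbed-observation) EKI update looks roughly
# like the sketch below. The actual update is performed by the EKP module in
# the iteration loop further down and differs in details (e.g. the Δt scaling
# and any regularization). The sketch assumes Statistics and Distributions
# are loaded.
function eki_update_sketch(u, g, y, Γ)
    # u: N_ens × N_par parameter ensemble, g: N_ens × N_obs model outputs,
    # y: observation vector (length N_obs), Γ: observational noise covariance
    u_mean = mean(u, dims=1)
    g_mean = mean(g, dims=1)
    C_ug = (u .- u_mean)' * (g .- g_mean) / size(u, 1)   # N_par × N_obs
    C_gg = (g .- g_mean)' * (g .- g_mean) / size(u, 1)   # N_obs × N_obs
    y_pert = rand(MvNormal(y, Γ), size(u, 1))'           # one perturbed y per member
    return u + (y_pert .- g) * ((C_gg + Γ) \ C_ug')      # updated N_ens × N_par ensemble
end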

# Initialize a ParticleDistribution with dummy parameters. The parameters
# will then be set in run_G_ensemble
@@ -130,9 +166,8 @@ g_settings = GModel.GSettings(kernel, dist_type, moments, tspan)

# EKI iterations
for i in 1:N_iter
# Note that the parameters are exp-transformed for use as input
# to Cloudy
params_i = deepcopy(exp_transform(ekiobj.u[end]))
params_i = mapslices(x -> transform_unconstrained_to_constrained(priors, x),
ekiobj.u[end]; dims=2)
g_ens = GModel.run_G_ensemble(params_i, g_settings,
PDistributions.update_params,
PDistributions.moment,
@@ -141,11 +176,12 @@ for i in 1:N_iter
end

# EKI results: Has the ensemble collapsed toward the truth?
println("True parameters: ")
println(params_true)
transformed_truth = transform_constrained_to_unconstrained(priors, params_true)
println("True (transformed) parameters: ")
println(transformed_truth)

println("\nEKI results:")
println(mean(deepcopy(exp_transform(ekiobj.u[end])), dims=1))
println(mean(ekiobj.u[end], dims=1))


###
@@ -175,7 +211,8 @@ gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel,

# Check how well the Gaussian Process regression predicts on the
# true parameters
y_mean, y_var = GPEmulator.predict(gpobj, reshape(log.(params_true), 1, :),
y_mean, y_var = GPEmulator.predict(gpobj,
reshape(transformed_truth, 1, :),
transform_to_real=true)

println("GP prediction on true parameters: ")
@@ -216,42 +253,40 @@ mcmc = MCMC.MCMCObj(yt_sample, Γy, priors, new_step, u0, max_iter,
mcmc_alg, burnin, svdflag=true)
MCMC.sample_posterior!(mcmc, gpobj, max_iter)

posterior = MCMC.get_posterior(mcmc)

post_mean = mean(posterior, dims=1)
post_cov = cov(posterior, dims=1)
post_mean = get_mean(posterior)
post_cov = get_cov(posterior)
println("post_mean")
println(post_mean)
println("post_cov")
println(post_cov)
println("D util")
println(det(inv(post_cov)))
println(" ")

# Plot the posteriors together with the priors and the true parameter values
true_values = [log(N0_true) log(θ_true) log(k_true)]
n_params = length(true_values)
n_params = length(names)

for idx in 1:n_params
if idx == 1
param = "N0"
xs = collect(4.5:0.01:6.5)
xs = collect(range(-2.0, stop=2.0, length=1000))
elseif idx == 2
param = "Theta"
xs = collect(-1.0:0.01:2.5)
xs = collect(range(-2.0, stop=2.0, length=1000))
elseif idx == 3
param = "k"
xs = collect(-4.0:0.01:1.0)
xs = collect(range(-2.0, stop=2.0, length=1000))
else
throw("not implemented")
end

label = "true " * param
histogram(posterior[:, idx], bins=100, normed=true, fill=:slategray,
label = "true " * names[idx]
println("size of samples to be plotted in histogram")
println(size(posterior.distributions[idx].distribution_samples))
ys = dropdims(posterior.distributions[idx].distribution_samples, dims=1)
histogram(ys, bins=100, normed=true, fill=:slategray, thickness_scaling=2.0,
lab="posterior")
plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab="prior")
plot!([true_values[idx]], seriestype="vline", w=2.6, lab=label)
println("created histogram")
plot!(xs, get_distribution(mcmc.prior.distributions[idx]), w=2.6,
color=:blue, lab="prior")
plot!([transformed_truth[idx]], seriestype="vline", w=2.6, lab=label)

title!(param)
StatsPlots.savefig("posterior_"*param*".png")
title!(names[idx])
StatsPlots.savefig("posterior_"*names[idx]*".png")
end
4 changes: 2 additions & 2 deletions examples/Cloudy/GModel.jl
@@ -6,7 +6,7 @@ using DocStringExtensions
using Pkg; Pkg.build()

using Random
using Sundials # CVODE_BDF() solver for ODE
#using Sundials # CVODE_BDF() solver for ODE
using Distributions
using LinearAlgebra
using DifferentialEquations
@@ -117,7 +117,7 @@ function run_G(u::Array{FT, 1},
rhs(M, p, t) = get_src(M, dist, settings.kernel)
prob = ODEProblem(rhs, moments_init, settings.tspan)
# Solve the ODE
sol = solve(prob, CVODE_BDF(), alg_hints=[:stiff], reltol=tol, abstol=tol)
sol = solve(prob, Tsit5(), alg_hints=[:stiff], reltol=tol, abstol=tol)
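# Note: Tsit5() is an explicit solver intended for non-stiff problems, and
# when an algorithm is passed explicitly the alg_hints are effectively
# ignored. If the moment equations turn out to be stiff, a stiff solver may
# be a safer choice, e.g. (sketch):
# sol = solve(prob, Rosenbrock23(), reltol=tol, abstol=tol)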
# Return moments at last time step
moments_final = vcat(sol.u'...)[end, :]
