From d3684c9caeb93b93cb9a640a2946a3180a045a77 Mon Sep 17 00:00:00 2001 From: Melanie Bieli Date: Mon, 22 Jun 2020 10:14:39 -0400 Subject: [PATCH] Organize priors in Prior structs --- ...calibrate_emulate_sample_Cloudy_demo.ipynb | 33 +++------- src/CalibrateEmulateSample.jl | 1 + src/EKI.jl | 13 ++-- src/MCMC.jl | 31 ++++----- src/Priors.jl | 66 +++++++++++++++++++ src/Utilities.jl | 2 +- test/Cloudy/runtests.jl | 13 ++-- test/EKI/runtests.jl | 9 +-- test/MCMC/runtests.jl | 11 ++-- test/Priors/runtests.jl | 21 ++++++ test/runtests.jl | 1 + 11 files changed, 136 insertions(+), 65 deletions(-) create mode 100644 src/Priors.jl create mode 100644 test/Priors/runtests.jl diff --git a/examples/Cloudy/calibrate_emulate_sample_Cloudy_demo.ipynb b/examples/Cloudy/calibrate_emulate_sample_Cloudy_demo.ipynb index 0660d99cf..571cd29bb 100644 --- a/examples/Cloudy/calibrate_emulate_sample_Cloudy_demo.ipynb +++ b/examples/Cloudy/calibrate_emulate_sample_Cloudy_demo.ipynb @@ -13,7 +13,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook demonstrates how parameters of cloud droplet size distributions can be learned from data generated by Cloudy, a cloud microphysics toy model that can be downloaded here: https://github.com/CliMA/Cloudy.jl. \n", + "This notebook demonstrates how parameters of cloud droplet size distributions can be learned from data generated by Cloudy, a cloud microphysics toy model that can be downloaded here: https://github.com/climate-machine/Cloudy.jl. \n", "\n", "Cloudy takes the following input:\n", "* parameters defining a **cloud droplet size distribution** (e.g., parameters defining a gamma distribution or an exponential distribution)\n", @@ -95,6 +95,7 @@ "source": [ "# Import Calibrate-Emulate-Sample modules (adjust paths \n", "# if necessary)\n", + "include(\"CalibrateEmulateSample.jl/src/Priors.jl\")\n", "include(\"CalibrateEmulateSample.jl/src/EKI.jl\")\n", "include(\"CalibrateEmulateSample.jl/src/Observations.jl\")\n", "include(\"CalibrateEmulateSample.jl/src/GPEmulator.jl\")\n", @@ -172,9 +173,9 @@ "logmean_θ, logstd_θ = logmean_and_logstd(3.0, 1.5)\n", "logmean_k, logstd_k = logmean_and_logstd(0.5, 0.5)\n", "\n", - "priors = [Distributions.Normal(logmean_N0, logstd_N0), # prior on N0\n", - " Distributions.Normal(logmean_θ, logstd_θ), # prior on θ\n", - " Distributions.Normal(logmean_k, logstd_k)] # prior on k " + "priors = [Priors.Prior(Normal(logmean_N0, logstd_N0), \"N0\"), # prior on N0\n", + " Priors.Prior(Normal(logmean_θ, logstd_θ), \"θ\"), # prior on θ\n", + " Priors.Prior(Normal(logmean_k, logstd_k), \"k\")] # prior on k" ] }, { @@ -356,7 +357,7 @@ "# construct kernel\n", "GPkernel = kern1 + kern2 + white\n", " \n", - "u_tp, g_tp = Utilities.extract_GP_tp(ekiobj, N_iter)\n", + "u_tp, g_tp = Utilities.extract_GP_tp(ekiobj, N_iter-1)\n", "normalized = true\n", "gpobj = GPEmulator.GPObj(u_tp, g_tp, gppackage; GPkernel=GPkernel, \n", " normalized=normalized, prediction_type=pred_type)" @@ -488,31 +489,13 @@ " label = \"true \" * param\n", " histogram(posterior[:, idx], bins=100, normed=true, \n", " fill=:slategray, lab=\"posterior\")\n", - " plot!(xs, mcmc.prior[idx], w=2.6, color=:blue, lab=\"prior\")\n", + " plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab=\"prior\")\n", " plot!([true_values[idx]], seriestype=\"vline\", w=2.6, lab=label)\n", "\n", " title!(param)\n", - " StatsPlots.savefig(\"/home/melanie/Desktop/posterior_\"*param*\".png\")\n", + " StatsPlots.savefig(\"posterior_\"*param*\".png\")\n", "end" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/CalibrateEmulateSample.jl b/src/CalibrateEmulateSample.jl index 266891e2b..c352ea9a8 100644 --- a/src/CalibrateEmulateSample.jl +++ b/src/CalibrateEmulateSample.jl @@ -7,6 +7,7 @@ include("Observations.jl") include("spaces.jl") include("problems.jl") include("EKS.jl") +include("Priors.jl") # No internal deps, heavy external deps include("EKI.jl") diff --git a/src/EKI.jl b/src/EKI.jl index 8bcb1efd0..3eb8d4f6f 100644 --- a/src/EKI.jl +++ b/src/EKI.jl @@ -1,5 +1,6 @@ module EKI +using ..Priors using Random using Statistics using Distributions @@ -59,19 +60,23 @@ end """ - construct_initial_ensemble(N_ens::IT, priors; rng_seed=42) where {IT<:Int} +construct_initial_ensemble(N_ens::IT, priors::Array{Prior, 1}; rng_seed=42) where {IT<:Int} Construct the initial parameters, by sampling N_ens samples from specified prior distributions. """ -function construct_initial_ensemble(N_ens::IT, priors; rng_seed=42) where {IT<:Int} +function construct_initial_ensemble(N_ens::IT, priors::Array{Prior, 1}; rng_seed=42) where {IT<:Int} N_params = length(priors) params = zeros(N_ens, N_params) # Ensuring reproducibility of the sampled parameter values Random.seed!(rng_seed) for i in 1:N_params - prior_i = priors[i] - params[:, i] = rand(prior_i, N_ens) + prior_i = priors[i].dist + if !(typeof(priors[i].dist) == Deterministic{Float64}) + params[:, i] = rand(prior_i, N_ens) + else + params[:, i] = prior_i.value * ones(N_ens) + end end return params diff --git a/src/MCMC.jl b/src/MCMC.jl index f58038ef7..79ef75863 100644 --- a/src/MCMC.jl +++ b/src/MCMC.jl @@ -1,6 +1,7 @@ module MCMC using ..GPEmulator +using ..Priors using Statistics using Distributions @@ -34,10 +35,8 @@ struct MCMCObj{FT<:AbstractFloat, IT<:Int} obs_cov::Array{FT, 2} "inverse of obs_cov" obs_covinv::Array{FT, 2} - "array of length N_parameters with the parameters' prior - distributions (Julia Distributions objects), e.g., - [Normal(5.0, 2.0), Uniform(2.0, 4.0)]" - prior::Array + "array of length N_parameters with the parameters' prior distributions" + prior::Array{Prior, 1} "MCMC step size" step::Array{FT} "Number of MCMC steps that are considered burnin" @@ -59,7 +58,7 @@ end """ MCMCObj(obs_sample::Vector{FT}, obs_cov::Array{FT, 2}, - priors::Array, + priors::Array{Prior, 1}, step::FT, param_init::Vector{FT}, max_iter::IT, @@ -71,7 +70,7 @@ where max_iter is the number of MCMC steps to perform (e.g., 100_000) """ function MCMCObj(obs_sample::Vector{FT}, obs_cov::Array{FT, 2}, - priors::Array, + priors::Array{Prior, 1}, step::FT, param_init::Vector{FT}, max_iter::IT, @@ -159,7 +158,7 @@ end function log_prior(mcmc::MCMCObj{FT}) where {FT} log_rho = FT[0] # Assume independent priors for each parameter - priors = mcmc.prior + priors = [mcmc.prior[i].dist for i in 1:length(mcmc.prior)] for (param, prior_dist) in zip(mcmc.param, priors) # get density at current parameter value log_rho[1] += logpdf(prior_dist, param) @@ -172,24 +171,18 @@ end function proposal(mcmc::MCMCObj) variances = zeros(length(mcmc.param)) - for (idx, prior) in enumerate(mcmc.prior) + priors = [mcmc.prior[i].dist for i in 1:length(mcmc.prior)] + param_names = [mcmc.prior[i].param_name for i in 1:length(mcmc.prior)] + for (idx, prior) in enumerate(priors) variances[idx] = var(prior) end if mcmc.algtype == "rwm" - #prop_dist = MvNormal(mcmc.posterior[1 + mcmc.iter[1], :], (mcmc.step[1]) * Diagonal(variances)) - prop_dist = MvNormal(zeros(length(mcmc.param)), (mcmc.step[1]^2) * Diagonal(variances)) + prop_dist = MvNormal(zeros(length(mcmc.param)), + (mcmc.step[1]^2) * Diagonal(variances)) end sample = mcmc.posterior[1 + mcmc.iter[1], :] .+ rand(prop_dist) - # TODO: The hope is that FreeParams will be able to do domain checks for the parameters -# for (idx, prior) in enumerate(mcmc.prior) -# while !insupport(prior, sample[idx]) -# println("not in support - resampling") -# sample[:] = mcmc.posterior[1 + mcmc.iter[1], :] .+ rand(prop_dist) -# end -# end - return sample end @@ -259,7 +252,7 @@ function sample_posterior!(mcmc::MCMCObj{FT,IT}, for mcmcit in 1:max_iter param = convert(Array{FT, 2}, mcmc.param') - # test predictions param' is 1xN_params + # test predictions (param' is 1 x N_parameters) gp_pred, gp_predvar = predict(gpobj, param) gp_pred = cat(gp_pred..., dims=2) gp_predvar = cat(gp_predvar..., dims=2) diff --git a/src/Priors.jl b/src/Priors.jl new file mode 100644 index 000000000..c634b00fb --- /dev/null +++ b/src/Priors.jl @@ -0,0 +1,66 @@ +module Priors + +using Distributions +using DocStringExtensions +using Random + +export Prior +export Deterministic + +""" + Deterministic(value::FT) + +A deterministic distribution is a distribution over only one value, which it takes with probability 1. +It can be used when a distribution is required, but the outcome is deterministic. +``` +d = Deterministic(3.0) # Deterministic "distribution" with value 3.0 +rand(d) # Always returns the same value +``` +""" +struct Deterministic{FT<:Real} <: DiscreteUnivariateDistribution + value::FT +end + +eltype(::Deterministic{FT}) where {FT} = FT +rand(rng::AbstractRNG, d::Deterministic) = d.value +pdf(d::Deterministic{FT}, x::FT) where {FT} = x == d.value ? FT(1) : FT(0) +cdf(d::Deterministic{FT}, x::FT) where {FT} = x < d.value ? FT(0) : FT(1) +quantile(d::Deterministic{FT}, p) where {FT} = 0 <= p <= 1 ? d.value : FT(NaN) +minimum(d::Deterministic) = d.value +maximum(d::Deterministic) = d.value +insupport(d::Deterministic, x) = x == d.value +mean(d::Deterministic) = d.value +var(d::Deterministic{FT}) where {FT} = FT(0) +mode(d::Deterministic) = d.value +skewness(d::Deterministic{FT}) where {FT} = FT(0) +kurtosis(d::Deterministic{FT}) where {FT} = FT(0) +entropy(d::Deterministic{FT}) where {FT} = FT(0) +mgf(d::Deterministic, t) = exp(t * d.value) +cf(d::Deterministic, t) = exp(im * t * d.value) + + +""" + struct Prior + +Structure to hold information on the prior distribution of a UQ parameter + +# Fields +$(DocStringExtensions.FIELDS) +""" +struct Prior + "prior distribution" + dist::Union{Distribution, Deterministic} + "name of the parameter with prior distribution dist" + param_name::String +end + + +""" + Prior(value::FT, param_name::String) +""" +function Prior(value::FT, param_name::String) where {FT<:Real} + Prior(Deterministic(value), param_name) +end + + +end # module diff --git a/src/Utilities.jl b/src/Utilities.jl index a314d4a1b..a8ad5a1a1 100644 --- a/src/Utilities.jl +++ b/src/Utilities.jl @@ -116,4 +116,4 @@ name(name::AbstractString) = rpad(name * ":", RPAD) warn(name::AbstractString) = rpad("WARNING (" * name * "):", RPAD) -end # module ConvenienceFunctions +end # module diff --git a/test/Cloudy/runtests.jl b/test/Cloudy/runtests.jl index d122704e0..7c947f395 100644 --- a/test/Cloudy/runtests.jl +++ b/test/Cloudy/runtests.jl @@ -55,9 +55,9 @@ logmean_N0, logstd_N0 = logmean_and_logstd(280., 40.) logmean_θ, logstd_θ = logmean_and_logstd(3.0, 1.5) logmean_k, logstd_k = logmean_and_logstd(0.5, 0.5) -priors = [Distributions.Normal(logmean_N0, logstd_N0), # prior on N0 - Distributions.Normal(logmean_θ, logstd_θ), # prior on θ - Distributions.Normal(logmean_k, logstd_k)] # prior on k +priors = [Priors.Prior(Normal(logmean_N0, logstd_N0), "N0"), # prior on N0 + Priors.Prior(Normal(logmean_θ, logstd_θ), "θ"), # prior on θ + Priors.Prior(Normal(logmean_k, logstd_k), "k")] # prior on k ### @@ -193,9 +193,8 @@ burnin = 0 step = 0.1 # first guess max_iter = 5000 yt_sample = truth.mean -mcmc_test = MCMC.MCMCObj(yt_sample, truth.cov, - priors, step, u0, - max_iter, mcmc_alg, burnin) +mcmc_test = MCMC.MCMCObj(yt_sample, truth.cov, priors, step, u0, max_iter, + mcmc_alg, burnin) new_step = MCMC.find_mcmc_step!(mcmc_test, gpobj) # Now begin the actual MCMC @@ -243,7 +242,7 @@ for idx in 1:n_params label = "true " * param histogram(posterior[:, idx], bins=100, normed=true, fill=:slategray, lab="posterior") - plot!(xs, mcmc.prior[idx], w=2.6, color=:blue, lab="prior") + plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab="prior") plot!([true_values[idx]], seriestype="vline", w=2.6, lab=label) title!(param) diff --git a/test/EKI/runtests.jl b/test/EKI/runtests.jl index f32e44b68..0c0c320d1 100644 --- a/test/EKI/runtests.jl +++ b/test/EKI/runtests.jl @@ -4,6 +4,7 @@ using Random using Test using CalibrateEmulateSample.EKI +using CalibrateEmulateSample.Priors @testset "EKI" begin @@ -14,8 +15,8 @@ using CalibrateEmulateSample.EKI ### Define priors on the parameters u umin = 0.0 umax = 20.0 - priors = [Uniform(umin, umax), # prior on u1 - Uniform(umin, umax)] # prior on u2 + priors = [Priors.Prior(Uniform(umin, umax), "u1"), # prior on u1 + Priors.Prior(Uniform(umin, umax), "u2")] # prior on u2 ### Define forward model G function G(u) @@ -24,7 +25,7 @@ using CalibrateEmulateSample.EKI ### Generate (artificial) truth samples npar = 2 # number of parameters - ut = rand(Uniform(umin, umax), npar) + ut = Distributions.rand(Uniform(umin, umax), npar) param_names = ["u1", "u2"] yt = G(ut) n_samples = 100 @@ -33,7 +34,7 @@ using CalibrateEmulateSample.EKI Γy = noise_level^2 * convert(Array, Diagonal(yt)) μ = zeros(length(yt)) for i in 1:n_samples - truth_samples[i, :] = yt + noise_level^2 * rand(MvNormal(μ, Γy)) + truth_samples[i, :] = yt + noise_level^2 * Distributions.rand(MvNormal(μ, Γy)) end ### Calibrate: Ensemble Kalman Inversion diff --git a/test/MCMC/runtests.jl b/test/MCMC/runtests.jl index 5ceabb111..de408065c 100644 --- a/test/MCMC/runtests.jl +++ b/test/MCMC/runtests.jl @@ -4,8 +4,9 @@ using Distributions using GaussianProcesses using Test -using CalibrateEmulateSample.GPEmulator using CalibrateEmulateSample.MCMC +using CalibrateEmulateSample.Priors +using CalibrateEmulateSample.GPEmulator @testset "MCMC" begin @@ -13,8 +14,8 @@ using CalibrateEmulateSample.MCMC rng_seed = 41 Random.seed!(rng_seed) - # We need a GPObj to run MCMC, so let's reconstruct the one that's tested in - # runtets.jl in test/GPEmulator/ + # We need a GPObj to run MCMC, so let's reconstruct the one that's tested + # in test/GPEmulator/runtests.jl # Training data n = 10 # number of training points x = 2.0 * π * rand(n) # predictors/features @@ -38,7 +39,7 @@ using CalibrateEmulateSample.MCMC ### Define prior umin = -1.0 umax = 6.0 - prior = [Uniform(umin, umax)] + prior = [Priors.Prior(Uniform(umin, umax), "u")] # prior on u obs_sample = [1.0] cov = convert(Array, Diagonal([1.0])) @@ -70,7 +71,7 @@ using CalibrateEmulateSample.MCMC @test mcmc.obs_covinv == inv(cov) @test mcmc.burnin == burnin @test mcmc.algtype == mcmc_alg - @test mcmc.iter[1] == max_iter+ 1 + @test mcmc.iter[1] == max_iter + 1 @test size(posterior) == (max_iter - burnin + 1, length(param_init)) @test_throws Exception MCMCObj(obs_sample, cov, prior, step, param_init, max_iter, "gibbs", burnin) diff --git a/test/Priors/runtests.jl b/test/Priors/runtests.jl new file mode 100644 index 000000000..66585153d --- /dev/null +++ b/test/Priors/runtests.jl @@ -0,0 +1,21 @@ +using Test +using Distributions + +using CalibrateEmulateSample.Priors + +@testset "Priors" begin + + # u1 is a parameter with a Gamma prior + u1_prior = Prior(Gamma(2.0, 0.8), "u1") + @test u1_prior.param_name == "u1" + @test u1_prior.dist == Gamma(2.0, 0.8) + + # u2 is a parameter with a Deterministic prior, i.e., it always takes on + # the same value, val + val = 3.0 + u2_prior = Prior(Deterministic(val), "u2") + @test u2_prior.param_name == "u2" + @test u2_prior.dist == Deterministic(val) + @test u2_prior.dist.value == val + +end diff --git a/test/runtests.jl b/test/runtests.jl index 61791d54d..717af6e7a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ ENV["JULIA_LOG_LEVEL"] = "WARN" ENV["PYTHON"] = "" for submodule in [ + "Priors", "EKI", "GPEmulator", "MCMC",