From 6f967f643fa3fb24ef25ce0469699d063bc861dd Mon Sep 17 00:00:00 2001
From: odunbar
Date: Tue, 2 Apr 2024 20:20:40 -0700
Subject: [PATCH 1/4] first comment batch

---
 paper.bib | 34 ++++++++++++++++++++++++++++++++++
 paper.md  | 12 +++++++-----
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/paper.bib b/paper.bib
index 53a65cab9..f337a71b7 100644
--- a/paper.bib
+++ b/paper.bib
@@ -272,3 +272,37 @@ @article{King:2023
   year = {2023},
   doi = {10.22541/essoar.170365299.96491153/v1},
 }
+
+
+@article{Metropolis:1953,
+  title={Equation of state calculations by fast computing machines},
+  author={Metropolis, Nicholas and Rosenbluth, Arianna W and Rosenbluth, Marshall N and Teller, Augusta H and Teller, Edward},
+  journal={The Journal of Chemical Physics},
+  volume={21},
+  number={6},
+  pages={1087--1092},
+  year={1953},
+  publisher={American Institute of Physics}
+}
+
+@article{Huggins:2023,
+doi = {10.21105/joss.05428},
+url = {https://doi.org/10.21105/joss.05428},
+year = {2023},
+publisher = {The Open Journal},
+volume = {8},
+number = {86},
+pages = {5428},
+author = {Bobby Huggins and Chengkun Li and Marlon Tobaben and Mikko J. Aarnos and Luigi Acerbi},
+title = {PyVBMC: Efficient Bayesian inference in Python},
+journal = {Journal of Open Source Software}
+}
+
+@article{Gammal:2022,
+    title={Fast and robust Bayesian Inference using Gaussian Processes with GPry},
+    author={Jonas El Gammal and Nils Schöneberg and Jesús Torrado and Christian Fidler},
+    year={2022},
+    eprint={2211.02045},
+    archivePrefix={arXiv},
+    primaryClass={astro-ph.CO}
+}
\ No newline at end of file
diff --git a/paper.md b/paper.md
index 9143425b7..c2fe5c5b0 100644
--- a/paper.md
+++ b/paper.md
@@ -49,25 +49,27 @@ bibliography: paper.bib

 # Summary

-A julia-language [@julia] package providing practical and modular implementation of ``Calibrate, Emulate, Sample" [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty is presented. This is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, and some data with which to constrain the free parameter distribution. The pipeline has three stages, most easily explained in reverse: the last stage is to draw samples (Sample) from the Bayesian posterior distribution, i.e. the constrained joint parameter distribution consistent with observed data; to accelerate and smooth this process we train statistical machine-learning emulators to represent the user-provided parameter-to-data map (Emulate); the training points for these emulators are generated by the computer model, and selected adaptively around regions of high posterior mass (Calibrate). We describe CES as an accelerated workflow, as it uses dramatically fewer evaluations of the computer model when compared with traditional algorithms to draw samples from the joint parameter distribution.
+A Julia language [@julia] package providing a practical and modular implementation of ``Calibrate, Emulate, Sample'' [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty, is presented. This is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, a prior distribution encoding some prior knowledge about the distribution over the free parameters, and some data with which to constrain this prior distribution. The pipeline has three stages, most easily explained in reverse: the last stage is to draw samples (Sample) from the Bayesian posterior distribution, i.e. the prior distribution conditioned on the observed data; to accelerate and smooth this process we train statistical emulators to represent the user-provided parameter-to-data map (Emulate); the training points for these emulators are generated by the computer model, and selected adaptively around regions of high posterior mass (Calibrate). We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly.

 * Calibration tools: We recommend choosing adaptive training points with Ensemble Kalman methods such as EKI [@Iglesias:2013] and its variants [@Huang:2022]; and CES provides explicit utilities from the codebase EnsembleKalmanProcesses.jl [@Dunbar:2022a].
 * Emulation tools: CES integrates any statistical emulator; currently implemented are Gaussian Processes [@Rasmussen:2006], explicitly provided through packages SciKitLearn.jl [@scikit-learn] and GaussianProcesses.jl [@Fairbrother:2022], and Random Features [@Rahimi:2007;@Rahimi:2008;@Liu:2022], explicitly provided through [RandomFeatures.jl](https://doi.org/10.5281/zenodo.7141158), which can provide additional flexibility and scalability, particularly in higher dimensions.
-* Sampling tools: The smoothed accelerated sampling problem is solved with Markov Chain Monte Carlo, and CES provides the variants of Random Walk Metropolis [@Sherlock:2010], and preconditioned Crank-Nicholson [@Cotter:2013], using APIs from [Turing.jl](https://turinglang.org/).
+* Sampling tools: The smoothed accelerated sampling problem is solved with MCMC, and CES provides Random Walk Metropolis [@Metropolis:1953;@Sherlock:2010] and preconditioned Crank-Nicolson [@Cotter:2013] variants, using APIs from [Turing.jl](https://turinglang.org/).

 To highlight code accessibility, we also provide a suite of detailed scientifically inspired examples, with documentation that walks users through some use cases. Such use cases not only demonstrate the capability of the CES pipeline, but also teach users about the typical interface and workflow experience.

 # Statement of need

-Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values that exist within these modelling frameworks are typically constrained by observations to produce accurate and robust predictions about the system they are approximating numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information) with the input of observed data, to a more informative data-consistent distribution (posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code, with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion and requiring only on the order of $10^2$ evaluations of the code. This opens the doors for quantifying parametric uncertainty for a class of numerically intensive computer codes that classically this has been unavailable.
+Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values that exist within these modelling frameworks are typically constrained by observations to produce accurate and robust predictions about the system they are approximating numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information) with the input of observed data, to a more informative data-consistent distribution (posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code (e.g., when using Random Walk Metropolis), with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion, requiring only on the order of $10^2$ evaluations of the code. This opens the door to quantifying parametric uncertainty for a class of numerically intensive computer codes for which this was previously unavailable.
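To make the evaluation counts in the paragraph above concrete, the sketch below is a textbook random walk Metropolis sampler written in plain Julia against a deliberately cheap stand-in forward map. It is an illustrative sketch only, not code from the package: the toy map `G`, the observations, the prior, and all tuning values are assumptions made for this example. The structural point is that every proposal requires a fresh forward-model evaluation, which is what drives the $10^5$ figure when the model is expensive.

```julia
# Illustrative sketch only; not part of CalibrateEmulateSample.jl.
using LinearAlgebra, Random

# Cheap stand-in for an expensive simulator G: parameters -> data.
# In a real application each call could take minutes to hours.
G(θ) = [θ[1] + θ[2], θ[1] * θ[2]]

y_obs = [10.0, 21.0]                            # toy observations
Γ = 0.1 * I(2)                                  # assumed observational noise covariance
logprior(θ) = -0.5 * sum(abs2, θ .- 5.0) / 4.0  # N(5, 4I) prior, toy choice

# Unnormalized log-posterior; note the simulator call inside.
function logpost(θ)
    r = y_obs - G(θ)
    return -0.5 * dot(r, Γ \ r) + logprior(θ)
end

# Textbook random walk Metropolis: every step costs one forward-model evaluation.
function rwm(logdensity, θ0; n_steps = 100_000, step = 0.2, rng = Random.default_rng())
    θ, lp = copy(θ0), logdensity(θ0)
    chain = zeros(length(θ0), n_steps)
    for i in 1:n_steps
        θ_prop = θ .+ step .* randn(rng, length(θ))
        lp_prop = logdensity(θ_prop)             # another simulator call
        if log(rand(rng)) < lp_prop - lp         # Metropolis accept/reject
            θ, lp = θ_prop, lp_prop
        end
        chain[:, i] .= θ
    end
    return chain
end

samples = rwm(logpost, [4.0, 4.0])   # ~10^5 simulator calls for a single chain
```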

 # State of the field

-In Julia there are a few tools for performing non-accelerated uncertainty quantification, from classical sensitivity analysis approaches, e.g., [UncertaintyQuantification.jl](https://zenodo.org/records/10149017), GlobalSensitivity.jl [@Dixit:2022], and Bayesian Markov Chain Monte Carlo, e.g., [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/). For computational efficiency, ensemble Methods also provide approximate sampling (e.g., the Ensemble Kalman Sampler [@Garbuno-Inigo:2020b;@Dunbar:2022a]) though these only provide Gaussian approximations of the posterior.
+In Julia there are a few tools for performing non-accelerated uncertainty quantification, ranging from classical sensitivity analysis approaches, e.g., [UncertaintyQuantification.jl](https://zenodo.org/records/10149017) and GlobalSensitivity.jl [@Dixit:2022], to MCMC, e.g., [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/). For computational efficiency, ensemble Methods also provide approximate sampling (e.g., the Ensemble Kalman Sampler [@Garbuno-Inigo:2020b;@Dunbar:2022a]), though these only provide Gaussian approximations of the posterior.

-Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), e.g., GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); these tools both approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods, such as MCMC. Instead, the goal ABC is to replace the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map, then following this it calculates an explicit likelihood and uses exact sampling via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (gpABC). ABC can be used in more general contexts than CES, but suffers greater approximation error and more stringent assumptions, especially in multi-dimensional problems.
+Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), e.g., GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); these tools both approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods, such as MCMC. Instead, the goal ABC is to replace the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map; an explicit likelihood is then constructed from the learned map and sampled exactly via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (gpABC). ABC can be used in more general contexts than CES, but suffers greater approximation error and requires more stringent assumptions, especially in multi-dimensional problems.
+
+Several other tools are available in other languages for the purpose of accelerated learning of the posterior distribution or posterior sampling. Two such examples, written in Python, approximate the log-posterior distribution directly with a Gaussian process: [PyVBMC](https://github.com/acerbilab/pyvbmc) [@Huggins:2023] additionally uses variational approximations to calculate the normalization constant, and [GPry](https://github.com/jonaselgammal/GPry) [@Gammal:2022] iteratively trains the GP with an active training point selection algorithm. Such algorithms are distinct from CES, which approximates the parameter-to-data map with a Gaussian process, and advocates ensemble Kalman methods to select training points.

 # A simple example from the code documentation


From 26e1b90bc1ff80e51c1b71be483cedbf328d933c Mon Sep 17 00:00:00 2001
From: odunbar
Date: Wed, 3 Apr 2024 09:46:59 -0700
Subject: [PATCH 2/4] addressed further typos

---
 paper.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/paper.md b/paper.md
index c2fe5c5b0..80d8934c8 100644
--- a/paper.md
+++ b/paper.md
@@ -49,7 +49,7 @@ bibliography: paper.bib

 # Summary

-A Julia language [@julia] package providing a practical and modular implementation of ``Calibrate, Emulate, Sample'' [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty, is presented. This is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, a prior distribution encoding some prior knowledge about the distribution over the free parameters, and some data with which to constrain this prior distribution. The pipeline has three stages, most easily explained in reverse: the last stage is to draw samples (Sample) from the Bayesian posterior distribution, i.e. the prior distribution conditioned on the observed data; to accelerate and smooth this process we train statistical emulators to represent the user-provided parameter-to-data map (Emulate); the training points for these emulators are generated by the computer model, and selected adaptively around regions of high posterior mass (Calibrate). We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly.
+A Julia language [@julia] package providing a practical and modular implementation of ``Calibrate, Emulate, Sample'' [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty, is presented. This is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, a prior distribution encoding some prior knowledge about the distribution over the free parameters, and some data with which to constrain this prior distribution. The pipeline has three stages, most easily explained in reverse: the last stage is to draw samples (Sample) from the Bayesian posterior distribution, i.e. the prior distribution conditioned on the observed data; to accelerate and regularize this process we train statistical emulators to represent the user-provided parameter-to-data map (Emulate); the training points for these emulators are generated by the computer model, and selected adaptively around regions of high posterior mass (Calibrate). We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly.

 * Calibration tools: We recommend choosing adaptive training points with Ensemble Kalman methods such as EKI [@Iglesias:2013] and its variants [@Huang:2022]; and CES provides explicit utilities from the codebase EnsembleKalmanProcesses.jl [@Dunbar:2022a].
 * Emulation tools: CES integrates any statistical emulator; currently implemented are Gaussian Processes [@Rasmussen:2006], explicitly provided through packages SciKitLearn.jl [@scikit-learn] and GaussianProcesses.jl [@Fairbrother:2022], and Random Features [@Rahimi:2007;@Rahimi:2008;@Liu:2022], explicitly provided through [RandomFeatures.jl](https://doi.org/10.5281/zenodo.7141158), which can provide additional flexibility and scalability, particularly in higher dimensions.
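The Calibrate stage summarized in the bullets above, choosing emulator training points with ensemble Kalman methods, can likewise be sketched in a few lines of plain Julia. The following is an illustrative toy, not the EnsembleKalmanProcesses.jl interface the package actually uses: a basic perturbed-observation ensemble Kalman inversion loop in which the forward map, observations, noise covariance, and ensemble settings are assumptions made for the example. A handful of iterations over a modest ensemble keeps the number of forward-model runs near the $10^2$ mark, and the final ensemble, together with its model evaluations, forms the emulator training set.

```julia
# Illustrative sketch only; not the EnsembleKalmanProcesses.jl API.
using LinearAlgebra, Random, Statistics

G(θ) = [θ[1] + θ[2], θ[1] * θ[2]]   # toy forward map (assumed for the example)
y_obs = [10.0, 21.0]                # toy observations
Γ = 0.1 * I(2)                      # assumed observational noise covariance

# One EKI iteration: nudge each ensemble member towards parameters whose model
# output matches the data, using forward evaluations only (no derivatives).
function eki_update(Θ, G, y, Γ; rng = Random.default_rng())
    J = size(Θ, 2)                                     # ensemble size
    Gens = reduce(hcat, [G(Θ[:, j]) for j in 1:J])     # forward map on each member
    Θm, Gm = mean(Θ, dims = 2), mean(Gens, dims = 2)
    Cθg = (Θ .- Θm) * (Gens .- Gm)' / (J - 1)          # parameter-output covariance
    Cgg = (Gens .- Gm) * (Gens .- Gm)' / (J - 1)       # output covariance
    Y = y .+ sqrt(Γ) * randn(rng, length(y), J)        # perturbed observations
    return Θ .+ Cθg * ((Cgg + Γ) \ (Y .- Gens))        # Kalman-style update
end

function calibrate(Θ0, G, y, Γ; iterations = 5, rng = Random.default_rng())
    Θ = copy(Θ0)
    for _ in 1:iterations          # iterations x ensemble size ≈ 10^2 model runs
        Θ = eki_update(Θ, G, y, Γ; rng = rng)
    end
    return Θ                       # ensemble members become emulator training inputs
end

rng = Random.MersenneTwister(42)
Θ0 = 5.0 .+ 2.0 .* randn(rng, 2, 20)        # prior ensemble of 20 members
Θ_final = calibrate(Θ0, G, y_obs, Γ; rng = rng)
```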
@@ -60,14 +60,14 @@ To highlight code accessibility, we also provide a suite of detailed scientifica

 # Statement of need

-Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values that exist within these modelling frameworks are typically constrained by observations to produce accurate and robust predictions about the system they are approximating numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information) with the input of observed data, to a more informative data-consistent distribution (posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code (e.g., when using Random Walk Metropolis), with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion, requiring only on the order of $10^2$ evaluations of the code. This opens the door to quantifying parametric uncertainty for a class of numerically intensive computer codes for which this was previously unavailable.
+Computationally expensive computer codes for predictive modelling are ubiquitous across science and engineering disciplines. Free parameter values that exist within these modelling frameworks are typically constrained by observations to produce accurate and robust predictions about the system they are approximating numerically. In a Bayesian setting, this is viewed as evolving an initial parameter distribution (based on prior information) with the input of observed data, to a more informative data-consistent distribution (posterior). Unfortunately, this task is intensely computationally expensive, commonly requiring over $10^5$ evaluations of the expensive computer code (e.g., when using Random Walk Metropolis), with accelerations relying on intrusive model information, such as a derivative of the parameter-to-data map. CES is able to approximate and accelerate this process in a non-intrusive fashion, requiring only on the order of $10^2$ evaluations of the original computer model. This opens the door to quantifying parametric uncertainty for a class of numerically intensive computer codes for which this was previously unavailable.

 # State of the field

-In Julia there are a few tools for performing non-accelerated uncertainty quantification, ranging from classical sensitivity analysis approaches, e.g., [UncertaintyQuantification.jl](https://zenodo.org/records/10149017) and GlobalSensitivity.jl [@Dixit:2022], to MCMC, e.g., [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/). For computational efficiency, ensemble Methods also provide approximate sampling (e.g., the Ensemble Kalman Sampler [@Garbuno-Inigo:2020b;@Dunbar:2022a]), though these only provide Gaussian approximations of the posterior.
+In Julia there are a few tools for performing non-accelerated uncertainty quantification, ranging from classical sensitivity analysis approaches, e.g., [UncertaintyQuantification.jl](https://zenodo.org/records/10149017) and GlobalSensitivity.jl [@Dixit:2022], to MCMC, e.g., [Mamba.jl](https://github.com/brian-j-smith/Mamba.jl) or [Turing.jl](https://turinglang.org/). For computational efficiency, ensemble methods also provide approximate sampling (e.g., the Ensemble Kalman Sampler [@Garbuno-Inigo:2020b;@Dunbar:2022a]), though these only provide Gaussian approximations of the posterior.

-Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), e.g., GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); these tools both approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods, such as MCMC. Instead, the goal ABC is to replace the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map; an explicit likelihood is then constructed from the learned map and sampled exactly via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (gpABC). ABC can be used in more general contexts than CES, but suffers greater approximation error and requires more stringent assumptions, especially in multi-dimensional problems.
+Accelerated uncertainty quantification tools also exist for the related approach of Approximate Bayesian Computation (ABC), e.g., GpABC [@Tankhilevich:2020] or [ApproxBayes.jl](https://github.com/marcjwilliams1/ApproxBayes.jl?tab=readme-ov-file); these tools both approximately sample from the posterior distribution. In ABC, this approximation comes from bypassing the likelihood that is usually required in sampling methods, such as MCMC. Instead, the goal of ABC is to replace the likelihood with a scalar-valued sampling objective that compares model and data. In CES, the approximation comes from learning the parameter-to-data map; an explicit likelihood is then constructed from the learned map and sampled exactly via MCMC. Some ABC algorithms also make use of statistical emulators to further accelerate sampling (GpABC). ABC can be used in more general contexts than CES, but suffers greater approximation error and requires more stringent assumptions, especially in multi-dimensional problems.

 Several other tools are available in other languages for the purpose of accelerated learning of the posterior distribution or posterior sampling. Two such examples, written in Python, approximate the log-posterior distribution directly with a Gaussian process: [PyVBMC](https://github.com/acerbilab/pyvbmc) [@Huggins:2023] additionally uses variational approximations to calculate the normalization constant, and [GPry](https://github.com/jonaselgammal/GPry) [@Gammal:2022] iteratively trains the GP with an active training point selection algorithm. Such algorithms are distinct from CES, which approximates the parameter-to-data map with a Gaussian process, and advocates ensemble Kalman methods to select training points.

@@ -154,7 +154,7 @@ chain = MC.sample(

 ![The joint posterior distribution histogram \label{fig:GP_2d_posterior}](sinusoid_MCMC_hist_GP.png){width=60%}

-A histogram of the samples from is displayed in \autoref{fig:GP_2d_posterior}. We see that the posterior distribution contains the true value $(3.0, 7.0)$ with high probability.
+A histogram of the samples from the CES algorithm is displayed in \autoref{fig:GP_2d_posterior}. We see that the posterior distribution contains the true value $(3.0, 7.0)$ with high probability.


From d1f0f489630089311cfa9902ff6812a651ea9fb1 Mon Sep 17 00:00:00 2001
From: odunbar
Date: Wed, 3 Apr 2024 10:01:32 -0700
Subject: [PATCH 3/4] derivative MCMC

---
 paper.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paper.md b/paper.md
index 80d8934c8..aadf17b01 100644
--- a/paper.md
+++ b/paper.md
@@ -52,8 +52,8 @@ bibliography: paper.bib
 A Julia language [@julia] package providing a practical and modular implementation of ``Calibrate, Emulate, Sample'' [@Cleary:2021], hereafter CES, an accelerated workflow for obtaining model parametric uncertainty, is presented. This is also known as Bayesian inversion or uncertainty quantification. To apply CES one requires a computer model (written in any programming language) dependent on free parameters, a prior distribution encoding some prior knowledge about the distribution over the free parameters, and some data with which to constrain this prior distribution. The pipeline has three stages, most easily explained in reverse: the last stage is to draw samples (Sample) from the Bayesian posterior distribution, i.e. the prior distribution conditioned on the observed data; to accelerate and regularize this process we train statistical emulators to represent the user-provided parameter-to-data map (Emulate); the training points for these emulators are generated by the computer model, and selected adaptively around regions of high posterior mass (Calibrate). We describe CES as an accelerated workflow, as it is often able to use dramatically fewer evaluations of the computer model when compared with applying sampling algorithms, such as Markov chain Monte Carlo (MCMC), directly.

 * Calibration tools: We recommend choosing adaptive training points with Ensemble Kalman methods such as EKI [@Iglesias:2013] and its variants [@Huang:2022]; and CES provides explicit utilities from the codebase EnsembleKalmanProcesses.jl [@Dunbar:2022a].
-* Emulation tools: CES integrates any statistical emulator; currently implemented are Gaussian Processes [@Rasmussen:2006], explicitly provided through packages SciKitLearn.jl [@scikit-learn] and GaussianProcesses.jl [@Fairbrother:2022], and Random Features [@Rahimi:2007;@Rahimi:2008;@Liu:2022], explicitly provided through [RandomFeatures.jl](https://doi.org/10.5281/zenodo.7141158), which can provide additional flexibility and scalability, particularly in higher dimensions.
-* Sampling tools: The smoothed accelerated sampling problem is solved with MCMC, and CES provides Random Walk Metropolis [@Metropolis:1953;@Sherlock:2010] and preconditioned Crank-Nicolson [@Cotter:2013] variants, using APIs from [Turing.jl](https://turinglang.org/).
+* Emulation tools: CES integrates any statistical emulator; currently implemented are Gaussian Processes (GP) [@Rasmussen:2006], explicitly provided through packages SciKitLearn.jl [@scikit-learn] and GaussianProcesses.jl [@Fairbrother:2022], and Random Features [@Rahimi:2007;@Rahimi:2008;@Liu:2022], explicitly provided through [RandomFeatures.jl](https://doi.org/10.5281/zenodo.7141158), which can provide additional flexibility and scalability, particularly in higher dimensions.
+* Sampling tools: The regularized and accelerated sampling problem is solved with MCMC, and CES provides Random Walk Metropolis [@Metropolis:1953;@Sherlock:2010] and preconditioned Crank-Nicolson [@Cotter:2013] variants, using APIs from [Turing.jl](https://turinglang.org/). Some common emulator mean functions are differentiable, and incorporating derivative-based MCMC accelerations (e.g. NUTS [@hoffman:2014], Barker [@livingstone:2022]) into CES is an active direction of work.

 To highlight code accessibility, we also provide a suite of detailed scientifically inspired examples, with documentation that walks users through some use cases. Such use cases not only demonstrate the capability of the CES pipeline, but also teach users about the typical interface and workflow experience.


From 592bd24fc320bc0916ebcf498a99cef72f0f0ef9 Mon Sep 17 00:00:00 2001
From: odunbar
Date: Wed, 3 Apr 2024 11:22:49 -0700
Subject: [PATCH 4/4] refs

---
 paper.bib | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/paper.bib b/paper.bib
index f337a71b7..e15e14dfc 100644
--- a/paper.bib
+++ b/paper.bib
@@ -305,4 +305,25 @@ @article{Gammal:2022
     eprint={2211.02045},
     archivePrefix={arXiv},
     primaryClass={astro-ph.CO}
+}
+
+@article{livingstone:2022,
+  title={The Barker proposal: combining robustness and efficiency in gradient-based MCMC},
+  author={Livingstone, Samuel and Zanella, Giacomo},
+  journal={Journal of the Royal Statistical Society Series B: Statistical Methodology},
+  volume={84},
+  number={2},
+  pages={496--523},
+  year={2022},
+  publisher={Oxford University Press}
+}
+
+@article{hoffman:2014,
+  title={The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo},
+  author={Hoffman, Matthew D and Gelman, Andrew and others},
+  journal={J. Mach. Learn. Res.},
+  volume={15},
+  number={1},
+  pages={1593--1623},
+  year={2014}
+}
\ No newline at end of file
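Finally, the Emulate stage that sits between the calibration and sampling sketches above can be illustrated in the same spirit. The code below is a hand-rolled Gaussian process regression with a squared-exponential kernel and hyperparameters fixed by hand; it is a sketch under assumed names and values, not the GaussianProcesses.jl, SciKitLearn.jl, or RandomFeatures.jl interfaces that the package wraps, and a real emulator would also learn its hyperparameters from the training data. Once trained on the order of $10^2$ calibration runs, the MCMC stage queries this cheap surrogate in place of the original computer model.

```julia
# Illustrative sketch only; not the emulator interfaces used by the package.
using LinearAlgebra

# Squared-exponential kernel with fixed lengthscale ℓ and variance σ².
sqexp(x, z; ℓ = 1.0, σ = 1.0) = σ^2 * exp(-sum(abs2, x .- z) / (2ℓ^2))

# X is a d × n matrix of training inputs (e.g. a calibration ensemble);
# y holds one output component of the forward map at those inputs.
function gp_fit(X, y; ℓ = 1.0, σ = 1.0, jitter = 1e-6)
    n = size(X, 2)
    K = [sqexp(X[:, i], X[:, j]; ℓ = ℓ, σ = σ) for i in 1:n, j in 1:n] + jitter * I(n)
    chol = cholesky(Symmetric(K))
    return (X = X, chol = chol, α = chol \ y, ℓ = ℓ, σ = σ)
end

# GP posterior mean and variance at a new parameter value x.
function gp_predict(gp, x)
    k = [sqexp(gp.X[:, i], x; ℓ = gp.ℓ, σ = gp.σ) for i in 1:size(gp.X, 2)]
    μ = dot(k, gp.α)
    v = sqexp(x, x; ℓ = gp.ℓ, σ = gp.σ) - dot(k, gp.chol \ k)
    return μ, max(v, 0.0)
end

# Toy usage: ~10^2 runs of a cheap stand-in map provide the training set, and the
# sampling stage then calls gp_predict instead of the expensive simulator.
X_train = 8.0 .* rand(2, 100)
y_train = [X_train[1, j] + X_train[2, j] for j in 1:100]   # first output of the toy map
gp = gp_fit(X_train, y_train; ℓ = 2.0)
μ, v = gp_predict(gp, [3.0, 7.0])
```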