Variational inference is an increasingly popular method in statistics and machine learning for approximating probability distributions. We developed LINFA (Library for Inference with Normalizing Flow and Annealing), a Python library for variational inference designed to accommodate computationally expensive models and difficult-to-sample distributions with dependent parameters. We discuss the theoretical background, capabilities, and performance of LINFA on a number of benchmarks. LINFA is publicly available on GitHub.
Generating samples from a posterior distribution is a fundamental task in Bayesian inference. The development of sampling-based algorithms from the Markov chain Monte Carlo family (…).
However, cases where the computational cost of evaluating the underlying probability distribution is significant occur quite often in engineering and the applied sciences, for example when such evaluation requires the solution of an ordinary or partial differential equation. In such cases, inference can easily become intractable. Additionally, strong and nonlinear dependence between model parameters may result in difficult-to-sample posterior distributions characterized by features at multiple scales or by multiple modes. The LINFA library is specifically designed for cases where the model evaluation is computationally expensive. In such cases, the construction of an adaptively trained surrogate model is key to reducing the computational cost of inference (…).
LINFA is designed as a general inference engine and allows the user to define custom input transformations, computational models, surrogates, and likelihood functions.
As an example, one can define a hyperbolic tangent transformation for the first two variables and an exponential transformation for the third.
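As an illustration, the sketch below (plain NumPy, not LINFA's actual interface; the function names are hypothetical) shows what such an element-wise transformation and its log-Jacobian determinant could look like:

```python
import numpy as np

# Hypothetical sketch of an element-wise input transformation applying
# tanh to the first two parameters and exp to the third, together with
# the log-determinant of its Jacobian.
def transform(z):
    x = z.copy()
    x[:, 0:2] = np.tanh(z[:, 0:2])
    x[:, 2] = np.exp(z[:, 2])
    return x

def log_jacobian(z):
    # d/dz tanh(z) = 1 - tanh(z)^2, d/dz exp(z) = exp(z)
    log_det = np.log1p(-np.tanh(z[:, 0:2]) ** 2).sum(axis=1)
    return log_det + z[:, 2]
```

Because each component is transformed independently, the Jacobian is diagonal and its log-determinant reduces to a simple sum, keeping the cost linear in the number of parameters.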
A new surrogate model can be created using the … constructor. Two functions … handle surrogate model input/output, i.e., saving a trained surrogate to disk and loading it back. A custom likelihood can be defined as a Python function and then assigned as a member function of the … object.
Other Python modules and packages provide implementations of variational inference with a number of additional features. An incomplete list of these packages is reported below.
Online notebooks are also available (see this …).
LINFA is based on normalizing flow transformations and can therefore infer nonlinear parameter dependence. It also provides the ability to adaptively train a surrogate model (NoFAS), which significantly reduces the computational cost of inference for the parameters of expensive computational models. Finally, LINFA provides an adaptive annealing algorithm (AdaAnn), which autonomously selects the appropriate annealing steps based on the current approximation of the posterior distribution.
We tested LINFA on multiple problems. These include inference on unimodal and multimodal posterior distributions specified in closed form, ordinary differential equation models and dynamical systems with gradients computed directly through automatic differentiation in PyTorch, identifiable and non-identifiable physics-based models with fixed and adaptive surrogates, and high-dimensional statistical models. Some of the above tests are included with the library and systematically verified using GitHub Actions. A detailed discussion of these test cases is provided in the Appendix. To run the tests, type …, where …
In this paper, we introduced the LINFA library for variational inference, briefly discussed the relevant background and its capabilities, and reported its performance on a number of test cases. Some interesting directions for future work are mentioned below.
Future versions will support user-defined privacy-preserving synthetic data generation and variational inference through differentially private gradient descent algorithms. This will allow the user to perform inference tasks while preserving a pre-defined privacy budget, as discussed in (…).
The authors gratefully acknowledge the support of NSF Big Data Science & Engineering grant #1918692 and the computational resources provided by the Center for Research Computing at the University of Notre Dame. DES also acknowledges support from NSF CAREER grant #1942662.
Consider the problem of estimating (in a Bayesian sense) the parameters …
In the context of variational inference, we seek to determine an optimal approximation of the posterior within a chosen family of distributions, typically by maximizing a lower bound on the evidence (the ELBO).
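As a minimal illustration of the variational objective (a sketch with a one-dimensional Gaussian variational family; the function names are ours, not LINFA's), the evidence lower bound (ELBO) can be estimated by Monte Carlo using the reparameterization trick:

```python
import numpy as np

# Minimal sketch of the ELBO, E_q[log p(x, theta) - log q(theta)],
# for a Gaussian variational family q with mean mu and standard
# deviation sigma, estimated by Monte Carlo with the
# reparameterization theta = mu + sigma * eps, eps ~ N(0, 1).
def elbo_estimate(log_joint, mu, sigma, n_samples=1000, seed=0):
    rng = np.random.default_rng(seed)
    eps = rng.standard_normal(n_samples)
    theta = mu + sigma * eps
    # log density of q evaluated at the sampled theta
    log_q = -0.5 * np.log(2 * np.pi) - np.log(sigma) - 0.5 * eps**2
    return np.mean(log_joint(theta) - log_q)
```

For a standard normal target, the estimate equals zero exactly at mu = 0, sigma = 1 (where q matches the target), and decreases as the variational parameters move away from it.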
For computational convenience, normalizing flow transformations are selected to be easily invertible, with a Jacobian determinant that can be computed at a cost growing linearly with the problem dimensionality. Approaches in the literature include RealNVP (…).
LINFA implements two widely used normalizing flow formulations: MAF (…) and RealNVP (…).
RealNVP is another widely used flow where, at each layer, the first … components are left unchanged, while the remaining components are transformed through a scale and a shift conditioned on the first set.
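To make this structure concrete, here is a hedged single-coupling-layer sketch (plain NumPy, with linear maps standing in for the scale and shift networks; this is not LINFA's implementation):

```python
import numpy as np

# Illustrative sketch of one RealNVP coupling layer. The first d
# components pass through unchanged; the remaining components are
# scaled and shifted by functions of the first d. The "networks" s and
# t are modeled as simple linear maps for clarity.
rng = np.random.default_rng(0)
d, D = 2, 4                              # split point and total dimension
Ws = rng.normal(size=(D - d, d)) * 0.1   # hypothetical scale-network weights
Wt = rng.normal(size=(D - d, d)) * 0.1   # hypothetical shift-network weights

def coupling_forward(z):
    z1, z2 = z[:d], z[d:]
    s, t = Ws @ z1, Wt @ z1
    x2 = z2 * np.exp(s) + t              # affine map of the second block
    return np.concatenate([z1, x2]), s.sum()   # output and log|det J|

def coupling_inverse(x):
    x1, x2 = x[:d], x[d:]
    s, t = Ws @ x1, Wt @ x1
    z2 = (x2 - t) * np.exp(-s)           # exact inverse, no iteration needed
    return np.concatenate([x1, z2])
```

The Jacobian of this map is triangular, so its log-determinant is just the sum of the scale outputs, and the inverse is available in closed form, which is what makes the formulation computationally attractive.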
LINFA is designed to accommodate black-box models …
This requires the evaluation of the gradient of the ELBO …
Our solution is to replace the model with a computationally inexpensive surrogate.
To resolve these issues, LINFA implements NoFAS, which updates the surrogate model adaptively by smartly weighting the samples of …
Annealing is a technique to parametrically smooth a target density to improve sampling efficiency and accuracy during inference. In the discrete case, this is achieved by incrementing an inverse temperature …
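The effect of tempering can be illustrated with a short sketch (the density and function names are hypothetical, for illustration only): raising an unnormalized density to a power t in (0, 1] flattens it, shrinking the gap between modes and valleys.

```python
import numpy as np

# Sketch of discrete annealing: an unnormalized target density is
# tempered by an inverse temperature t in (0, 1], p_t ∝ p^t, so small
# t flattens the density and t = 1 recovers the target.
def log_annealed(log_target, theta, t):
    return t * log_target(theta)

def log_bimodal(theta):
    # unnormalized mixture of two Gaussians centered at -2 and +2
    return np.logaddexp(-0.5 * (theta - 2.0) ** 2,
                        -0.5 * (theta + 2.0) ** 2)
```

At small t the log-density gap between the modes at ±2 and the valley at 0 is scaled down by t, making it easier for a sampler or a variational approximation to traverse between modes before the temperature is raised back toward 1.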
A linear annealing scheduler with fixed temperature increments is often used in practice (see, e.g., Rezende & Mohamed (…)).
The AdaAnn scheduler instead determines the increment … adaptively, based on the current posterior approximation.
The denominator is large when the support of the annealed distribution … is wide, leading to smaller temperature increments.
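A simplified sketch of this adaptive step selection (our notation, not the library's API; tau plays the role of the KL tolerance) could look like:

```python
import numpy as np

# Hedged sketch of an AdaAnn-style adaptive temperature increment.
# When the annealed distribution has wide support, the sampled
# log-posterior values vary widely, the Monte Carlo denominator is
# large, and a small step is taken; as the distribution concentrates,
# the variability shrinks and larger steps become possible.
def adaann_increment(log_post_samples, tau=0.01, max_step=0.05):
    denom = np.std(log_post_samples)      # Monte Carlo estimate of the denominator
    return min(tau / max(denom, 1e-12), max_step)
```

Feeding in log-posterior values sampled under a flat (wide-support) annealed distribution thus yields a smaller increment than values sampled under a concentrated one, reproducing the behavior described above.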
A model … is considered, where …
Results in terms of loss profile, variational approximation, and posterior predictive distribution are shown in …
Results from the simple two-dimensional map. Loss profile (left), posterior samples (center), and posterior predictive distribution (right).
We consider a map …
Results from the high-dimensional example. The top row contains the loss profile (left) and samples from the posterior predictive distribution plus the available observations (right). Samples from the posterior distribution are shown in the bottom row.
The two-element Windkessel model (often referred to as the RC model) is described by …, where …
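For concreteness, a minimal forward-Euler integration of the standard two-element Windkessel equation, C dP/dt = Q(t) − P/R, is sketched below (the parameter values and the inflow waveform are hypothetical, chosen only for illustration, and are not the settings of the LINFA benchmark):

```python
import numpy as np

# Minimal forward-Euler integration of the standard two-element (RC)
# Windkessel model, C dP/dt = Q(t) - P/R. Hypothetical parameters in
# rough physiological units (R in mmHg*s/mL, C in mL/mmHg, Q in mL/s).
def simulate_rc(R=1.0, C=1.5, P0=80.0, T=5.0, n=5000):
    dt = T / n
    P = np.empty(n + 1)
    P[0] = P0
    for k in range(n):
        t = k * dt
        Q = 100.0 * (1.0 + np.sin(2.0 * np.pi * t))  # toy periodic inflow
        P[k + 1] = P[k] + dt * (Q - P[k] / R) / C    # forward Euler step
    return P

# Summary outputs (max/min/mean pressure) of the kind typically used
# as observables for inference.
def pressure_summary(P):
    return P.max(), P.min(), P.mean()
```

Scalar summaries of the pressure trace such as these can then serve as the model outputs against which observations are compared during inference.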
Results from the RC model. Loss profile (left), posterior samples for R and C (center), and the posterior predictive distribution for … (right).
The three-parameter Windkessel or RCR model …
The output consists of the maximum, minimum, and average values of the proximal pressure …
This example also demonstrates how NoFAS can be combined with annealing for improved convergence. The results are shown in …
Results from the RCR model. The top row contains the loss profile (left) and samples from the posterior predictive distribution plus the available observations (right). Samples from the posterior distribution are shown in the bottom row.
We consider a modified version of the Friedman 1 dataset (…).
Posterior mean and standard deviation for the positive mode in the modified Friedman test case.
| Post. Mean | Post. SD |
|---|---|
| 10.0285 | 0.1000 |
| 4.2187 | 0.1719 |
| 0.4854 | 0.0004 |
| 10.0987 | 0.0491 |
| 5.0182 | 0.1142 |
| 0.1113 | 0.0785 |
| 0.0707 | 0.0043 |
| -0.1315 | 0.1008 |
| 0.0976 | 0.0387 |
| 0.1192 | 0.0463 |
Loss profile (left) and posterior marginal statistics (right) for the positive mode in the modified Friedman test case.
This section contains the list of all hyperparameters in the library, their default values, and a description of the functionalities they control. General hyperparameters are listed in …
Output parameters

| Type | Description |
|---|---|
| string | Name of the output folder where result files are saved. |
| string | Name of the log file, which stores the iteration number, annealing temperature, and value of the loss function at each iteration. |
| int | Seed for the random number generator. |
Surrogate model parameters (NoFAS)

| Type | Description |
|---|---|
| int | Batch size used when saving results to disk (i.e., once every … iterations). |
| int | Number of NF iterations between successive updates of the surrogate model (default …). |
| int | Maximum allowable number of true model evaluations. |
| int | Number of pre-training iterations for the surrogate model (default …). |
| int | Number of iterations for the surrogate model update (default …). |
| string | Folder where the surrogate model is stored (default …). |
| bool | Start by pre-training a new surrogate and ignore existing surrogates (default …). |
| int | Save interval for the surrogate model (…). |
Device parameters

| Type | Description |
|---|---|
| bool | Do not use GPU acceleration. |
Optimizer and learning rate parameters

| Type | Description |
|---|---|
| string | Type of SGD optimizer (default …). |
| float | Learning rate (default …). |
| float | Learning rate decay (default …). |
| string | Type of learning rate scheduler (…). |
| int | Number of steps before learning rate reduction for the step scheduler. |
| int | Number of iterations between successive loss printouts (default …). |
General parameters

| Type | Description |
|---|---|
| str | Name of the experiment. |
| str | Type of normalizing flow (…). |
| int | Number of normalizing flow layers (default …). |
| int | Number of neurons in MADE and RealNVP hidden layers (default …). |
| int | Number of hidden layers in MADE (default 1). |
| str | Activation function for the MADE network used by MAF (default …). |
| str | Input order for MADE mask creation (…). |
| bool | Adds a batch normalization layer after each MAF or RealNVP layer (default …). |
| int | How often to save results from the normalizing flow iterations. Saved results include posterior samples, loss profile, samples from the posterior predictive distribution, observations, and marginal statistics. |
| int | Input dimensionality (default …). |
| int | Number of samples from the base distribution generated at each iteration (default …). |
| int | Number of additional true model evaluations at each surrogate model update (default …). |
| int | Total number of NF iterations (default …). |
Parameters for the adaptive annealing scheduler (AdaAnn)

| Type | Description |
|---|---|
| bool | Flag to activate the annealing scheduler. |
| string | Type of annealing scheduler (…). |
| float | KL tolerance. It is kept constant during inference and used in the numerator of equation …. |
| float | Initial inverse temperature. |
| int | Number of batch samples during annealing. |
| int | Number of batch samples at …. |
| int | Number of initial parameter updates at …. |
| int | Number of parameter updates after each temperature update. During these updates the temperature is kept fixed. |
| int | Number of parameter updates at …. |
| int | Number of Monte Carlo samples used to evaluate the denominator in equation …. |