From 4a00cadeadd28d1dc710d3d94835e8657beb4599 Mon Sep 17 00:00:00 2001 From: "Apostolos Chalkis (TolisChal)" Date: Tue, 17 Oct 2023 07:00:09 -0600 Subject: [PATCH] No-U-Turn Sampler in HMC (#216) * initialize nuts sampler class * implement the burnin of nuts sampler * add tests and resolve bugs * implement e0 estimation in burnin of nuts * optimize leapfrog * integrate nuts into the R interface * document random walk in sample_points.cpp in R interface * fix burnin for the non-truncated case * resolve bugs in hmc and nuts pipelines * improve the preprocess in burin step of nuts * split large lines in sample_points.cpp * Add NUTS C++ example and update CMake (#29) * resolve PR comments * fix minor bug * fix compiler bug * fix error in building the C++ examples * resolve warnings in sample_points * fix lpsolve cran warning * fix cran warning on mac * improve lpsolve cmake for cran check * fix R warning in mac test * remove lpsolve header * resolve PR comments --------- Co-authored-by: Marios Papachristou Co-authored-by: Apostolos Chalkis (TolisChal) --- R-proj/R/RcppExports.R | 5 - R-proj/examples/logconcave/nuts_rand_poly.R | 55 +++ .../logconcave/simple_hmc_rand_poly.R | 4 +- R-proj/man/sample_points.Rd | 18 +- R-proj/src/sample_points.cpp | 54 ++- examples/logconcave/CMakeLists.txt | 18 +- external/cmake-files/LPSolve.cmake | 2 +- include/cartesian_geom/point.h | 1 + include/ode_solvers/leapfrog.hpp | 39 +- .../estimate_L_smooth_parameter.hpp | 73 ++++ .../hamiltonian_monte_carlo_walk.hpp | 9 +- include/random_walks/nuts_hmc_walk.hpp | 380 ++++++++++++++++++ include/random_walks/random_walks.hpp | 1 + include/sampling/random_point_generators.hpp | 19 +- include/sampling/sampling.hpp | 38 +- test/CMakeLists.txt | 5 + test/logconcave_sampling_test.cpp | 89 ++++ 17 files changed, 732 insertions(+), 78 deletions(-) create mode 100644 R-proj/examples/logconcave/nuts_rand_poly.R create mode 100644 include/preprocess/estimate_L_smooth_parameter.hpp create mode 100644 include/random_walks/nuts_hmc_walk.hpp diff --git a/R-proj/R/RcppExports.R b/R-proj/R/RcppExports.R index 5713d11a3..c3f192aee 100644 --- a/R-proj/R/RcppExports.R +++ b/R-proj/R/RcppExports.R @@ -361,11 +361,6 @@ sample_points <- function(P, n, random_walk = NULL, distribution = NULL, seed = .Call(`_volesti_sample_points`, P, n, random_walk, distribution, seed) } -#' @export -sample_spectra <- function(file = NULL, N = NULL, walk_length = NULL) { - .Call(`_volesti_sample_spectra`, file, N, walk_length) -} - #' Write a SDPA format file #' #' Outputs a spectrahedron (the matrices defining a linear matrix inequality) and a vector (the objective function) diff --git a/R-proj/examples/logconcave/nuts_rand_poly.R b/R-proj/examples/logconcave/nuts_rand_poly.R new file mode 100644 index 000000000..00400ba72 --- /dev/null +++ b/R-proj/examples/logconcave/nuts_rand_poly.R @@ -0,0 +1,55 @@ +# VolEsti (volume computation and sampling library) + +# Copyright (c) 2012-2020 Vissarion Fisikopoulos +# Copyright (c) 2018-2020 Apostolos Chalkis +# Copyright (c) 2020-2020 Marios Papachristou + +# Contributed and/or modified by Marios Papachristou, as part of Google Summer of Code 2020 program. + +# Licensed under GNU LGPL.3, see LICENCE file + +# Example script for using the logconcave sampling methods + +# Import required libraries +library(ggplot2) +library(volesti) + +# Sampling from logconcave density example + +# Helper function +norm_vec <- function(x) sqrt(sum(x^2)) + +# Negative log-probability oracle +f <- function(x) (norm_vec(x)^2 + sum(x)) + +# Negative log-probability gradient oracle +grad_f <- function(x) (2 * x + 1) + +dimension <- 50 +facets <- 200 + +# Create domain of truncation +H <- gen_rand_hpoly(dimension, facets, seed = 15) + +# Rounding +Tr <- rounding(H, seed = 127) + +P <- Hpolytope$new(A = Tr$Mat[1:nrow(Tr$Mat), 2:ncol(Tr$Mat)], b = Tr$Mat[,1]) + +x_min = matrix(0, dimension, 1) + +# Warm start point from truncated Gaussian +warm_start <- sample_points(P, n = 1, random_walk = list("nburns" = 5000), distribution = list("density" = "gaussian", "variance" = 1/2, "mode" = x_min)) + +# Sample points +n_samples <- 20000 + +samples <- sample_points(P, n = n_samples, random_walk = list("walk" = "NUTS", "solver" = "leapfrog", "starting_point" = warm_start[,1]), + distribution = list("density" = "logconcave", "negative_logprob" = f, "negative_logprob_gradient" = grad_f)) + +# Plot histogram +hist(samples[1,], probability=TRUE, breaks = 100) + +psrfs <- psrf_univariate(samples) +n_ess <- ess(samples) + diff --git a/R-proj/examples/logconcave/simple_hmc_rand_poly.R b/R-proj/examples/logconcave/simple_hmc_rand_poly.R index 9f785aaa8..b352c9a16 100644 --- a/R-proj/examples/logconcave/simple_hmc_rand_poly.R +++ b/R-proj/examples/logconcave/simple_hmc_rand_poly.R @@ -29,10 +29,10 @@ dimension <- 50 facets <- 200 # Create domain of truncation -H <- gen_rand_hpoly(dimension, facets) +H <- gen_rand_hpoly(dimension, facets, seed = 15) # Rounding -Tr <- rounding(H) +Tr <- rounding(H, seed = 127) P <- Hpolytope$new(A = Tr$Mat[1:nrow(Tr$Mat), 2:ncol(Tr$Mat)], b = Tr$Mat[,1]) diff --git a/R-proj/man/sample_points.Rd b/R-proj/man/sample_points.Rd index f5ea6f2bf..ca2ef87fa 100644 --- a/R-proj/man/sample_points.Rd +++ b/R-proj/man/sample_points.Rd @@ -13,7 +13,7 @@ sample_points(P, n, random_walk = NULL, distribution = NULL, seed = NULL) \item{random_walk}{Optional. A list that declares the random walk and some related parameters as follows: \itemize{ -\item{\code{walk} }{ A string to declare the random walk: i) \code{'CDHR'} for Coordinate Directions Hit-and-Run, ii) \code{'RDHR'} for Random Directions Hit-and-Run, iii) \code{'BaW'} for Ball Walk, iv) \code{'BiW'} for Billiard walk, v) \code{'dikin'} for dikin walk, vi) \code{'vaidya'} for vaidya walk, vii) \code{'john'} for john walk, viii) \code{'BCDHR'} boundary sampling by keeping the extreme points of CDHR or ix) \code{'BRDHR'} boundary sampling by keeping the extreme points of RDHR x) \code{'HMC'} for Hamiltonian Monte Carlo (logconcave) xi) \code{'ULD'} for Underdamped Langevin Dynamics using the Randomized Midpoint Method. The default walk is \code{'aBiW'} for the uniform distribution or \code{'CDHR'} for the Gaussian distribution and H-polytopes and \code{'BiW'} or \code{'RDHR'} for the same distributions and V-polytopes and zonotopes.} +\item{\code{walk} }{ A string to declare the random walk: i) \code{'CDHR'} for Coordinate Directions Hit-and-Run, ii) \code{'RDHR'} for Random Directions Hit-and-Run, iii) \code{'BaW'} for Ball Walk, iv) \code{'BiW'} for Billiard walk, v) \code{'dikin'} for dikin walk, vi) \code{'vaidya'} for vaidya walk, vii) \code{'john'} for john walk, viii) \code{'BCDHR'} boundary sampling by keeping the extreme points of CDHR or ix) \code{'BRDHR'} boundary sampling by keeping the extreme points of RDHR x) \code{'NUTS'} for NUTS Hamiltonian Monte Carlo sampler (logconcave densities) xi) \code{'HMC'} for Hamiltonian Monte Carlo (logconcave densities) xii) \code{'ULD'} for Underdamped Langevin Dynamics using the Randomized Midpoint Method (logconcave densities) xiii) \code{'ExactHMC'} for exact Hamiltonian Monte Carlo with reflections (spherical Gaussian or exponential distribution). The default walk is \code{'aBiW'} for the uniform distribution, \code{'CDHR'} for the Gaussian distribution and H-polytopes and \code{'BiW'} or \code{'RDHR'} for the same distributions and V-polytopes and zonotopes. \code{'NUTS'} is the default sampler for logconcave densities.} \item{\code{walk_length} }{ The number of the steps per generated point for the random walk. The default value is \eqn{1}.} \item{\code{nburns} }{ The number of points to burn before start sampling. The default value is \eqn{1}.} \item{\code{starting_point} }{ A \eqn{d}-dimensional numerical vector that declares a starting point in the interior of the polytope for the random walk. The default choice is the center of the ball as that one computed by the function \code{inner_ball()}.} @@ -25,13 +25,14 @@ sample_points(P, n, random_walk = NULL, distribution = NULL, seed = NULL) \item{distribution}{Optional. A list that declares the target density and some related parameters as follows: \itemize{ -\item{\code{density} }{ A string: (a) \code{'uniform'} for the uniform distribution or b) \code{'gaussian'} for the multidimensional spherical distribution. The default target distribution is uniform. c) Logconcave with form proportional to exp(-f(x)) where f(x) is L-smooth and m-strongly-convex. } -\item{\code{variance} }{ The variance of the multidimensional spherical gaussian. The default value is 1.} +\item{\code{density} }{ A string: (a) \code{'uniform'} for the uniform distribution or b) \code{'gaussian'} for the multidimensional spherical distribution c) \code{logconcave} with form proportional to exp(-f(x)) where f(x) is L-smooth and m-strongly-convex d) \code{'exponential'} for the exponential distribution. The default target distribution is the uniform distribution.} +\item{\code{variance} }{ The variance of the multidimensional spherical gaussian or the exponential distribution. The default value is 1.} \item{\code{mode} }{ A \eqn{d}-dimensional numerical vector that declares the mode of the Gaussian distribution. The default choice is the center of the as that one computed by the function \code{inner_ball()}.} -\item{\code{L_}} { Smoothness constant (for logconcave). } -\item{\code{m}} { Strong-convexity constant (for logconcave). } -\item{\code{negative_logprob}} { Negative log-probability (for logconcave). } -\item{\code{negative_logprob_gradient}} { Negative log-probability gradient (for logconcave). } +\item{\code{bias} }{ The bias vector for the exponential distribution. The default vector is \eqn{c_1 = 1} and \eqn{c_i = 0} for \eqn{i \neq 1}.} +\item{\code{L_} }{ Smoothness constant (for logconcave). } +\item{\code{m} }{ Strong-convexity constant (for logconcave). } +\item{\code{negative_logprob} }{ Negative log-probability (for logconcave). } +\item{\code{negative_logprob_gradient} }{ Negative log-probability gradient (for logconcave). } }} \item{seed}{Optional. A fixed seed for the number generator.} @@ -75,4 +76,7 @@ points = sample_points(P, n = 100, random_walk = list("walk" = "BRDHR")) \cite{Shen, Ruoqi, and Yin Tat Lee, \dQuote{"The randomized midpoint method for log-concave sampling.",} \emph{Advances in Neural Information Processing Systems}, 2019.} + +\cite{Augustin Chevallier, Sylvain Pion, Frederic Cazals, +\dQuote{"Hamiltonian Monte Carlo with boundary reflections, and application to polytope volume calculations,"} \emph{Research Report preprint hal-01919855}, 2018.} } diff --git a/R-proj/src/sample_points.cpp b/R-proj/src/sample_points.cpp index b1150877f..4756d0915 100644 --- a/R-proj/src/sample_points.cpp +++ b/R-proj/src/sample_points.cpp @@ -36,6 +36,7 @@ enum random_walks { brdhr, bcdhr, hmc, + nuts, gaussian_hmc, exponential_hmc, uld @@ -213,6 +214,26 @@ void sample_from_polytope(Polytope &P, int type, RNGType &rng, PointList &randPo } break; + case nuts: + + logconcave_sampling < + PointList, + Polytope, + RNGType, + NutsHamiltonianMonteCarloWalk, + NT, + Point, + NegativeGradientFunctor, + NegativeLogprobFunctor, + LeapfrogODESolver < + Point, + NT, + Polytope, + NegativeGradientFunctor + > + >(randPoints, P, rng, walkL, numpoints, StartingPoint, nburns, *F, *f); + + break; case uld: logconcave_sampling < @@ -244,7 +265,16 @@ void sample_from_polytope(Polytope &P, int type, RNGType &rng, PointList &randPo //' @param n The number of points that the function is going to sample from the convex polytope. //' @param random_walk Optional. A list that declares the random walk and some related parameters as follows: //' \itemize{ -//' \item{\code{walk} }{ A string to declare the random walk: i) \code{'CDHR'} for Coordinate Directions Hit-and-Run, ii) \code{'RDHR'} for Random Directions Hit-and-Run, iii) \code{'BaW'} for Ball Walk, iv) \code{'BiW'} for Billiard walk, v) \code{'dikin'} for dikin walk, vi) \code{'vaidya'} for vaidya walk, vii) \code{'john'} for john walk, viii) \code{'BCDHR'} boundary sampling by keeping the extreme points of CDHR or ix) \code{'BRDHR'} boundary sampling by keeping the extreme points of RDHR x) \code{'HMC'} for Hamiltonian Monte Carlo (logconcave densities) xi) \code{'ULD'} for Underdamped Langevin Dynamics using the Randomized Midpoint Method xii) \code{'ExactHMC'} for exact Hamiltonian Monte Carlo with reflections (spherical Gaussian or exponential distribution). The default walk is \code{'aBiW'} for the uniform distribution or \code{'CDHR'} for the Gaussian distribution and H-polytopes and \code{'BiW'} or \code{'RDHR'} for the same distributions and V-polytopes and zonotopes.} +//' \item{\code{walk} }{ A string to declare the random walk: i) \code{'CDHR'} for Coordinate Directions Hit-and-Run, +//' ii) \code{'RDHR'} for Random Directions Hit-and-Run, iii) \code{'BaW'} for Ball Walk, +//' iv) \code{'BiW'} for Billiard walk, v) \code{'dikin'} for dikin walk, vi) \code{'vaidya'} for vaidya walk, +//' vii) \code{'john'} for john walk, viii) \code{'BCDHR'} boundary sampling by keeping the extreme points of CDHR or +//' ix) \code{'BRDHR'} boundary sampling by keeping the extreme points of RDHR x) \code{'NUTS'} for +//' NUTS Hamiltonian Monte Carlo sampler (logconcave densities) xi) \code{'HMC'} for Hamiltonian Monte Carlo (logconcave densities) +//' xii) \code{'ULD'} for Underdamped Langevin Dynamics using the Randomized Midpoint Method (logconcave densities) +//' xiii) \code{'ExactHMC'} for exact Hamiltonian Monte Carlo with reflections (spherical Gaussian or exponential distribution). +//' The default walk is \code{'aBiW'} for the uniform distribution, \code{'CDHR'} for the Gaussian distribution and H-polytopes +//' and \code{'BiW'} or \code{'RDHR'} for the same distributions and V-polytopes and zonotopes. \code{'NUTS'} is the default sampler for logconcave densities.} //' \item{\code{walk_length} }{ The number of the steps per generated point for the random walk. The default value is \eqn{1}.} //' \item{\code{nburns} }{ The number of points to burn before start sampling. The default value is \eqn{1}.} //' \item{\code{starting_point} }{ A \eqn{d}-dimensional numerical vector that declares a starting point in the interior of the polytope for the random walk. The default choice is the center of the ball as that one computed by the function \code{inner_ball()}.} @@ -410,18 +440,24 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable P, Rcpp::Function negative_logprob = Rcpp::as(distribution)["negative_logprob"]; Rcpp::Function negative_logprob_gradient = Rcpp::as(distribution)["negative_logprob_gradient"]; - NT L_, m; + NT L_ = 1, m = 1; if (Rcpp::as(distribution).containsElementNamed("L_")) { L_ = Rcpp::as(Rcpp::as(distribution)["L_"]); + if (L_ <= NT(0)) { + throw Rcpp::exception("The smoothness constant must be positive"); + } } else { - throw Rcpp::exception("The smoothness constant is absent"); + L_ = -1; } if (Rcpp::as(distribution).containsElementNamed("m")) { m = Rcpp::as(Rcpp::as(distribution)["m"]); + if (m <= NT(0)) { + throw Rcpp::exception("The strong-convexity constant must be positive"); + } } else { - throw Rcpp::exception("The strong-convexity constant is absent"); + m = -1; } if (Rcpp::as(random_walk).containsElementNamed("step_size")) { @@ -443,7 +479,6 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable P, throw Rcpp::exception("Invalid ODE solver specified. Aborting."); } } else { - Rcpp::warning("Solver set to leapfrog."); solver = leapfrog; } @@ -495,6 +530,8 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable P, throw Rcpp::exception("Exponential sampling is supported only for H-polytopes"); } walk = exponential_hmc; + } else if (logconcave) { + walk = nuts; } else if (gaussian) { if (type == 1) { walk = cdhr; @@ -571,7 +608,12 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable P, } } else if (Rcpp::as(Rcpp::as(random_walk)["walk"]).compare(std::string("HMC")) == 0) { if (!logconcave) throw Rcpp::exception("HMC is not supported for non first-order sampling"); + if (F->params.L < 0) throw Rcpp::exception("The smoothness constant is absent"); + if (F->params.m < 0) throw Rcpp::exception("The strong-convexity constant is absent"); walk = hmc; + } else if (Rcpp::as(Rcpp::as(random_walk)["walk"]).compare(std::string("NUTS")) == 0) { + if (!logconcave) throw Rcpp::exception("NUTS is not supported for non first-order sampling"); + walk = nuts; } else if (Rcpp::as(Rcpp::as(random_walk)["walk"]).compare(std::string("ULD")) == 0) { if (!logconcave) throw Rcpp::exception("ULD is not supported for non first-order sampling"); walk = uld; @@ -703,7 +745,7 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable P, if (numpoints % 2 == 1 && (walk == brdhr || walk == bcdhr)) numpoints--; MT RetMat(dim, numpoints); unsigned int jj = 0; - + for (typename std::list::iterator rpit = randPoints.begin(); rpit!=randPoints.end(); rpit++, jj++) { if (gaussian) { RetMat.col(jj) = (*rpit).getCoefficients() + mode.getCoefficients(); diff --git a/examples/logconcave/CMakeLists.txt b/examples/logconcave/CMakeLists.txt index 0448ce3e3..33725d0c9 100644 --- a/examples/logconcave/CMakeLists.txt +++ b/examples/logconcave/CMakeLists.txt @@ -20,7 +20,7 @@ endif(COMMAND cmake_policy) option(DISABLE_NLP_ORACLES "Disable non-linear oracles (used in collocation)" ON) option(BUILTIN_EIGEN "Use eigen from ../external" OFF) -option(USE_MKL "Use MKL library to build eigen" ON) +option(USE_MKL "Use MKL library to build eigen" OFF) if(DISABLE_NLP_ORACLES) add_definitions(-DDISABLE_NLP_ORACLES) @@ -73,16 +73,24 @@ else () include_directories(BEFORE /usr/include/eigen3) endif(BUILTIN_EIGEN) +find_library(BLAS NAMES libblas.so libblas.dylib PATHS /usr/local/Cellar/lapack/3.9.1_1/lib /usr/lib/x86_64-linux-gnu /usr/lib/i386-linux-gnu /usr/local/Cellar/openblas/0.3.15_1/lib /usr/lib) + if (USE_MKL) - find_library(BLAS NAME libblas.so PATHS /usr/lib/x86_64-linux-gnu /usr/lib/i386-linux-gnu) + find_library(BLAS NAMES libblas.so libblas.dylib PATHS /usr/local/Cellar/lapack/3.9.1_1/lib /usr/lib/x86_64-linux-gnu /usr/lib/i386-linux-gnu /usr/local/Cellar/openblas/0.3.15_1/lib /usr/lib) + find_library(GFORTRAN NAME libgfortran.dylib PATHS /usr/local/Cellar/gcc/10.2.0_4/lib/gcc/10) + find_library(LAPACK NAME liblapack.dylib PATHS /usr/lib) + find_library(OPENMP NAME libiomp5.dylib PATHS /opt/intel/oneapi/compiler/2021.1.1/mac/compiler/lib) + include_directories (BEFORE ${MKLROOT}/include) - set(PROJECT_LIBS ${BLAS_LIBRARIES}) + set(PROJECT_LIBS ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${GFORTRAN_LIBRARIES}) set(MKL_LINK "-L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl") add_definitions(-DEIGEN_USE_MKL_ALL) +else() + set(MKL_LINK "") endif(USE_MKL) # Find lpsolve library -find_library(LP_SOLVE NAMES liblpsolve55.so PATHS /usr/lib/lp_solve) +find_library(LP_SOLVE NAMES liblpsolve55.so liblpsolve55.dylib PATHS /usr/lib/lp_solve /usr/local/lib) if (NOT LP_SOLVE) message(FATAL_ERROR "This program requires the lp_solve library, and will not be compiled.") @@ -145,6 +153,6 @@ else () TARGET_LINK_LIBRARIES(simple_hmc ${LP_SOLVE} ${BLAS} ${MKL_LINK} ) TARGET_LINK_LIBRARIES(exponential_exact_hmc ${LP_SOLVE} ${BLAS} ${MKL_LINK} ) TARGET_LINK_LIBRARIES(gaussian_exact_hmc ${LP_SOLVE} ${BLAS} ${MKL_LINK} ) - + endif() diff --git a/external/cmake-files/LPSolve.cmake b/external/cmake-files/LPSolve.cmake index b3f3cf98b..c5a173837 100644 --- a/external/cmake-files/LPSolve.cmake +++ b/external/cmake-files/LPSolve.cmake @@ -26,7 +26,7 @@ function(GetLPSolve) message(STATUS "lp_solve library found: ${LP_SOLVE_DIR}") endif() - + include_directories(${LP_SOLVE_DIR}) endfunction() diff --git a/include/cartesian_geom/point.h b/include/cartesian_geom/point.h index a2209f4f5..b8c8e32b4 100644 --- a/include/cartesian_geom/point.h +++ b/include/cartesian_geom/point.h @@ -81,6 +81,7 @@ class point void set_dimension(const unsigned int dim) { d = dim; + coeffs.setZero(d); } void set_coord(const unsigned int i, FT coord) diff --git a/include/ode_solvers/leapfrog.hpp b/include/ode_solvers/leapfrog.hpp index 4c1f299b0..b7f2724b4 100644 --- a/include/ode_solvers/leapfrog.hpp +++ b/include/ode_solvers/leapfrog.hpp @@ -56,6 +56,8 @@ struct LeapfrogODESolver { pts xs; pts xs_prev; + Point grad_x; + MT _AA; std::pair pbpair; @@ -69,11 +71,11 @@ struct LeapfrogODESolver { eta(step), eta0(step), t(initial_time), F(oracle), Ks(boundaries), xs(initial_state), adaptive(adaptive_) { dim = xs[0].dimension(); _update_parameters = update_parameters(); + grad_x.set_dimension(dim); initialize(); }; - void initialize() { for (unsigned int i = 0; i < xs.size(); i++) { VT ar, av; @@ -88,7 +90,6 @@ struct LeapfrogODESolver { Av.push_back(av); lambda_prev.push_back(NT(0)); } - //step(); } void disable_adaptive() { @@ -101,27 +102,28 @@ struct LeapfrogODESolver { void step(int k, bool accepted) { num_steps++; - if (adaptive) eta = (eta0 * num_steps) / (num_steps + num_reflections); xs_prev = xs; unsigned int x_index, v_index, it; t += eta; + Point y; for (unsigned int i = 1; i < xs.size(); i += 2) { - //pbpair.second = -1; + x_index = i - 1; v_index = i; // v' <- v + eta / 2 F(x) - Point z = F(v_index, xs_prev, t); - z = (eta / 2) * z; - xs[v_index] = xs[v_index] + z; + if (k == 0 && !accepted) { + grad_x = F(v_index, xs_prev, t); + } + xs[v_index] += (eta / 2) * grad_x; // x <- x + eta v' - Point y = xs[v_index]; + y = xs[v_index]; if (Ks[x_index] == NULL) { - xs[x_index] = xs_prev[x_index] + eta*y; + xs[x_index] += eta*y; } else { // Find intersection (assuming a line trajectory) between x and y @@ -173,10 +175,8 @@ struct LeapfrogODESolver { } // tilde v <- v + eta / 2 F(tilde x) - z = F(v_index, xs, t); - z = (eta / 2) * z; - xs[v_index] = xs[v_index] + z; - + grad_x = F(v_index, xs, t); + xs[v_index] += (eta / 2) * grad_x; } } @@ -202,6 +202,19 @@ struct LeapfrogODESolver { xs[index] = p; } + NT get_eta() { + return eta; + } + + void set_eta(NT &eta_) { + eta = eta_; + eta0 = eta_; + } + + bounds get_bounds() { + return Ks; + } + }; diff --git a/include/preprocess/estimate_L_smooth_parameter.hpp b/include/preprocess/estimate_L_smooth_parameter.hpp new file mode 100644 index 000000000..8f5a1e89c --- /dev/null +++ b/include/preprocess/estimate_L_smooth_parameter.hpp @@ -0,0 +1,73 @@ +// VolEsti (volume computation and sampling library) + +// Copyright (c) 2012-2020 Vissarion Fisikopoulos +// Copyright (c) 2018-2020 Apostolos Chalkis + +//Contributed and/or modified by Alexandros Manochis, as part of Google Summer of Code 2020 program. + +// Licensed under GNU LGPL.3, see LICENCE file + + +#ifndef ESTIMATE_L_SMOOTH_PARAMETER_HPP +#define ESTIMATE_L_SMOOTH_PARAMETER_HPP + +#include "random_walks/random_walks.hpp" + +template +< + typename WalkTypePolicy = AcceleratedBilliardWalk, + typename Polytope, + typename Point, + typename NegativeGradientFunctor, + typename RandomNumberGenerator +> +double estimate_L_smooth(Polytope &P, Point &p, unsigned int const& walk_length, + NegativeGradientFunctor F, RandomNumberGenerator &rng) +{ + typedef typename Point::FT NT; + typedef typename WalkTypePolicy::template Walk + < + Polytope, + RandomNumberGenerator + > RandomWalk; + + P.ComputeInnerBall(); + + unsigned int d = P.dimension(); + unsigned int rnum = 5 * d; + std::vector randPoints(1); + std::vector vecPoint1; + std::vector vecPoint2; + std::vector< std::vector > listOfPoints; + Point F1; + + RandomWalk walk(P, p, rng); + for (unsigned int i=0; i::lowest(), Ltemp; + + for (auto pit=listOfPoints.begin(); pit!=(listOfPoints.end()-1); ++pit) + { + F1 = F(1, *pit, 0); + + for (auto qit=(pit+1); qit!=listOfPoints.end(); ++qit) + { + vecPoint2 = *qit; + Ltemp = (F1 - F(1, *qit, 0)).length() / ((*pit)[0] - (*qit)[0]).length(); + + if (Ltemp > L) + { + L = Ltemp; + } + } + } + return L; +} + + +#endif diff --git a/include/random_walks/hamiltonian_monte_carlo_walk.hpp b/include/random_walks/hamiltonian_monte_carlo_walk.hpp index 3c50ca5e9..23342ea57 100644 --- a/include/random_walks/hamiltonian_monte_carlo_walk.hpp +++ b/include/random_walks/hamiltonian_monte_carlo_walk.hpp @@ -114,12 +114,10 @@ struct HamiltonianMonteCarloWalk { }; - inline void apply( - RandomNumberGenerator &rng, - int walk_length=1, - bool metropolis_filter=true) + inline void apply(RandomNumberGenerator &rng, + int walk_length=1, + bool metropolis_filter=true) { - num_runs++; // Pick a random velocity @@ -156,6 +154,7 @@ struct HamiltonianMonteCarloWalk { } } else { x = x_tilde; + accepted = true; } discard_ratio = (1.0 * total_discarded_samples) / num_runs; diff --git a/include/random_walks/nuts_hmc_walk.hpp b/include/random_walks/nuts_hmc_walk.hpp new file mode 100644 index 000000000..857064c0e --- /dev/null +++ b/include/random_walks/nuts_hmc_walk.hpp @@ -0,0 +1,380 @@ +// VolEsti (volume computation and sampling library) + +// Copyright (c) 2012-2022 Vissarion Fisikopoulos +// Copyright (c) 2018-2022 Apostolos Chalkis +// Copyright (c) 2020-2022 Elias Tsigaridas +// Copyright (c) 2020-2022 Marios Papachristou + +// Licensed under GNU LGPL.3, see LICENCE file + +// References +// Matthew D. Hoffman, Andrew Gelman. "The No-U-Turn Sampler: +// Adaptively Setting Path Lengths in Hamiltonian Monte Carlo", 2011. + +// Comment: Compared to [Matthew D. Hoffman, Andrew Gelman, 2011] +// we modify the step of Nesterov's algorithm in the burn in phase. + +#ifndef NUTS_HAMILTONIAN_MONTE_CARLO_WALK_HPP +#define NUTS_HAMILTONIAN_MONTE_CARLO_WALK_HPP + + +#include "generators/boost_random_number_generator.hpp" +#include "random_walks/gaussian_helpers.hpp" +#include "ode_solvers/ode_solvers.hpp" +#include "preprocess/estimate_L_smooth_parameter.hpp" + +struct NutsHamiltonianMonteCarloWalk { + + template + < + typename NT, + typename OracleFunctor + > + struct parameters { + NT epsilon; // tolerance in mixing + NT eta; // step size + + parameters( + OracleFunctor const& F, + unsigned int dim, + NT epsilon_=2) + { + epsilon = epsilon_; + eta = F.params.L > 0 ? 10.0 / (dim * sqrt(F.params.L)) : 0.005; + } + }; + + template + < + typename Point, + typename Polytope, + typename RandomNumberGenerator, + typename NegativeGradientFunctor, + typename NegativeLogprobFunctor, + typename Solver + > + struct Walk { + + typedef std::vector pts; + typedef typename Point::FT NT; + typedef std::vector bounds; + + // Hyperparameters of the sampler + parameters ¶ms; + + // Numerical ODE solver + Solver *solver; + + // Dimension + unsigned int dim; + + // Discarded Samples + long num_runs = 0; + long total_acceptance = 0; + + // Average acceptance probability + NT average_acceptance = 0; + + // References to xs + Point x, v; + + // Helper points + Point v_pl, v_min, v_min_j, v_pl_j, X_pl, X_pl_j, X_min, X, X_rnd_j, X_min_j, x_pl_min; + + // Gradient function + NegativeGradientFunctor &F; + + bool accepted; + + // Burnin parameters + NT eps_step, mu, log_tilde_eps, H_tilde, alpha, na; + const NT delta = NT(0.65), Delta_max = NT(1000), gamma = NT(0.05), t0 = NT(10), kk = NT(0.85); + + // Density exponent + NegativeLogprobFunctor &f; + + Walk(Polytope *P, + Point &p, + NegativeGradientFunctor &neg_grad_f, + NegativeLogprobFunctor &neg_logprob_f, + parameters ¶m, + bool burn_in_phase = true) : + params(param), + F(neg_grad_f), + f(neg_logprob_f) + { + dim = p.dimension(); + + v_pl.set_dimension(dim); + v_min.set_dimension(dim); + v_min_j.set_dimension(dim); + v_pl_j.set_dimension(dim); + X_pl.set_dimension(dim); + X_pl_j.set_dimension(dim); + X_min.set_dimension(dim); + X.set_dimension(dim); + X_rnd_j.set_dimension(dim); + X_min_j.set_dimension(dim); + x_pl_min.set_dimension(dim); + + eps_step = params.eta; + mu = std::log(10*eps_step); + log_tilde_eps = NT(0); + H_tilde = NT(0); + alpha = NT(0); + na = NT(0); + + // Starting point is provided from outside + x = p; + + accepted = false; + + // Initialize solver + solver = new Solver(0, params.eta, pts{x, x}, F, bounds{P, NULL}); + disable_adaptive(); + + if (burn_in_phase) + { + RandomNumberGenerator rng(dim); + burnin(rng); + } + }; + + + inline void burnin(RandomNumberGenerator &rng, + unsigned int N = 1000, + unsigned int walk_length=1) + { + reset_num_runs(); + Point p = x; + NT L; + + if ((solver->get_bounds())[0] == NULL) + { + L = (NT(100) / NT(dim)) * (NT(100) / NT(dim)); + } + else + { + Polytope K = *(solver->get_bounds())[0]; + L = estimate_L_smooth(K, p, walk_length, F, rng); + } + + eps_step = NT(5) / (NT(dim) * std::sqrt(L)); + solver->set_eta(eps_step); + + for (int i = 0; i < N; i++) + { + apply(rng, walk_length, true); + solver->set_eta(eps_step); + } + reset_num_runs(); + } + + + inline void apply(RandomNumberGenerator &rng, + unsigned int walk_length=1, + bool burnin = false) + { + num_runs++; + + int x_counting_total = 0; + + // Pick a random velocity + v = GetDirection::apply(dim, rng, false); + + v_pl = v; + v_min = NT(-1) * v; + X_pl = x; + X_min = x; + + NT h1 = hamiltonian(x, v); + + NT uu = std::log(rng.sample_urdist()) - h1; + int j = -1; + bool s = true; + bool updated = false; + bool pos_state_single = false; + + if (burnin) + { + alpha = NT(0); + } + + while (s) + { + j++; + + if (burnin) + { + na = std::pow(NT(2), NT(j)); + } + + NT dir = rng.sample_urdist(); + + if (dir > 0.5) + { + v = v_pl; + X = X_pl; + } + else + { + v = v_min; + X = X_min; + } + X_rnd_j = X; + + int x_counting = 0; + int num_samples = int(std::pow(NT(2), NT(j))); + accepted = false; + + for (int k = 1; k <= num_samples; k++) + { + if (!accepted) + { + solver->set_state(0, X); + solver->set_state(1, v); + } + + // Get proposals + solver->steps(walk_length, accepted); + accepted = true; + + X = solver->get_state(0); + v = solver->get_state(1); + + NT hj = hamiltonian(X, v); + + if (burnin) + { + alpha += std::min(NT(1), std::exp(-hj + h1)); + } + + if (uu > Delta_max - hj) + { + s = false; + break; + } + + bool pos_state = false; + if (uu < -hj) + { + pos_state = true; + pos_state_single = true; + x_counting = x_counting + 1; + x_counting_total = x_counting_total + 1; + } + + if (k == 1) + { + if (dir > 0.5) + { + X_min_j = X; + v_min_j = v; + } + else + { + X_pl_j = X; + v_pl_j = v; + } + } + if (k == num_samples) + { + if (dir > 0.5) + { + x_pl_min = X - X_min_j; + if ((x_pl_min.dot(v) < 0) || (x_pl_min.dot(v_min_j) < 0)) + { + s = false; + } + } + else + { + x_pl_min = X_pl_j - X; + if ((x_pl_min.dot(v) < 0) || (x_pl_min.dot(v_pl_j) < 0)) + { + s = false; + } + } + } + if ((rng.sample_urdist() < (1/NT(x_counting))) && pos_state) + { + X_rnd_j = X; + } + } + + if (dir > 0.5) + { + X_pl = X; + v_pl = v; + } + else + { + X_min = X; + v_min = v; + } + + if (s && (rng.sample_urdist() < (NT(x_counting) / NT(x_counting_total)))) + { + x = X_rnd_j; + if (pos_state_single) + { + updated = true; + } + } + + if (s) + { + x_pl_min = X_pl - X_min; + if ((x_pl_min.dot(v_min) < 0) || (x_pl_min.dot(v_pl) < 0)) + { + s = false; + } + } + } + + if (updated) + { + total_acceptance++; + } + average_acceptance = NT(total_acceptance) / NT(num_runs); + + if (burnin) + { + H_tilde = (NT(1) - NT(1) / (NT(num_runs) + t0)) * H_tilde + (NT(1) / (NT(num_runs) + t0)) * (delta - alpha / na); + NT log_eps = mu - (std::sqrt(NT(num_runs)) / gamma) * H_tilde; + + // TODO: use the following to generalize Nesterov's algorithm + //log_tilde_eps = std::pow(mu,-kk) * log_eps + (NT(1) - std::pow(mu,-kk))*log_tilde_eps; + + eps_step = std::exp(log_eps); + } + } + + inline NT hamiltonian(Point &pos, Point &vel) const { + return f(pos) + 0.5 * vel.dot(vel); + } + + inline NT get_eta_solver() { + return solver->get_eta(); + } + + void disable_adaptive() { + solver->disable_adaptive(); + } + + void enable_adaptive() { + solver->enable_adaptive(); + } + + void reset_num_runs() { + num_runs = 0; + total_acceptance = 0; + } + + NT get_ratio_acceptance() { + return average_acceptance; + } + }; +}; + +#endif // HAMILTONIAN_MONTE_CARLO_WALK_HPP diff --git a/include/random_walks/random_walks.hpp b/include/random_walks/random_walks.hpp index 4affee74d..6c36457c1 100644 --- a/include/random_walks/random_walks.hpp +++ b/include/random_walks/random_walks.hpp @@ -28,6 +28,7 @@ #include "random_walks/exponential_hamiltonian_monte_carlo_exact_walk.hpp" #include "random_walks/uniform_accelerated_billiard_walk_parallel.hpp" #include "random_walks/hamiltonian_monte_carlo_walk.hpp" +#include "random_walks/nuts_hmc_walk.hpp" #include "random_walks/langevin_walk.hpp" #include "random_walks/crhmc/crhmc_walk.hpp" #endif // RANDOM_WALKS_RANDOM_WALKS_HPP diff --git a/include/sampling/random_point_generators.hpp b/include/sampling/random_point_generators.hpp index 86b10666c..a2a811dd1 100644 --- a/include/sampling/random_point_generators.hpp +++ b/include/sampling/random_point_generators.hpp @@ -231,26 +231,15 @@ struct LogconcaveRandomPointGenerator { template - < - typename Polytope, - typename Point, - typename PointList, - typename WalkPolicy, - typename RandomNumberGenerator, - typename NegativeGradientFunctor, - typename NegativeLogprobFunctor, - typename Parameters + < typename PointList, + typename WalkPolicy, + typename RandomNumberGenerator > - static void apply(Polytope &P, - Point &p, // a point to start - unsigned int const& rnum, + static void apply(unsigned int const& rnum, unsigned int const& walk_length, PointList &randPoints, WalkPolicy &policy, RandomNumberGenerator &rng, - NegativeGradientFunctor &F, - NegativeLogprobFunctor &f, - Parameters ¶meters, Walk &walk) { typedef double NT; diff --git a/include/sampling/sampling.hpp b/include/sampling/sampling.hpp index 4457b9cb6..0fdab8702 100644 --- a/include/sampling/sampling.hpp +++ b/include/sampling/sampling.hpp @@ -23,10 +23,10 @@ #define SAMPLE_ONLY_H template void uniform_sampling(PointList &randPoints, Polytope &P, @@ -232,14 +232,14 @@ template < typename Solver > void logconcave_sampling(PointList &randPoints, - Polytope &P, - RandomNumberGenerator &rng, - const unsigned int &walk_len, - const unsigned int &rnum, - const Point &starting_point, - unsigned int const& nburns, - NegativeGradientFunctor &F, - NegativeLogprobFunctor &f) + Polytope &P, + RandomNumberGenerator &rng, + const unsigned int &walk_len, + const unsigned int &rnum, + const Point &starting_point, + unsigned int const& nburns, + NegativeGradientFunctor &F, + NegativeLogprobFunctor &f) { typedef typename WalkTypePolicy::template Walk < @@ -272,16 +272,16 @@ void logconcave_sampling(PointList &randPoints, walk logconcave_walk(&P, p, F, f, params); typedef LogconcaveRandomPointGenerator RandomPointGenerator; - - RandomPointGenerator::apply(P, p, nburns, walk_len, randPoints, - push_back_policy, rng, F, f, params, logconcave_walk); - + + if (nburns > 0) { + RandomPointGenerator::apply(nburns, walk_len, randPoints, + push_back_policy, rng, logconcave_walk); + } logconcave_walk.disable_adaptive(); randPoints.clear(); - RandomPointGenerator::apply(P, p, rnum, walk_len, randPoints, - push_back_policy, rng, F, f, params, logconcave_walk); - + RandomPointGenerator::apply(rnum, walk_len, randPoints, + push_back_policy, rng, logconcave_walk); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ddd40e362..d95a49c7f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -318,6 +318,11 @@ else () COMMAND logconcave_sampling_test -tc=uld) add_test(NAME logconcave_sampling_test_exponential_biomass_sampling COMMAND logconcave_sampling_test -tc=exponential_biomass_sampling) + add_test(NAME logconcave_sampling_test_nuts_hmc_truncated + COMMAND logconcave_sampling_test -tc=benchmark_nuts_hmc_truncated) + add_test(NAME logconcave_sampling_test_nuts_hmc + COMMAND logconcave_sampling_test -tc=benchmark_nuts_hmc) + add_executable (crhmc_sampling_test crhmc_sampling_test.cpp $) add_test(NAME crhmc_sampling_test_crhmc diff --git a/test/logconcave_sampling_test.cpp b/test/logconcave_sampling_test.cpp index f8f6901cd..8e53f489b 100644 --- a/test/logconcave_sampling_test.cpp +++ b/test/logconcave_sampling_test.cpp @@ -39,6 +39,8 @@ #include "generators/h_polytopes_generator.h" #include "generators/convex_bodies_generator.h" +#include "diagnostics/univariate_psrf.hpp" + #include "preprocess/svd_rounding.hpp" #include "misc/misc.h" @@ -308,6 +310,80 @@ void benchmark_hmc(bool truncated) { } +template +void benchmark_nuts_hmc(bool truncated) { + typedef Cartesian Kernel; + typedef typename Kernel::Point Point; + typedef std::vector pts; + typedef HPolytope Hpolytope; + typedef BoostRandomNumberGenerator RandomNumberGenerator; + typedef CustomFunctor::GradientFunctor NegativeGradientFunctor; + typedef CustomFunctor::FunctionFunctor NegativeLogprobFunctor; + typedef LeapfrogODESolver Solver; + + typedef Eigen::Matrix MT; + typedef Eigen::Matrix VT; + + NegativeGradientFunctor F; + NegativeLogprobFunctor f; + RandomNumberGenerator rng(1); + unsigned int dim_min = 10; + unsigned int dim_max = 100; + int n_samples = 1000; + long ETA; + bool automatic_burnin = false; + std::chrono::time_point start, stop; + + for (unsigned int dim = dim_min; dim <= dim_max; dim+=10) + { + MT samples(dim, n_samples); + Point x0(dim); + NutsHamiltonianMonteCarloWalk::parameters hmc_params(F, dim); + if (truncated) + { + Hpolytope P = generate_cube(dim, false); + + std::cout << "eta0: " << hmc_params.eta << std::endl; + + NutsHamiltonianMonteCarloWalk::Walk + hmc(&P, x0, F, f, hmc_params, automatic_burnin); + + hmc.burnin(rng); + std::cout << "eta: " << hmc.get_eta_solver() << std::endl; + + start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < n_samples; i++) { + hmc.apply(rng); + samples.col(i) = hmc.x.getCoefficients(); + } + stop = std::chrono::high_resolution_clock::now(); + std::cout << "proportion of sucussfull steps: " << hmc.get_ratio_acceptance() << std::endl; + } + else + { + NutsHamiltonianMonteCarloWalk::Walk + hmc(NULL, x0, F, f, hmc_params, automatic_burnin); + + hmc.burnin(rng); + std::cout << "eta: " << hmc.get_eta_solver() << std::endl; + + start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < n_samples; i++) { + hmc.apply(rng); + samples.col(i) = hmc.x.getCoefficients(); + } + stop = std::chrono::high_resolution_clock::now(); + std::cout << "proportion of sucussfull steps: " << hmc.get_ratio_acceptance() << std::endl; + } + + std::cout << "PSRF: " << univariate_psrf(samples).maxCoeff() << std::endl; + + ETA = (long) std::chrono::duration_cast(stop - start).count(); + std::cout<< "time: " << ETA << "\n" << std::endl; + } + +} + template void test_hmc() { typedef Cartesian Kernel; @@ -878,6 +954,11 @@ void call_test_benchmark_hmc(bool truncated) { benchmark_hmc(truncated); } +template +void call_test_benchmark_nuts_hmc(bool truncated) { + benchmark_nuts_hmc(truncated); +} + template void call_test_benchmark_polytopes_grid_search() { typedef Cartesian Kernel; @@ -1196,6 +1277,14 @@ TEST_CASE("exponential_biomass_sampling") { call_test_exp_sampling(); } +TEST_CASE("benchmark_nuts_hmc_truncated") { + call_test_benchmark_nuts_hmc(true); +} + +TEST_CASE("benchmark_nuts_hmc") { + call_test_benchmark_nuts_hmc(false); +} + TEST_CASE("benchmark_hmc") { call_test_benchmark_hmc(false); }