Add gptoolsStan package.

onnela-lab · Dec 15, 2023 · 8a5efab · 8a5efab
1 parent 600db68
commit 8a5efab
Show file tree

Hide file tree

Showing 15 changed files with 1,397 additions and 0 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,5 @@
+^gptoolsStan\.Rproj$
+^R-package\.Rproj$
+^\.Rproj\.user$
+^vignettes/getting_started$
+Makefile
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,26 @@
+name: gptoolsStan R package
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+  workflow_dispatch:
+
+env:
+  cmdstanVersion: "2.31.0"
+
+jobs:
+  build:
+    name: Build
+    runs-on: "ubuntu-latest"
+    steps:
+      - uses: "actions/checkout@v2"
+      - uses: "r-lib/actions/setup-r@v2"
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: |
+            any::rcmdcheck
+            cmdstanr=url::https://mc-stan.org/r-packages/src/contrib/cmdstanr_0.7.0.tar.gz
+          needs: check
+      - uses: r-lib/actions/check-r-package@v2
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,8 @@
+.Rproj.user
+*.Rproj
+inst/doc
+vignettes/getting_started
+*.html
+check
+.vscode
+build
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,23 @@
+Package: gptoolsStan
+Title: Gaussian Processes on Graphs and Lattices in Stan
+Version: 0.1.0
+Authors@R: c(
+    person("Till", "Hoffmann", , "[email protected]", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0003-4403-0722")),
+    person("Jukka-Pekka", "Onnela", , "[email protected]", role = c("ctb"),
+           comment = c(ORCID = "0000-0001-6613-8668")))
+Description: Gaussian processes are flexible distributions to model functional data. Whilst
+    theoretically appealing, they are computationally cumbersome except for small datasets.
+    This package implements two methods for scaling GP inference in Stan. First, a sparse
+    approximation of the likelihood that is generally applicable and, second, an exact method for
+    regularly spaced data modeled by stationary kernels using fast Fourier methods.
+License: BSD
+Encoding: UTF-8
+Language: en-US
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
+Suggests:
+    knitr,
+    rmarkdown,
+    cmdstanr
+VignetteBuilder: knitr
diff --git a/Makefile b/Makefile
@@ -0,0 +1,15 @@
+.PHONY : build check man vignettes
+
+vignettes/getting_started.html : vignettes/getting_started.Rmd vignettes/getting_started.stan
+	Rscript -e "devtools::build_rmd('$<')"
+
+man :
+	Rscript -e 'devtools::document()'
+
+build :
+	mkdir -p build
+	cd build && R CMD build ..
+
+check : build
+	mkdir -p check
+	cd check && R CMD check ../build/*.tar.gz
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,3 @@
+# Generated by roxygen2: do not edit by hand
+
+export(gptools_include_path)
diff --git a/R/model.R b/R/model.R
@@ -0,0 +1,6 @@
+#' Get the gptools include path for compiling Stan programs with `cmdstanr`.
+#'
+#' @export
+gptools_include_path <- function() {
+    system.file("extdata", package="gptoolsStan")
+}
diff --git a/inst/extdata/gptools/fft.stan b/inst/extdata/gptools/fft.stan
@@ -0,0 +1,2 @@
+#include gptools/fft1.stan
+#include gptools/fft2.stan
diff --git a/inst/extdata/gptools/fft1.stan b/inst/extdata/gptools/fft1.stan
@@ -0,0 +1,173 @@
+// IMPORTANT: stan uses the questionable R indexing which is one-based and inclusive on both ends.
+// I.e., x[1:3] includes x[1] to x[3]. More generally, x[i:j] comprises j - i + 1 elements. It could
+// at least have been exclusive on the right...
+
+/**
+Evaluate the scale of Fourier coefficients.
+
+:param cov_rfft: Precomputed real fast Fourier transform of the kernel with shape
+    :code:`(..., n %/% 2 + 1)`.
+:param n: Size of the real signal. Necessary because the size cannot be inferred from
+    :code:`cov_rfft`.
+:returns: Scale of Fourier coefficients with shape :code:`(..., n %/% 2 + 1)`.
+*/
+vector gp_evaluate_rfft_scale(vector cov_rfft, int n) {
+    int nrfft = n %/% 2 + 1;
+    vector[nrfft] result = n * cov_rfft / 2;
+    // Check positive-definiteness.
+    real minval = min(result);
+    if (minval < 0) {
+        reject("covariance matrix is not positive-definite (minimum eigenvalue is ", minval, ")");
+    }
+    // The first element has larger scale because it only has a real part but must still have the
+    // right variance. The same applies to the last element if the number of elements is even
+    // (Nyqvist frequency).
+    result[1] *= 2;
+    if (n % 2 == 0) {
+        result[nrfft] *= 2;
+    }
+    return sqrt(result);
+}
+
+
+/**
+Unpack the Fourier coefficients of a real Fourier transform with :code:`n %/% 2 + 1` elements to a
+vector of :code:`n` elements.
+
+:param z: Real Fourier transform coefficients.
+:param n: Size of the real signal. Necessary because the size cannot be inferred from :code:`rfft`.
+
+:returns: Unpacked vector of :code:`n` elements comprising the :code:`n %/% 2 + 1` real parts of the
+    zero frequency term, complex terms, and Nyqvist frequency term (for even :code:`n`). The
+    subsequent :code:`(n - 1) %/% 2` elements are the imaginary parts of complex coefficients.
+*/
+vector gp_unpack_rfft(complex_vector x, int n) {
+    vector[n] z;
+    int ncomplex = (n - 1) %/% 2;
+    int nrfft = n %/% 2 + 1;
+    z[1:nrfft] = get_real(x);
+    z[1 + nrfft:n] = get_imag(x[2:1 + ncomplex]);
+    return z;
+}
+
+
+/**
+Transform a Gaussian process realization to white noise in the Fourier domain.
+
+:param y: Realization of the Gaussian process with shape :code:`(..., n)`.
+:param loc: Mean of the Gaussian process with shape :code:`(..., n)`.
+:param cov_rfft: Precomputed real fast Fourier transform of the kernel with shape
+    :code:`(..., n // 2 + 1)`.
+:returns: Fourier-domain white noise with shape :code:`(..., n)`. See :stan:func:`gp_unpack_rfft`
+    for details on the data structure.
+*/
+vector gp_rfft(vector y, vector loc, vector cov_rfft) {
+    return gp_unpack_rfft(rfft(y - loc) ./ gp_evaluate_rfft_scale(cov_rfft, size(y)), size(y));
+}
+
+
+/**
+Evaluate the log absolute determinant of the Jacobian associated with
+:stan:func:`gp_rfft`.
+
+:param cov_rfft: Precomputed real fast Fourier transform of the kernel with shape
+    :code:`(..., n %/% 2 + 1)`.
+:param n: Size of the real signal. Necessary because the size cannot be inferred from :code:`rfft`.
+:returns: Log absolute determinant of the Jacobian.
+*/
+real gp_rfft_log_abs_det_jacobian(vector cov_rfft, int n) {
+    vector[n %/% 2 + 1] rfft_scale = gp_evaluate_rfft_scale(cov_rfft, n);
+    return - sum(log(rfft_scale[1:n %/% 2 + 1])) -sum(log(rfft_scale[2:(n + 1) %/% 2]))
+        - log(2) * ((n - 1) %/% 2) + n * log(n) / 2;
+}
+
+
+/**
+Evaluate the log probability of a one-dimensional Gaussian process realization in Fourier space.
+
+:param y: Realization of a Gaussian process with :code:`n` grid points.
+:param loc: Mean of the Gaussian process of size :code:`n`.
+:param cov_rfft: Precomputed real fast Fourier transform of the kernel of size :code:`n %/% 2 + 1`.
+:returns: Log probability of the Gaussian process realization.
+*/
+real gp_rfft_lpdf(vector y, vector loc, vector cov_rfft) {
+    int n = size(y);
+    int nrfft = n %/% 2 + 1;
+    vector[n] z = gp_rfft(y, loc, cov_rfft);
+    return std_normal_lpdf(z) + gp_rfft_log_abs_det_jacobian(cov_rfft, n);
+}
+
+
+/**
+Transform a real vector with :code:`n` elements to a vector of complex Fourier coefficients with
+:code:`n` elements ready for inverse real fast Fourier transformation.
+*/
+complex_vector gp_pack_rfft(vector z) {
+    int n = size(z);  // Number of observations.
+    int ncomplex = (n - 1) %/% 2;  // Number of complex Fourier coefficients.
+    int nrfft = n %/% 2 + 1;  // Number of elements in the real FFT.
+    int neg_offset = (n + 1) %/% 2;  // Offset at which the negative frequencies start.
+    // Zero frequency, real part of positive frequency coefficients, and Nyqvist frequency.
+    complex_vector[nrfft] fft = z[1:nrfft];
+    // Imaginary part of positive frequency coefficients.
+    fft[2:ncomplex + 1] += 1.0i * z[nrfft + 1:n];
+    return fft;
+}
+
+
+/**
+Transform white noise in the Fourier domain to a Gaussian process realization, i.e., a
+*non-centered* parameterization in the Fourier domain.
+
+The :code:`n` real white noise variables must be assembled into a length-:code:`n %/% 2 + 1` complex
+vector with structure expected by the fast Fourier transform. The input vector :code:`z` comprises
+
+- the real zero frequency term,
+- :code:`(n - 1) %/% 2` real parts of positive frequency terms,
+- the real Nyqvist frequency term if :code:`n` is even,
+- and :code:`(n - 1) %/% 2` imaginary parts of positive frequency terms.
+
+:param z: Fourier-domain white noise comprising :code:`n` elements.
+:param loc: Mean of the Gaussian process.
+:param cov_rfft: Real fast Fourier transform of the covariance kernel.
+
+:returns: Realization of the Gaussian process with :code:`n` elements.
+*/
+vector gp_inv_rfft(vector z, vector loc, vector cov_rfft) {
+    int n = size(z);
+    vector[n %/% 2 + 1] rfft_scale = gp_evaluate_rfft_scale(cov_rfft, n);
+    return get_real(inv_rfft(rfft_scale .* gp_pack_rfft(z), n)) + loc;
+}
+
+/**
+Evaluate the real fast Fourier transform of the periodic squared exponential kernel.
+
+:param n: Number of grid points.
+:param sigma: Scale of the covariance.
+:param length_scale: Correlation length.
+:param period: Period for circular boundary conditions.
+:returns: Fourier transform of the squared exponential kernel of size :code:`n %/% 2 + 1`.
+*/
+vector gp_periodic_exp_quad_cov_rfft(int n, real sigma, real length_scale, real period) {
+    int nrfft = n %/% 2 + 1;
+    return n * sigma ^ 2 * length_scale / period * sqrt(2 * pi())
+        * exp(-2 * (pi() * linspaced_vector(nrfft, 0, nrfft - 1) * length_scale / period) ^ 2);
+}
+
+/**
+Evaluate the real fast Fourier transform of the periodic Matern kernel.
+
+:param nu: Smoothness parameter (1 / 2, 3 / 2, and 5 / 2 are typical values).
+:param n: Number of grid points.
+:param sigma: Scale of the covariance.
+:param length_scale: Correlation length.
+:param period: Period for circular boundary conditions.
+:returns: Fourier transform of the squared exponential kernel of size :code:`n %/% 2 + 1`.
+*/
+vector gp_periodic_matern_cov_rfft(real nu, int n, real sigma, real length_scale, real period) {
+    int nrfft = n %/% 2 + 1;
+    vector[nrfft] k = linspaced_vector(nrfft, 0, nrfft - 1);
+    return sigma ^ 2 * n * sqrt(2 * pi() / nu) * tgamma(nu + 0.5) / tgamma(nu)
+        * (1 + 2 / nu * (pi() * length_scale / period * k) ^ 2) ^ -(nu + 0.5) * length_scale
+        / period;
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Generated by roxygen2: do not edit by hand

		export(gptools_include_path)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#include gptools/fft1.stan
		#include gptools/fft2.stan