ENH: Add functions to calculate the pdf and cdf

zoj613 · Mar 22, 2021 · 7b6669c · 7b6669c
1 parent 2af0edb
commit 7b6669c
Show file tree

Hide file tree

Showing 7 changed files with 371 additions and 3 deletions.
diff --git a/build.py b/build.py
@@ -12,6 +12,7 @@
     "src/pgm_devroye.c",
     "src/pgm_common.c",
     "src/pgm_saddle.c",
+    "src/pgm_density.c",
 ]
 
 

diff --git a/include/pgm_density.h b/include/pgm_density.h
@@ -0,0 +1,18 @@
+#ifndef PGM_DENSITY_H
+#define PGM_DENSITY_H
+#include <stddef.h>
+
+/*
+ * Approximate the density function of PG(h, z).
+ *
+ * The value is calculated with accuracy of up `terms` terms. The calculate
+ * will terminate early if successive terms are very small such that the
+ * current series value is equal to the previous value with given tolerance.
+ */
+double
+pgm_polyagamma_pdf(double x, double h, double z, size_t terms, double rtol);
+
+double
+pgm_polyagamma_cdf(double x, double h, double z, size_t terms, double rtol);
+
+#endif
diff --git a/polyagamma/__init__.py b/polyagamma/__init__.py
@@ -1,4 +1,4 @@
-from _polyagamma import polyagamma
+from _polyagamma import polyagamma, polyagamma_pdf, polyagamma_cdf
 
 __version__ = '1.2.0'
 

diff --git a/polyagamma/_polyagamma.pyx b/polyagamma/_polyagamma.pyx
@@ -281,3 +281,167 @@ def polyagamma(h=1, z=0, *, size=None, double[:] out=None, method=None,
             return
         else:
             return arr
+
+
+cdef extern from "pgm_density.h":
+    double pgm_polyagamma_pdf(double x, double h, double z, size_t terms,
+                              double rtol) nogil
+    double pgm_polyagamma_cdf(double x, double h, double z, size_t terms,
+                              double rtol) nogil
+
+
+ctypedef double (*dist_func)(double x, double h, double z, size_t terms,
+                             double rtol) nogil
+
+
+cdef object dispatcher(dist_func f, object x, object h, object z, size_t terms,
+                       double rtol):
+    cdef double cx, ch, cz
+
+    if is_a_number(x) and is_a_number(h) and is_a_number(z):
+        cx, ch, cz = x, h, z
+        with nogil:
+            cx = f(cx, ch, cz, terms, rtol)
+        return cx
+
+    x = np.PyArray_FROM_OT(x, np.NPY_DOUBLE)
+    h = np.PyArray_FROM_OT(h, np.NPY_DOUBLE)
+    z = np.PyArray_FROM_OT(z, np.NPY_DOUBLE)
+    cdef np.broadcast bcast = np.PyArray_MultiIterNew3(x, h, z)
+    arr = np.PyArray_EMPTY(bcast.nd, bcast.dimensions, np.NPY_DOUBLE, 0)
+    cdef double* arr_ptr = <double*>np.PyArray_DATA(arr)
+
+    with nogil:
+        while bcast.index < bcast.size:
+            cx = (<double*>np.PyArray_MultiIter_DATA(bcast, 0))[0]
+            ch = (<double*>np.PyArray_MultiIter_DATA(bcast, 1))[0]
+            cz = (<double*>np.PyArray_MultiIter_DATA(bcast, 2))[0]
+            arr_ptr[bcast.index] = f(cx, ch, cz, terms, rtol)
+            np.PyArray_MultiIter_NEXT(bcast)
+
+    return arr
+
+
+def polyagamma_pdf(x, *, h=1, z=0, size_t terms=200, double rtol=1e-12):
+    """
+    polyagamma_pdf(x, *, h=1, z=0, terms=200, rtol=1e-12)
+
+    Calculate the density of PG(h, z) at `x`.
+
+    Parameters
+    ----------
+    h : scalar or sequence, optional
+        The shape parameter of the distribution as described in [1]_.
+        The value(s) must be positive and finite. Defaults to 1.
+    z : scalar or sequence, optional
+        The exponential tilting parameter as described in [1]_. The value(s)
+        must be finite. Defaults to 0.
+    terms : int, optional
+        The maximum number of terms used in the alternating infinite series
+        to approximate the density of the distribution. Defaults to 200.
+    rtol : float, optional
+        The relative tolerance used to test for convergence of the infinite
+        series used to approximate the distribution's density function.
+        Defaults to 1e-12.
+
+    Returns
+    -------
+    out : numpy.ndarray or scalar
+        Value of the density function at `x` of PG(`h`, `z`).
+
+    Notes
+    -----
+    This function implements the density function as shown in page 6 of [1]_.
+    The infinite sum is truncated at `terms` terms. Convergence of the series
+    is tested after each term is calculated so that if the successive terms
+    are too small to be significant, the calculation is terminated early.
+
+    References
+    ----------
+    .. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
+           "Bayesian inference for logistic models using Pólya–Gamma latent
+           variables." Journal of the American statistical Association
+           108.504 (2013): 1339-1349.
+
+    Examples
+    --------
+    >>> from polyagamma import polyagamma_pdf
+    >>> import numpy as np
+    >>> polyagamma_pdf(0.2)
+    2.339176537265802
+    >>> polyagamma_pdf(3, h=6, z=1)
+    0.012537773310576542
+    >>> x = np.linspace(0.01, 0.5, 5)
+    >>> polyagamma_pdf(x)
+    array([1.48671951e-03, 3.21504108e+00, 1.78492273e+00, 9.75298427e-01,
+           5.32845353e-01])
+    >>> polyagamma_pdf(0.75, h=[4, 3, 1])
+    array([1.08247188, 1.1022302 , 0.15517146])
+
+    """
+    return dispatcher(pgm_polyagamma_pdf, x, h, z, terms, rtol)
+
+
+def polyagamma_cdf(x, *, h=1, z=0, size_t terms=200, double rtol=1e-12):
+    """
+    polyagamma_cdf(x, *, h=1, z=0, terms=200, rtol=1e-12)
+
+    Calculate the cumulative distribution function of PG(h, z) at `x`.
+
+    Parameters
+    ----------
+    h : scalar or sequence, optional
+        The shape parameter of the distribution as described in [1]_.
+        The value(s) must be positive and finite. Defaults to 1.
+    z : scalar or sequence, optional
+        The exponential tilting parameter as described in [1]_. The value(s)
+        must be finite. Defaults to 0.
+    terms : int, optional
+        The maximum number of terms used in the alternating infinite series
+        to approximate the density of the distribution. Defaults to 200.
+    rtol : float, optional
+        The relative tolerance used to test for convergence of the infinite
+        series used to approximate the distribution's density function.
+        Defaults to 1e-12.
+
+    Returns
+    -------
+    out : numpy.ndarray or scalar
+        Value of the density function at `x` of PG(`h`, `z`).
+
+    Notes
+    -----
+    This function implements the integration of the density function as shown
+    in page 6 of [1]_.
+
+    The infinite sum is truncated at `terms` terms. Convergence of the series
+    is tested after each term is calculated so that if the successive terms
+    are too small to be significant, the calculation is terminated early.
+
+    References
+    ----------
+    .. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
+           "Bayesian inference for logistic models using Pólya–Gamma latent
+           variables." Journal of the American statistical Association
+           108.504 (2013): 1339-1349.
+    .. [2] Wikipedia contributors. "Romberg's method." Wikipedia, The Free
+           Encyclopedia. Wikipedia, The Free Encyclopedia, 26 Feb. 2021.
+           Web. 21 Mar. 2021.
+
+    Examples
+    --------
+    >>> from polyagamma import polyagamma_cdf
+    >>> import numpy as np
+    >>> polyagamma_cdf(0.2)
+    0.525512539620251
+    >>> polyagamma_cdf(3, h=6, z=1)
+    0.9966435676448027
+    >>> x = np.linspace(0.01, 1, 5)
+    >>> polyagamma_cdf(x)
+    array([1.14660629e-06, 6.42692997e-01, 8.94654582e-01, 9.68941234e-01,
+           9.90843010e-01])
+    >>> polyagamma_cdf(0.75, h=[4, 3, 1])
+    array([0.30130807, 0.57523169, 0.96855569])
+
+    """
+    return dispatcher(pgm_polyagamma_cdf, x, h, z, terms, rtol)
diff --git a/src/pgm_density.c b/src/pgm_density.c
@@ -0,0 +1,157 @@
+/* Copyright (c) 2021, Zolisa Bleki
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+#include "pgm_common.h"
+#include "../include/pgm_density.h"
+
+#define PGM_LOG2 0.6931471805599453  // log(2)
+#define PGM_2PI 6.283185307179586
+
+DECLDIR bool
+is_close(double a, double b, double atol, double rtol);
+
+/*
+ * Approximate the density function of PG(h, z).
+ *
+ * The value is calculated with accuracy of up `terms` terms. The calculate
+ * will terminate early if successive terms are very small such that the
+ * current series value is equal to the previous value with given tolerance.
+ */
+double
+pgm_polyagamma_pdf(double x, double h, double z, size_t terms, double rtol)
+{
+    if (x <= 0) {
+        return 0;
+    }
+
+    double sum = 0;
+    double a = (fabs(z) > 0 ? h * log(cosh(0.5 * z)) - 0.5 * z * z * x : 0) +
+               (h - 1) * PGM_LOG2 - pgm_lgamma(h);
+
+    for (size_t n = 0; n < terms; n++) {
+        double twonh = 2 * n + h;
+        double term = exp(pgm_lgamma(n + h) - pgm_lgamma(n + 1) -
+                          0.125 * twonh * twonh / x) * twonh;
+        double prev_sum = sum;
+        sum += n & 1 ? -term: term;
+        if (is_close(sum, prev_sum, 0, rtol)) {
+            break;
+        }
+    }
+    return exp(a) * sum / sqrt(PGM_2PI * x * x * x);
+}
+
+/*
+ * A struct for passing around arguments of the integrand for the Romberg method
+ */
+struct integrand_args {
+    // (2 * n + h)^2
+    double twonh2;
+    // 0.5 * z^2
+    double halfz2;
+};
+
+/*
+ * The function to integrate.
+ * f(x) = exp(-0.125 * (2n + h)^2 / x - 0.5 * z^2) / sqrt(2 * pi * x^3)
+ */
+static NPY_INLINE double
+integrand(double x, struct integrand_args* arg)
+{
+    return exp(-0.125 * arg->twonh2 / x - arg->halfz2 * x) / sqrt(PGM_2PI * x * x * x);
+}
+
+
+static NPY_INLINE void
+swap_pointers(double** a, double** b)
+{
+    static double* temp;
+    temp = *a;
+    *a = *b;
+    *b = temp;
+}
+
+#ifndef PGM_ROMBERG_ITERS
+#define PGM_ROMBERG_ITERS 15
+#endif
+/*
+ * Approximate the integral of the function `integrand` using Romberg's method.
+ *
+ *  The approximation is accurate up to the given tolerance.
+ */
+static NPY_INLINE double
+romberg(double b, struct integrand_args* args, double rtol)
+{
+    static double R[PGM_ROMBERG_ITERS][PGM_ROMBERG_ITERS];
+    static double* R1 = R[0];
+    static double* R2 = R[1];
+    static double a = 1e-03;
+    double h = b - a;
+
+    R1[0] = 0.5 * h * (integrand(a, args) + integrand(b, args));
+
+    for (size_t i = 1; i < PGM_ROMBERG_ITERS; i++) {
+        h *= 0.5;
+        double sum = 0;
+        for (size_t j = 1; j <= 1 << (i - 1); j++) {
+            sum += integrand(a + (2 * j - 1) * h, args);
+        }
+
+        R2[0] = 0.5 * R1[0] + h * sum;
+        for (size_t j = 1; j <= i; j++) {
+            R2[j] = R2[j - 1] + (R2[j - 1] - R1[j - 1]) / ((1 << (2 * j)) - 1);
+        }
+
+        if (i > 1 && is_close(R2[i], R1[i - 1], 0, rtol)) {
+            return R2[i - 1];
+        }
+        swap_pointers(&R1, &R2);
+    }
+    return R1[PGM_ROMBERG_ITERS - 1];
+}
+
+/*
+ * Appromixate the comulative distribution function of PG(h, z) at x.
+ *
+ * Romberg's method is used to approximate the integral of
+ *
+ *      f(x) = exp(-0.125 * (2n + h)^2 / x - 0.5 * z^2) / sqrt(2 * pi * x^3)
+ *
+ * The infinite sum is truncated at `terms` terms. Convergence
+ * of the series is tested after each term is calculated so that if the
+ * successive terms are too small to be significant, the calculation is
+ * terminated early.
+ *
+ * References
+ * ----------
+ * [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
+ *     "Bayesian inference for logistic models using Pólya–Gamma latent
+ *     variables." Journal of the American statistical Association
+ *     108.504 (2013): 1339-1349.
+ */
+double
+pgm_polyagamma_cdf(double x, double h, double z, size_t terms, double rtol)
+{
+    if (x <= 0) {
+        return 0;
+    }
+
+    double sum = 0;
+    double a = (fabs(z) > 0 ? h * log(cosh(0.5 * z)) : 0) +
+               (h - 1) * PGM_LOG2 - pgm_lgamma(h);
+
+    struct integrand_args args = {.halfz2 = 0.5 * z * z};
+
+    for (size_t n = 0; n < terms; n++) {
+        double twonh = 2 * n + h;
+        args.twonh2 = twonh * twonh;
+        double term = exp(pgm_lgamma(n + h) - pgm_lgamma(n + 1)) * twonh *
+                      romberg(x, &args, rtol);
+        double prev_sum = sum;
+        sum += n & 1 ? -term : term;
+        if (is_close(sum, prev_sum, 0, rtol)) {
+            break;
+        }
+    }
+    return exp(a) * sum;
+}
diff --git a/src/pgm_saddle.c b/src/pgm_saddle.c
@@ -159,7 +159,7 @@ select_starting_guess(double x)
  *
  * `atol` is the minimum absolute tolerance – useful for comparisons near zero.
  */
-static NPY_INLINE bool
+DECLDIR NPY_INLINE bool
 is_close(double a, double b, double atol, double rtol)
 {
     return fabs(a - b) <= MAX(rtol * MAX(fabs(a), fabs(b)), atol);