Skip to content

Commit

Permalink
ENH: Add functions to calculate the pdf and cdf
Browse files Browse the repository at this point in the history
  • Loading branch information
zoj613 committed Mar 22, 2021
1 parent 2af0edb commit 7b6669c
Show file tree
Hide file tree
Showing 7 changed files with 371 additions and 3 deletions.
1 change: 1 addition & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"src/pgm_devroye.c",
"src/pgm_common.c",
"src/pgm_saddle.c",
"src/pgm_density.c",
]


Expand Down
18 changes: 18 additions & 0 deletions include/pgm_density.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef PGM_DENSITY_H
#define PGM_DENSITY_H
#include <stddef.h>

/*
* Approximate the density function of PG(h, z).
*
* The value is calculated with accuracy of up `terms` terms. The calculate
* will terminate early if successive terms are very small such that the
* current series value is equal to the previous value with given tolerance.
*/
double
pgm_polyagamma_pdf(double x, double h, double z, size_t terms, double rtol);

double
pgm_polyagamma_cdf(double x, double h, double z, size_t terms, double rtol);

#endif
2 changes: 1 addition & 1 deletion polyagamma/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from _polyagamma import polyagamma
from _polyagamma import polyagamma, polyagamma_pdf, polyagamma_cdf

__version__ = '1.2.0'

Expand Down
164 changes: 164 additions & 0 deletions polyagamma/_polyagamma.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,167 @@ def polyagamma(h=1, z=0, *, size=None, double[:] out=None, method=None,
return
else:
return arr


cdef extern from "pgm_density.h":
double pgm_polyagamma_pdf(double x, double h, double z, size_t terms,
double rtol) nogil
double pgm_polyagamma_cdf(double x, double h, double z, size_t terms,
double rtol) nogil


ctypedef double (*dist_func)(double x, double h, double z, size_t terms,
double rtol) nogil


cdef object dispatcher(dist_func f, object x, object h, object z, size_t terms,
double rtol):
cdef double cx, ch, cz

if is_a_number(x) and is_a_number(h) and is_a_number(z):
cx, ch, cz = x, h, z
with nogil:
cx = f(cx, ch, cz, terms, rtol)
return cx

x = np.PyArray_FROM_OT(x, np.NPY_DOUBLE)
h = np.PyArray_FROM_OT(h, np.NPY_DOUBLE)
z = np.PyArray_FROM_OT(z, np.NPY_DOUBLE)
cdef np.broadcast bcast = np.PyArray_MultiIterNew3(x, h, z)
arr = np.PyArray_EMPTY(bcast.nd, bcast.dimensions, np.NPY_DOUBLE, 0)
cdef double* arr_ptr = <double*>np.PyArray_DATA(arr)

with nogil:
while bcast.index < bcast.size:
cx = (<double*>np.PyArray_MultiIter_DATA(bcast, 0))[0]
ch = (<double*>np.PyArray_MultiIter_DATA(bcast, 1))[0]
cz = (<double*>np.PyArray_MultiIter_DATA(bcast, 2))[0]
arr_ptr[bcast.index] = f(cx, ch, cz, terms, rtol)
np.PyArray_MultiIter_NEXT(bcast)

return arr


def polyagamma_pdf(x, *, h=1, z=0, size_t terms=200, double rtol=1e-12):
"""
polyagamma_pdf(x, *, h=1, z=0, terms=200, rtol=1e-12)
Calculate the density of PG(h, z) at `x`.
Parameters
----------
h : scalar or sequence, optional
The shape parameter of the distribution as described in [1]_.
The value(s) must be positive and finite. Defaults to 1.
z : scalar or sequence, optional
The exponential tilting parameter as described in [1]_. The value(s)
must be finite. Defaults to 0.
terms : int, optional
The maximum number of terms used in the alternating infinite series
to approximate the density of the distribution. Defaults to 200.
rtol : float, optional
The relative tolerance used to test for convergence of the infinite
series used to approximate the distribution's density function.
Defaults to 1e-12.
Returns
-------
out : numpy.ndarray or scalar
Value of the density function at `x` of PG(`h`, `z`).
Notes
-----
This function implements the density function as shown in page 6 of [1]_.
The infinite sum is truncated at `terms` terms. Convergence of the series
is tested after each term is calculated so that if the successive terms
are too small to be significant, the calculation is terminated early.
References
----------
.. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
"Bayesian inference for logistic models using Pólya–Gamma latent
variables." Journal of the American statistical Association
108.504 (2013): 1339-1349.
Examples
--------
>>> from polyagamma import polyagamma_pdf
>>> import numpy as np
>>> polyagamma_pdf(0.2)
2.339176537265802
>>> polyagamma_pdf(3, h=6, z=1)
0.012537773310576542
>>> x = np.linspace(0.01, 0.5, 5)
>>> polyagamma_pdf(x)
array([1.48671951e-03, 3.21504108e+00, 1.78492273e+00, 9.75298427e-01,
5.32845353e-01])
>>> polyagamma_pdf(0.75, h=[4, 3, 1])
array([1.08247188, 1.1022302 , 0.15517146])
"""
return dispatcher(pgm_polyagamma_pdf, x, h, z, terms, rtol)


def polyagamma_cdf(x, *, h=1, z=0, size_t terms=200, double rtol=1e-12):
"""
polyagamma_cdf(x, *, h=1, z=0, terms=200, rtol=1e-12)
Calculate the cumulative distribution function of PG(h, z) at `x`.
Parameters
----------
h : scalar or sequence, optional
The shape parameter of the distribution as described in [1]_.
The value(s) must be positive and finite. Defaults to 1.
z : scalar or sequence, optional
The exponential tilting parameter as described in [1]_. The value(s)
must be finite. Defaults to 0.
terms : int, optional
The maximum number of terms used in the alternating infinite series
to approximate the density of the distribution. Defaults to 200.
rtol : float, optional
The relative tolerance used to test for convergence of the infinite
series used to approximate the distribution's density function.
Defaults to 1e-12.
Returns
-------
out : numpy.ndarray or scalar
Value of the density function at `x` of PG(`h`, `z`).
Notes
-----
This function implements the integration of the density function as shown
in page 6 of [1]_.
The infinite sum is truncated at `terms` terms. Convergence of the series
is tested after each term is calculated so that if the successive terms
are too small to be significant, the calculation is terminated early.
References
----------
.. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
"Bayesian inference for logistic models using Pólya–Gamma latent
variables." Journal of the American statistical Association
108.504 (2013): 1339-1349.
.. [2] Wikipedia contributors. "Romberg's method." Wikipedia, The Free
Encyclopedia. Wikipedia, The Free Encyclopedia, 26 Feb. 2021.
Web. 21 Mar. 2021.
Examples
--------
>>> from polyagamma import polyagamma_cdf
>>> import numpy as np
>>> polyagamma_cdf(0.2)
0.525512539620251
>>> polyagamma_cdf(3, h=6, z=1)
0.9966435676448027
>>> x = np.linspace(0.01, 1, 5)
>>> polyagamma_cdf(x)
array([1.14660629e-06, 6.42692997e-01, 8.94654582e-01, 9.68941234e-01,
9.90843010e-01])
>>> polyagamma_cdf(0.75, h=[4, 3, 1])
array([0.30130807, 0.57523169, 0.96855569])
"""
return dispatcher(pgm_polyagamma_cdf, x, h, z, terms, rtol)
157 changes: 157 additions & 0 deletions src/pgm_density.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* Copyright (c) 2021, Zolisa Bleki
*
* SPDX-License-Identifier: BSD-3-Clause */
#include "pgm_common.h"
#include "../include/pgm_density.h"

#define PGM_LOG2 0.6931471805599453 // log(2)
#define PGM_2PI 6.283185307179586

DECLDIR bool
is_close(double a, double b, double atol, double rtol);

/*
* Approximate the density function of PG(h, z).
*
* The value is calculated with accuracy of up `terms` terms. The calculate
* will terminate early if successive terms are very small such that the
* current series value is equal to the previous value with given tolerance.
*/
double
pgm_polyagamma_pdf(double x, double h, double z, size_t terms, double rtol)
{
if (x <= 0) {
return 0;
}

double sum = 0;
double a = (fabs(z) > 0 ? h * log(cosh(0.5 * z)) - 0.5 * z * z * x : 0) +
(h - 1) * PGM_LOG2 - pgm_lgamma(h);

for (size_t n = 0; n < terms; n++) {
double twonh = 2 * n + h;
double term = exp(pgm_lgamma(n + h) - pgm_lgamma(n + 1) -
0.125 * twonh * twonh / x) * twonh;
double prev_sum = sum;
sum += n & 1 ? -term: term;
if (is_close(sum, prev_sum, 0, rtol)) {
break;
}
}
return exp(a) * sum / sqrt(PGM_2PI * x * x * x);
}

/*
* A struct for passing around arguments of the integrand for the Romberg method
*/
struct integrand_args {
// (2 * n + h)^2
double twonh2;
// 0.5 * z^2
double halfz2;
};

/*
* The function to integrate.
* f(x) = exp(-0.125 * (2n + h)^2 / x - 0.5 * z^2) / sqrt(2 * pi * x^3)
*/
static NPY_INLINE double
integrand(double x, struct integrand_args* arg)
{
return exp(-0.125 * arg->twonh2 / x - arg->halfz2 * x) / sqrt(PGM_2PI * x * x * x);
}


static NPY_INLINE void
swap_pointers(double** a, double** b)
{
static double* temp;
temp = *a;
*a = *b;
*b = temp;
}

#ifndef PGM_ROMBERG_ITERS
#define PGM_ROMBERG_ITERS 15
#endif
/*
* Approximate the integral of the function `integrand` using Romberg's method.
*
* The approximation is accurate up to the given tolerance.
*/
static NPY_INLINE double
romberg(double b, struct integrand_args* args, double rtol)
{
static double R[PGM_ROMBERG_ITERS][PGM_ROMBERG_ITERS];
static double* R1 = R[0];
static double* R2 = R[1];
static double a = 1e-03;
double h = b - a;

R1[0] = 0.5 * h * (integrand(a, args) + integrand(b, args));

for (size_t i = 1; i < PGM_ROMBERG_ITERS; i++) {
h *= 0.5;
double sum = 0;
for (size_t j = 1; j <= 1 << (i - 1); j++) {
sum += integrand(a + (2 * j - 1) * h, args);
}

R2[0] = 0.5 * R1[0] + h * sum;
for (size_t j = 1; j <= i; j++) {
R2[j] = R2[j - 1] + (R2[j - 1] - R1[j - 1]) / ((1 << (2 * j)) - 1);
}

if (i > 1 && is_close(R2[i], R1[i - 1], 0, rtol)) {
return R2[i - 1];
}
swap_pointers(&R1, &R2);
}
return R1[PGM_ROMBERG_ITERS - 1];
}

/*
* Appromixate the comulative distribution function of PG(h, z) at x.
*
* Romberg's method is used to approximate the integral of
*
* f(x) = exp(-0.125 * (2n + h)^2 / x - 0.5 * z^2) / sqrt(2 * pi * x^3)
*
* The infinite sum is truncated at `terms` terms. Convergence
* of the series is tested after each term is calculated so that if the
* successive terms are too small to be significant, the calculation is
* terminated early.
*
* References
* ----------
* [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
* "Bayesian inference for logistic models using Pólya–Gamma latent
* variables." Journal of the American statistical Association
* 108.504 (2013): 1339-1349.
*/
double
pgm_polyagamma_cdf(double x, double h, double z, size_t terms, double rtol)
{
if (x <= 0) {
return 0;
}

double sum = 0;
double a = (fabs(z) > 0 ? h * log(cosh(0.5 * z)) : 0) +
(h - 1) * PGM_LOG2 - pgm_lgamma(h);

struct integrand_args args = {.halfz2 = 0.5 * z * z};

for (size_t n = 0; n < terms; n++) {
double twonh = 2 * n + h;
args.twonh2 = twonh * twonh;
double term = exp(pgm_lgamma(n + h) - pgm_lgamma(n + 1)) * twonh *
romberg(x, &args, rtol);
double prev_sum = sum;
sum += n & 1 ? -term : term;
if (is_close(sum, prev_sum, 0, rtol)) {
break;
}
}
return exp(a) * sum;
}
2 changes: 1 addition & 1 deletion src/pgm_saddle.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ select_starting_guess(double x)
*
* `atol` is the minimum absolute tolerance – useful for comparisons near zero.
*/
static NPY_INLINE bool
DECLDIR NPY_INLINE bool
is_close(double a, double b, double atol, double rtol)
{
return fabs(a - b) <= MAX(rtol * MAX(fabs(a), fabs(b)), atol);
Expand Down
Loading

0 comments on commit 7b6669c

Please sign in to comment.