Skip to content

Commit

Permalink
Add Polyagamma distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
zoj613 committed Mar 27, 2021
1 parent af23c2f commit ac6f6aa
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 0 deletions.
161 changes: 161 additions & 0 deletions pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from aesara.assert_op import Assert
from aesara.tensor.random.basic import (
BetaRV,
RandomVariable,
cauchy,
exponential,
gamma,
Expand All @@ -33,6 +34,21 @@
normal,
uniform,
)

try:
from polyagamma import polyagamma_logcdf, polyagamma_logpdf, random_polyagamma
except ImportError: # pragma: no cover

def random_polyagamma(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")

def polyagamma_logpdf(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")

def polyagamma_logcdf(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")


from scipy import stats
from scipy.interpolate import InterpolatedUnivariateSpline

Expand Down Expand Up @@ -89,6 +105,7 @@
"Rice",
"Moyal",
"AsymmetricLaplace",
"PolyaGamma",
]


Expand Down Expand Up @@ -4265,3 +4282,147 @@ def logcdf(self, value):
aet.log(aet.erfc(aet.exp(-scaled / 2) * (2 ** -0.5))),
0 < sigma,
)


class PolyaGammaRV(RandomVariable):
"""Polya-Gamma random variable."""

name = "polyagamma"
ndim_supp = 0
ndims_params = [0, 0]
dtype = "floatX"
_print_name = ("PG", "\\operatorname{PG}")

def __call__(self, h=1, z=0, size=None, **kwargs):
return super().__call__(h, z, size=size, **kwargs)

@classmethod
def rng_fn(cls, rng, h, z, size=None):
"""
Generate a random sample from the distribution with the given parameters
Parameters
----------
h : scalar or sequence, optional
The shape parameter of the distribution.
z : scalar or sequence, optional
The exponential tilting parameter.
size : int or tuple of ints, optional
The number of elements to draw from the distribution. If size is
``None`` (default) then a single value is returned. If a tuple of
integers is passed, the returned array will have the same shape.
If the element(s) of size is not an integer type, it will be truncated
to the largest integer smaller than its value (e.g (2.1, 1) -> (2, 1)).
This parameter only applies if `h` and `z` are scalars.
"""
# handle the kind of rng passed to the sampler
bg = rng._bit_generator if isinstance(rng, np.random.RandomState) else rng
return random_polyagamma(h, z, size=size, random_state=bg)


polyagamma = PolyaGammaRV()


class PolyaGamma(PositiveContinuous):
r"""
The Polya-Gamma distribution.
The distribution is parametrized by ``h`` (shape parameter) and ``z``
(exponential tilting parameter). The pdf of this distribution is
.. math::
f(x \mid h, z) = cosh^h(\frac{z}{2})e^{-\frac{1}{2}xz^2}f(x \mid h, 0),
where :math:`f(x \mid h, 0)` is the pdf of a :math:`PG(h, 0)` variable.
Notice that the pdf of this distribution is expressed as an alternating-sign
sum of inverse-Gaussian densities.
.. math::
X = \Sigma_{k=1}^{\infty}\frac{Ga(h, 1)}{d_k},
where :math:`d_k = 2(k - 0.5)^2\pi^2 + z^2/2`, :math:`Ga(h, 1)` is a gamma
random variable with shape parameter ``h`` and scale parameter ``1``.
.. plot::
import matplotlib.pyplot as plt
import numpy as np
from polyagamma import polyagamma_pdf
plt.style.use('seaborn-darkgrid')
x = np.linspace(0.01, 5, 500);x.sort()
hs = [1., 5., 10., 15.]
zs = [0.] * 4
for h, z in zip(hs, zs):
pdf = polyagamma_pdf(x, h=h, z=z)
plt.plot(x, pdf, label=r'$h$ = {}, $z$ = {}'.format(h, z))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()
======== =============================
Support :math:`x \in (0, \infty)`
Mean :math:`dfrac{h}{4} if :math:`z=0`, :math:`\dfrac{tanh(z/2)h}{2z}` otherwise.
Variance :math:`0.041666688h` if :math:`z=0`, :math:`\dfrac{h(sinh(z) - z)(1 - tanh^2(z/2))}{4z^3}` otherwise.
======== =============================
Parameters
----------
h: float, optional
The shape parameter of the distribution (h > 0).
z: float, optional
The exponential tilting parameter of the distribution.
Examples
--------
.. code-block:: python
rng = np.random.default_rng()
with pm.Model():
x = pm.PolyaGamma('x', h=1, z=5.5)
with pm.Model():
x = pm.PolyaGamma('x', h=25, z=-2.3, rng=rng, size=(100, 5))
References
----------
.. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
"Bayesian inference for logistic models using Pólya–Gamma latent
variables." Journal of the American statistical Association
108.504 (2013): 1339-1349.
.. [2] Windle, Jesse, Nicholas G. Polson, and James G. Scott.
"Sampling Polya-Gamma random variates: alternate and approximate
techniques." arXiv preprint arXiv:1405.0506 (2014)
.. [3] Luc Devroye. "On exact simulation algorithms for some distributions
related to Jacobi theta functions." Statistics & Probability Letters,
Volume 79, Issue 21, (2009): 2251-2259.
"""
rv_op = polyagamma

@classmethod
def dist(cls, h=1.0, z=0.0, rng=None, size=None, **kwargs):
# really not sure whether h & z are required to be tensors? The
# distribution uses numpy arrays or scalars for parameter values.
hh = aet.as_tensor_variable(h)
zz = aet.as_tensor_variable(z)

msg = f"The variable {hh} specified for PolyaGamma has non-positive "
msg += "values, making it unsuitable for this parameter."
Assert(msg)(hh, aet.all(aet.gt(hh, 0.00001)))

return super().dist([h, z], size=size, rng=rng, **kwargs)

def logp(value, h, z):
"""
Calculate log-probability of Normal distribution at specified value.
Parameters
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
values are desired the values must be provided in a numpy array.
Returns
-------
TensorVariable
"""
return bound(polyagamma_logpdf(value, h, z), h > 0)

def logcdf(value, h, z):
"""
Compute the log of the cumulative distribution function for Normal distribution
at the specified value.
Parameters
----------
value: numeric or np.ndarray or `TensorVariable`
Value(s) for which log CDF is calculated. If the log CDF for multiple
values are desired the values must be provided in a numpy array.
Returns
-------
TensorVariable
"""
return bound(polyagamma_logcdf(value, h, z), h > 0)
37 changes: 37 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@

import pymc3 as pm

try:
pm.PolyaGamma.rv_op.rng_fn()
polyagamma_not_installed = False
except RuntimeError: # pragma: no cover
polyagamma_not_installed = True

from pymc3.aesaraf import floatX, intX
from pymc3.distributions import change_rv_size
from pymc3.distributions.dist_math import clipped_beta_rvs
Expand Down Expand Up @@ -518,6 +524,37 @@ def test_probability_vector_shape(self):
assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 3, 7)


@pytest.mark.skipif(polyagamma_not_installed)
class PolyaGamma(BaseTestCases.BaseTestCase):
rng = np.random.default_rng(SeededTest.random_seed)
distribution = pm.PolyaGamma
params = {"h": 1, "z": 0.0}

@staticmethod
def sample_random_variable(this, size):
""" Draws samples from a RandomVariable using its .rng_fn method. """
if size is None:
return this.distribution.rv_op.rng_fn(cls.rng)
else:
return this.distribution.rv_op.rng_fn(cls.rng, size=size)

@pytest.mark.parametrize(
"shape, size",
[
((2), (1)),
((2), (2)),
((2, 2), (2, 100)),
((3, 4), (3, 4)),
((3, 4), (3, 4, 100)),
((3, 4), (100)),
((3, 4), (1)),
],
)
def test_polyagamma_random_shape(self, shape, size):
out_shape = to_tuple(size) + to_tuple(shape)
assert self.distribution.rv_op.rng_fn(cls.rng, size=size).shape == out_shape


@pytest.mark.skip(reason="This test is covered by Aesara")
class TestDirichlet(SeededTest):
@pytest.mark.parametrize(
Expand Down

0 comments on commit ac6f6aa

Please sign in to comment.