Skip to content

Commit

Permalink
Add Polya-Gamma distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
zoj613 authored and ricardoV94 committed Jul 13, 2021
1 parent 8623cd4 commit e8396bf
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ jobs:
run: |
conda activate pymc3-dev-py37
pip install -e .
pip install --pre -U polyagamma
python --version
- name: Run tests
run: |
Expand Down Expand Up @@ -211,6 +212,7 @@ jobs:
run: |
conda activate pymc3-dev-py38
pip install -e .
pip install --pre -U polyagamma
python --version
- name: Run tests
# This job uses a cmd shell, therefore the environment variable syntax is different!
Expand Down
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- Add `logcdf` method to Kumaraswamy distribution (see [#4706](https://github.com/pymc-devs/pymc3/pull/4706)).
- The `OrderedMultinomial` distribution has been added for use on ordinal data which are _aggregated_ by trial, like multinomial observations, whereas `OrderedLogistic` only accepts ordinal data in a _disaggregated_ format, like categorical
observations (see [#4773](https://github.com/pymc-devs/pymc3/pull/4773)).
- The `Polya-Gamma` distribution has been added (see [#4531](https://github.com/pymc-devs/pymc3/pull/4531)). To make use of this distribution, the [`polyagamma>=1.3.1`](https://pypi.org/project/polyagamma/) library must be installed and available in the user's environment.
- ...

### Maintenance
Expand Down
1 change: 1 addition & 0 deletions docs/source/api/distributions/continuous.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Continuous
Logistic
LogitNormal
Interpolated
PolyaGamma

.. automodule:: pymc3.distributions.continuous
:members:
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
Moyal,
Normal,
Pareto,
PolyaGamma,
Rice,
SkewNormal,
StudentT,
Expand Down Expand Up @@ -189,6 +190,7 @@
"Simulator",
"BART",
"CAR",
"PolyaGamma",
"logpt",
"logp",
"_logp",
Expand Down
218 changes: 218 additions & 0 deletions pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@

from typing import List, Optional, Tuple, Union

import aesara
import aesara.tensor as at
import numpy as np

from aesara.assert_op import Assert
from aesara.graph.basic import Apply
from aesara.graph.op import Op
from aesara.tensor import gammaln
from aesara.tensor.extra_ops import broadcast_shape
from aesara.tensor.random.basic import (
BetaRV,
WeibullRV,
Expand All @@ -47,6 +51,21 @@
)
from aesara.tensor.random.op import RandomVariable
from aesara.tensor.var import TensorConstant, TensorVariable

try:
from polyagamma import polyagamma_cdf, polyagamma_pdf, random_polyagamma
except ImportError: # pragma: no cover

def random_polyagamma(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")

def polyagamma_pdf(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")

def polyagamma_cdf(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")


from scipy import stats
from scipy.interpolate import InterpolatedUnivariateSpline
from scipy.special import expit
Expand Down Expand Up @@ -103,6 +122,7 @@
"Rice",
"Moyal",
"AsymmetricLaplace",
"PolyaGamma",
]


Expand Down Expand Up @@ -4007,3 +4027,201 @@ def logcdf(value, mu, sigma):
at.log(at.erfc(at.exp(-scaled / 2) * (2 ** -0.5))),
0 < sigma,
)


class PolyaGammaRV(RandomVariable):
"""Polya-Gamma random variable."""

name = "polyagamma"
ndim_supp = 0
ndims_params = [0, 0]
dtype = "floatX"
_print_name = ("PG", "\\operatorname{PG}")

def __call__(self, h=1.0, z=0.0, size=None, **kwargs):
return super().__call__(h, z, size=size, **kwargs)

@classmethod
def rng_fn(cls, rng, h, z, size=None):
"""
Generate a random sample from the distribution with the given parameters
Parameters
----------
rng : {None, int, array_like[ints], SeedSequence, BitGenerator, Generator}
A seed to initialize the random number generator. If None, then fresh,
unpredictable entropy will be pulled from the OS. If an ``int`` or
``array_like[ints]`` is passed, then it will be passed to
`SeedSequence` to derive the initial `BitGenerator` state. One may also
pass in a `SeedSequence` instance.
Additionally, when passed a `BitGenerator`, it will be wrapped by
`Generator`. If passed a `Generator`, it will be returned unaltered.
h : scalar or sequence
The shape parameter of the distribution.
z : scalar or sequence
The exponential tilting parameter.
size : int or tuple of ints, optional
The number of elements to draw from the distribution. If size is
``None`` (default) then a single value is returned. If a tuple of
integers is passed, the returned array will have the same shape.
If the element(s) of size is not an integer type, it will be truncated
to the largest integer smaller than its value (e.g (2.1, 1) -> (2, 1)).
This parameter only applies if `h` and `z` are scalars.
"""
# handle the kind of rng passed to the sampler
bg = rng._bit_generator if isinstance(rng, np.random.RandomState) else rng
return random_polyagamma(h, z, size=size, random_state=bg).astype(aesara.config.floatX)


polyagamma = PolyaGammaRV()


class _PolyaGammaLogDistFunc(Op):
__props__ = ("get_pdf",)

def __init__(self, get_pdf=False):
self.get_pdf = get_pdf

def make_node(self, x, h, z):
x = at.as_tensor_variable(floatX(x))
h = at.as_tensor_variable(floatX(h))
z = at.as_tensor_variable(floatX(z))
shape = broadcast_shape(x, h, z)
broadcastable = [] if not shape else [False] * len(shape)
return Apply(self, [x, h, z], [at.TensorType(aesara.config.floatX, broadcastable)()])

def perform(self, node, ins, outs):
x, h, z = ins[0], ins[1], ins[2]
outs[0][0] = (
polyagamma_pdf(x, h, z, return_log=True)
if self.get_pdf
else polyagamma_cdf(x, h, z, return_log=True)
).astype(aesara.config.floatX)


class PolyaGamma(PositiveContinuous):
r"""
The Polya-Gamma distribution.
The distribution is parametrized by ``h`` (shape parameter) and ``z``
(exponential tilting parameter). The pdf of this distribution is
.. math::
f(x \mid h, z) = cosh^h(\frac{z}{2})e^{-\frac{1}{2}xz^2}f(x \mid h, 0),
where :math:`f(x \mid h, 0)` is the pdf of a :math:`PG(h, 0)` variable.
Notice that the pdf of this distribution is expressed as an alternating-sign
sum of inverse-Gaussian densities.
.. math::
X = \Sigma_{k=1}^{\infty}\frac{Ga(h, 1)}{d_k},
where :math:`d_k = 2(k - 0.5)^2\pi^2 + z^2/2`, :math:`Ga(h, 1)` is a gamma
random variable with shape parameter ``h`` and scale parameter ``1``.
.. plot::
import matplotlib.pyplot as plt
import numpy as np
from polyagamma import polyagamma_pdf
plt.style.use('seaborn-darkgrid')
x = np.linspace(0.01, 5, 500);x.sort()
hs = [1., 5., 10., 15.]
zs = [0.] * 4
for h, z in zip(hs, zs):
pdf = polyagamma_pdf(x, h=h, z=z)
plt.plot(x, pdf, label=r'$h$ = {}, $z$ = {}'.format(h, z))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()
======== =============================
Support :math:`x \in (0, \infty)`
Mean :math:`dfrac{h}{4} if :math:`z=0`, :math:`\dfrac{tanh(z/2)h}{2z}` otherwise.
Variance :math:`0.041666688h` if :math:`z=0`, :math:`\dfrac{h(sinh(z) - z)(1 - tanh^2(z/2))}{4z^3}` otherwise.
======== =============================
Parameters
----------
h: float, optional
The shape parameter of the distribution (h > 0).
z: float, optional
The exponential tilting parameter of the distribution.
Examples
--------
.. code-block:: python
rng = np.random.default_rng()
with pm.Model():
x = pm.PolyaGamma('x', h=1, z=5.5)
with pm.Model():
x = pm.PolyaGamma('x', h=25, z=-2.3, rng=rng, size=(100, 5))
References
----------
.. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle.
"Bayesian inference for logistic models using Pólya–Gamma latent
variables." Journal of the American statistical Association
108.504 (2013): 1339-1349.
.. [2] Windle, Jesse, Nicholas G. Polson, and James G. Scott.
"Sampling Polya-Gamma random variates: alternate and approximate
techniques." arXiv preprint arXiv:1405.0506 (2014)
.. [3] Luc Devroye. "On exact simulation algorithms for some distributions
related to Jacobi theta functions." Statistics & Probability Letters,
Volume 79, Issue 21, (2009): 2251-2259.
.. [4] Windle, J. (2013). Forecasting high-dimensional, time-varying
variance-covariance matrices with high-frequency data and sampling
Pólya-Gamma random variates for posterior distributions derived
from logistic likelihoods.(PhD thesis). Retrieved from
http://hdl.handle.net/2152/21842
"""
rv_op = polyagamma

@classmethod
def dist(cls, h=1.0, z=0.0, **kwargs):
h = at.as_tensor_variable(floatX(h))
z = at.as_tensor_variable(floatX(z))

msg = f"The variable {h} specified for PolyaGamma has non-positive "
msg += "values, making it unsuitable for this parameter."
Assert(msg)(h, at.all(at.gt(h, 0.0)))

return super().dist([h, z], **kwargs)

def logp(value, h, z):
"""
Calculate log-probability of Polya-Gamma distribution at specified value.
Parameters
----------
value: numeric
Value(s) for which log-probability is calculated. If the log
probabilities for multiple values are desired the values must be
provided in a numpy array.
Returns
-------
TensorVariable
"""

return bound(_PolyaGammaLogDistFunc(True)(value, h, z), h > 0, value > 0)

def logcdf(value, h, z):
"""
Compute the log of the cumulative distribution function for the
Polya-Gamma distribution at the specified value.
Parameters
----------
value: numeric or np.ndarray or `TensorVariable`
Value(s) for which log CDF is calculated. If the log CDF for multiple
values are desired the values must be provided in a numpy array.
Returns
-------
TensorVariable
"""
return bound(_PolyaGammaLogDistFunc(False)(value, h, z), h > 0, value > 0)
36 changes: 36 additions & 0 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@
import aesara.tensor as at
import numpy as np
import numpy.random as nr

try:
from polyagamma import polyagamma_cdf, polyagamma_pdf

_polyagamma_not_installed = False
except ImportError: # pragma: no cover

_polyagamma_not_installed = True

def polyagamma_pdf(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")

def polyagamma_cdf(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")


import pytest
import scipy.stats
import scipy.stats.distributions as sp
Expand Down Expand Up @@ -954,6 +970,26 @@ def test_bound_normal(self):
x = PositiveNormal("x", mu=0, sigma=1, transform=None)
assert np.isinf(logp(x, -1).eval())

@pytest.mark.skipif(
condition=_polyagamma_not_installed,
reason="`polyagamma package is not available/installed.",
)
def test_polyagamma(self):
self.check_logp(
pm.PolyaGamma,
Rplus,
{"h": Rplus, "z": R},
lambda value, h, z: polyagamma_pdf(value, h, z, return_log=True),
decimal=select_by_precision(float64=6, float32=-1),
)
self.check_logcdf(
pm.PolyaGamma,
Rplus,
{"h": Rplus, "z": R},
lambda value, h, z: polyagamma_cdf(value, h, z, return_log=True),
decimal=select_by_precision(float64=6, float32=-1),
)

def test_discrete_unif(self):
self.check_logp(
DiscreteUniform,
Expand Down
35 changes: 35 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@
import scipy.stats as st

from numpy.testing import assert_almost_equal, assert_array_almost_equal

try:
from polyagamma import random_polyagamma

_polyagamma_not_installed = False
except ImportError: # pragma: no cover

_polyagamma_not_installed = True

def random_polyagamma(*args, **kwargs):
raise RuntimeError("polyagamma package is not installed!")


from scipy.special import expit

import pymc3 as pm
Expand Down Expand Up @@ -1326,6 +1339,28 @@ class TestBetaBinomial(BaseTestDistribution):
]


@pytest.mark.skipif(
condition=_polyagamma_not_installed,
reason="`polyagamma package is not available/installed.",
)
class TestPolyaGamma(BaseTestDistribution):
def polyagamma_rng_fn(self, size, h, z, rng):
return random_polyagamma(h, z, size=size, random_state=rng._bit_generator)

pymc_dist = pm.PolyaGamma
pymc_dist_params = {"h": 1.0, "z": 0.0}
expected_rv_op_params = {"h": 1.0, "z": 0.0}
reference_dist_params = {"h": 1.0, "z": 0.0}
reference_dist = lambda self: functools.partial(
self.polyagamma_rng_fn, rng=self.get_random_state()
)
tests_to_run = [
"check_pymc_params_match_rv_op",
"check_pymc_draws_match_reference",
"check_rv_size",
]


class TestDiscreteUniform(BaseTestDistribution):
def discrete_uniform_rng_fn(self, size, lower, upper, rng):
return st.randint.rvs(lower, upper + 1, size=size, random_state=rng)
Expand Down

0 comments on commit e8396bf

Please sign in to comment.