From ac6f6aa7ebdc1de656a8547d67fbf18c50e7490d Mon Sep 17 00:00:00 2001 From: zoj <44142765+zoj613@users.noreply.github.com> Date: Sat, 27 Mar 2021 15:43:04 +0200 Subject: [PATCH] Add Polyagamma distribution --- pymc3/distributions/continuous.py | 161 +++++++++++++++++++++++ pymc3/tests/test_distributions_random.py | 37 ++++++ 2 files changed, 198 insertions(+) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index d66ceb60e80..2f3d6cc0cc1 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -24,6 +24,7 @@ from aesara.assert_op import Assert from aesara.tensor.random.basic import ( BetaRV, + RandomVariable, cauchy, exponential, gamma, @@ -33,6 +34,21 @@ normal, uniform, ) + +try: + from polyagamma import polyagamma_logcdf, polyagamma_logpdf, random_polyagamma +except ImportError: # pragma: no cover + + def random_polyagamma(*args, **kwargs): + raise RuntimeError("polyagamma package is not installed!") + + def polyagamma_logpdf(*args, **kwargs): + raise RuntimeError("polyagamma package is not installed!") + + def polyagamma_logcdf(*args, **kwargs): + raise RuntimeError("polyagamma package is not installed!") + + from scipy import stats from scipy.interpolate import InterpolatedUnivariateSpline @@ -89,6 +105,7 @@ "Rice", "Moyal", "AsymmetricLaplace", + "PolyaGamma", ] @@ -4265,3 +4282,147 @@ def logcdf(self, value): aet.log(aet.erfc(aet.exp(-scaled / 2) * (2 ** -0.5))), 0 < sigma, ) + + +class PolyaGammaRV(RandomVariable): + """Polya-Gamma random variable.""" + + name = "polyagamma" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "floatX" + _print_name = ("PG", "\\operatorname{PG}") + + def __call__(self, h=1, z=0, size=None, **kwargs): + return super().__call__(h, z, size=size, **kwargs) + + @classmethod + def rng_fn(cls, rng, h, z, size=None): + """ + Generate a random sample from the distribution with the given parameters + Parameters + ---------- + h : scalar or sequence, optional + The shape parameter of the distribution. + z : scalar or sequence, optional + The exponential tilting parameter. + size : int or tuple of ints, optional + The number of elements to draw from the distribution. If size is + ``None`` (default) then a single value is returned. If a tuple of + integers is passed, the returned array will have the same shape. + If the element(s) of size is not an integer type, it will be truncated + to the largest integer smaller than its value (e.g (2.1, 1) -> (2, 1)). + This parameter only applies if `h` and `z` are scalars. + """ + # handle the kind of rng passed to the sampler + bg = rng._bit_generator if isinstance(rng, np.random.RandomState) else rng + return random_polyagamma(h, z, size=size, random_state=bg) + + +polyagamma = PolyaGammaRV() + + +class PolyaGamma(PositiveContinuous): + r""" + The Polya-Gamma distribution. + The distribution is parametrized by ``h`` (shape parameter) and ``z`` + (exponential tilting parameter). The pdf of this distribution is + .. math:: + f(x \mid h, z) = cosh^h(\frac{z}{2})e^{-\frac{1}{2}xz^2}f(x \mid h, 0), + where :math:`f(x \mid h, 0)` is the pdf of a :math:`PG(h, 0)` variable. + Notice that the pdf of this distribution is expressed as an alternating-sign + sum of inverse-Gaussian densities. + .. math:: + X = \Sigma_{k=1}^{\infty}\frac{Ga(h, 1)}{d_k}, + where :math:`d_k = 2(k - 0.5)^2\pi^2 + z^2/2`, :math:`Ga(h, 1)` is a gamma + random variable with shape parameter ``h`` and scale parameter ``1``. + .. plot:: + import matplotlib.pyplot as plt + import numpy as np + from polyagamma import polyagamma_pdf + plt.style.use('seaborn-darkgrid') + x = np.linspace(0.01, 5, 500);x.sort() + hs = [1., 5., 10., 15.] + zs = [0.] * 4 + for h, z in zip(hs, zs): + pdf = polyagamma_pdf(x, h=h, z=z) + plt.plot(x, pdf, label=r'$h$ = {}, $z$ = {}'.format(h, z)) + plt.xlabel('x', fontsize=12) + plt.ylabel('f(x)', fontsize=12) + plt.legend(loc=1) + plt.show() + ======== ============================= + Support :math:`x \in (0, \infty)` + Mean :math:`dfrac{h}{4} if :math:`z=0`, :math:`\dfrac{tanh(z/2)h}{2z}` otherwise. + Variance :math:`0.041666688h` if :math:`z=0`, :math:`\dfrac{h(sinh(z) - z)(1 - tanh^2(z/2))}{4z^3}` otherwise. + ======== ============================= + Parameters + ---------- + h: float, optional + The shape parameter of the distribution (h > 0). + z: float, optional + The exponential tilting parameter of the distribution. + Examples + -------- + .. code-block:: python + rng = np.random.default_rng() + with pm.Model(): + x = pm.PolyaGamma('x', h=1, z=5.5) + with pm.Model(): + x = pm.PolyaGamma('x', h=25, z=-2.3, rng=rng, size=(100, 5)) + References + ---------- + .. [1] Polson, Nicholas G., James G. Scott, and Jesse Windle. + "Bayesian inference for logistic models using Pólya–Gamma latent + variables." Journal of the American statistical Association + 108.504 (2013): 1339-1349. + .. [2] Windle, Jesse, Nicholas G. Polson, and James G. Scott. + "Sampling Polya-Gamma random variates: alternate and approximate + techniques." arXiv preprint arXiv:1405.0506 (2014) + .. [3] Luc Devroye. "On exact simulation algorithms for some distributions + related to Jacobi theta functions." Statistics & Probability Letters, + Volume 79, Issue 21, (2009): 2251-2259. + """ + rv_op = polyagamma + + @classmethod + def dist(cls, h=1.0, z=0.0, rng=None, size=None, **kwargs): + # really not sure whether h & z are required to be tensors? The + # distribution uses numpy arrays or scalars for parameter values. + hh = aet.as_tensor_variable(h) + zz = aet.as_tensor_variable(z) + + msg = f"The variable {hh} specified for PolyaGamma has non-positive " + msg += "values, making it unsuitable for this parameter." + Assert(msg)(hh, aet.all(aet.gt(hh, 0.00001))) + + return super().dist([h, z], size=size, rng=rng, **kwargs) + + def logp(value, h, z): + """ + Calculate log-probability of Normal distribution at specified value. + Parameters + ---------- + value: numeric + Value(s) for which log-probability is calculated. If the log probabilities for multiple + values are desired the values must be provided in a numpy array. + Returns + ------- + TensorVariable + """ + return bound(polyagamma_logpdf(value, h, z), h > 0) + + def logcdf(value, h, z): + """ + Compute the log of the cumulative distribution function for Normal distribution + at the specified value. + Parameters + ---------- + value: numeric or np.ndarray or `TensorVariable` + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array. + Returns + ------- + TensorVariable + """ + return bound(polyagamma_logcdf(value, h, z), h > 0) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 19fdb6369bb..f8803ec8c9e 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -28,6 +28,12 @@ import pymc3 as pm +try: + pm.PolyaGamma.rv_op.rng_fn() + polyagamma_not_installed = False +except RuntimeError: # pragma: no cover + polyagamma_not_installed = True + from pymc3.aesaraf import floatX, intX from pymc3.distributions import change_rv_size from pymc3.distributions.dist_math import clipped_beta_rvs @@ -518,6 +524,37 @@ def test_probability_vector_shape(self): assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 3, 7) +@pytest.mark.skipif(polyagamma_not_installed) +class PolyaGamma(BaseTestCases.BaseTestCase): + rng = np.random.default_rng(SeededTest.random_seed) + distribution = pm.PolyaGamma + params = {"h": 1, "z": 0.0} + + @staticmethod + def sample_random_variable(this, size): + """ Draws samples from a RandomVariable using its .rng_fn method. """ + if size is None: + return this.distribution.rv_op.rng_fn(cls.rng) + else: + return this.distribution.rv_op.rng_fn(cls.rng, size=size) + + @pytest.mark.parametrize( + "shape, size", + [ + ((2), (1)), + ((2), (2)), + ((2, 2), (2, 100)), + ((3, 4), (3, 4)), + ((3, 4), (3, 4, 100)), + ((3, 4), (100)), + ((3, 4), (1)), + ], + ) + def test_polyagamma_random_shape(self, shape, size): + out_shape = to_tuple(size) + to_tuple(shape) + assert self.distribution.rv_op.rng_fn(cls.rng, size=size).shape == out_shape + + @pytest.mark.skip(reason="This test is covered by Aesara") class TestDirichlet(SeededTest): @pytest.mark.parametrize(