Skip to content

Commit

Permalink
Add GeneralizedPoisson distribution
Browse files Browse the repository at this point in the history
Co-authored-by: Luciano Paz <[email protected]>
  • Loading branch information
ricardoV94 and lucianopaz committed Apr 28, 2023
1 parent 1fcb0d1 commit 1fab291
Show file tree
Hide file tree
Showing 4 changed files with 300 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Distributions
:toctree: generated/

GenExtreme
GeneralizedPoisson
histogram_utils.histogram_approximation
DiscreteMarkovChain

Expand Down
7 changes: 6 additions & 1 deletion pymc_experimental/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
"""

from pymc_experimental.distributions.continuous import GenExtreme
from pymc_experimental.distributions.discrete import GeneralizedPoisson
from pymc_experimental.distributions.timeseries import DiscreteMarkovChain

__all__ = ["GenExtreme", "DiscreteMarkovChain"]
__all__ = [
"DiscreteMarkovChain",
"GeneralizedPoisson",
"GenExtreme",
]
173 changes: 173 additions & 0 deletions pymc_experimental/distributions/discrete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Copyright 2023 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pymc as pm
from pymc.distributions.dist_math import check_parameters, factln, logpow
from pymc.distributions.shape_utils import rv_size_is_none
from pytensor import tensor as pt
from pytensor.tensor.random.op import RandomVariable


class GeneralizedPoissonRV(RandomVariable):
name = "generalized_poisson"
ndim_supp = 0
ndims_params = [0, 0]
dtype = "int64"
_print_name = ("GeneralizedPoisson", "\\operatorname{GeneralizedPoisson}")

@classmethod
def rng_fn(cls, rng, theta, lam, size):
theta = np.asarray(theta)
lam = np.asarray(lam)

if size is not None:
dist_size = size
else:
dist_size = np.broadcast_shapes(theta.shape, lam.shape)

# A mix of 2 algorithms described by Famoye (1997) is used depending on parameter values
# 0: Inverse method, computed on the log scale. Used when lam <= 0.
# 1: Branching method. Used when lambda > 0.
x = np.empty(dist_size)
idxs_mask = np.broadcast_to(lam < 0, dist_size)
if np.any(idxs_mask):
x[idxs_mask] = cls._inverse_rng_fn(rng, theta, lam, dist_size, idxs_mask=idxs_mask)[
idxs_mask
]
idxs_mask = ~idxs_mask
if np.any(idxs_mask):
x[idxs_mask] = cls._branching_rng_fn(rng, theta, lam, dist_size, idxs_mask=idxs_mask)[
idxs_mask
]
return x

@classmethod
def _inverse_rng_fn(cls, rng, theta, lam, dist_size, idxs_mask):
# We handle x/0 and log(0) issues with branching
with np.errstate(divide="ignore", invalid="ignore"):
log_u = np.log(rng.uniform(size=dist_size))
pos_lam = lam > 0
abs_log_lam = np.log(np.abs(lam))
theta_m_lam = theta - lam
log_s = -theta
log_p = log_s.copy()
x_ = 0
x = np.zeros(dist_size)
below_cutpoint = log_s < log_u
while np.any(below_cutpoint[idxs_mask]):
x_ += 1
x[below_cutpoint] += 1
log_c = np.log(theta_m_lam + lam * x_)
# Compute log(1 + lam / C)
log1p_lam_m_C = np.where(
pos_lam,
np.log1p(np.exp(abs_log_lam - log_c)),
pm.math.log1mexp_numpy(abs_log_lam - log_c, negative_input=True),
)
log_p = log_c + log1p_lam_m_C * (x_ - 1) + log_p - np.log(x_) - lam
log_s = np.logaddexp(log_s, log_p)
below_cutpoint = log_s < log_u
return x

@classmethod
def _branching_rng_fn(cls, rng, theta, lam, dist_size, idxs_mask):
lam_ = np.abs(lam) # This algorithm is only valid for positive lam
y = rng.poisson(theta, size=dist_size)
x = y.copy()
higher_than_zero = y > 0
while np.any(higher_than_zero[idxs_mask]):
y = rng.poisson(lam_ * y)
x[higher_than_zero] = x[higher_than_zero] + y[higher_than_zero]
higher_than_zero = y > 0
return x


generalized_poisson = GeneralizedPoissonRV()


class GeneralizedPoisson(pm.distributions.Discrete):
R"""
Generalized Poisson.
Used to model count data that can be either overdispersed or underdispersed.
Offers greater flexibility than the standard Poisson which assumes equidispersion,
where the mean is equal to the variance.
The pmf of this distribution is
.. math:: f(x \mid \mu, \lambda) =
\frac{\mu (\mu + \lambda x)^{x-1} e^{-\mu - \lambda x}}{x!}
======== ======================================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\frac{\mu}{1 - \lambda}`
Variance :math:`\frac{\mu}{(1 - \lambda)^3}`
======== ======================================
Parameters
----------
mu : tensor_like of float
Mean parameter (mu > 0).
lam : tensor_like of float
Dispersion parameter (max(-1, -mu/4) <= lam <= 1).
Notes
-----
When lam = 0, the Generalized Poisson reduces to the standard Poisson with the same mu.
When lam < 0, the mean is greater than the variance (underdispersion).
When lam > 0, the mean is less than the variance (overdispersion).
References
----------
The PMF is taken from [1] and the random generator function is adapted from [2].
.. [1] Consul, PoC, and Felix Famoye. "Generalized Poisson regression model."
Communications in Statistics-Theory and Methods 21.1 (1992): 89-109.
.. [2] Famoye, Felix. "Generalized Poisson random variate generation." American
Journal of Mathematical and Management Sciences 17.3-4 (1997): 219-237.
"""

rv_op = generalized_poisson

@classmethod
def dist(cls, mu, lam, **kwargs):
mu = pt.as_tensor_variable(mu)
lam = pt.as_tensor_variable(lam)
return super().dist([mu, lam], **kwargs)

def moment(rv, size, mu, lam):
mean = pt.floor(mu / (1 - lam))
if not rv_size_is_none(size):
mean = pt.full(size, mean)
return mean

def logp(value, mu, lam):
mu_lam_value = mu + lam * value
logprob = np.log(mu) + logpow(mu_lam_value, value - 1) - mu_lam_value - factln(value)

# Probability is 0 when value > m, where m is the largest positive integer for
# which mu + m * lam > 0 (when lam < 0).
logprob = pt.switch(
pt.or_(
mu_lam_value < 0,
value < 0,
),
-np.inf,
logprob,
)

return check_parameters(
logprob,
0 < mu,
pt.abs(lam) <= 1,
(-mu / 4) <= lam,
msg="0 < mu, max(-1, -mu/4)) <= lam <= 1",
)
120 changes: 120 additions & 0 deletions pymc_experimental/tests/distributions/test_discrete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright 2023 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pymc as pm
import pytensor
import pytensor.tensor as pt
import pytest
import scipy.stats
from pymc.logprob.utils import ParameterValueError
from pymc.testing import (
BaseTestDistributionRandom,
Domain,
Rplus,
assert_moment_is_expected,
discrete_random_tester,
)
from pytensor import config

from pymc_experimental.distributions import GeneralizedPoisson


class TestGeneralizedPoisson:
class TestRandomVariable(BaseTestDistributionRandom):
pymc_dist = GeneralizedPoisson
pymc_dist_params = {"mu": 4.0, "lam": 1.0}
expected_rv_op_params = {"mu": 4.0, "lam": 1.0}
tests_to_run = [
"check_pymc_params_match_rv_op",
"check_rv_size",
]

def test_random_matches_poisson(self):
discrete_random_tester(
dist=self.pymc_dist,
paramdomains={"mu": Rplus, "lam": Domain([0], edges=(None, None))},
ref_rand=lambda mu, lam, size: scipy.stats.poisson.rvs(mu, size=size),
)

@pytest.mark.parametrize("mu", (2.5, 20, 50))
def test_random_lam_expected_moments(self, mu):
lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
dist = self.pymc_dist.dist(mu=mu, lam=lam, size=(10_000, len(lam)))
draws = dist.eval()

expected_mean = mu / (1 - lam)
np.testing.assert_allclose(draws.mean(0), expected_mean, rtol=1e-1)

expected_std = np.sqrt(mu / (1 - lam) ** 3)
np.testing.assert_allclose(draws.std(0), expected_std, rtol=1e-1)

def test_logp_matches_poisson(self):
# We are only checking this distribution for lambda=0 where it's equivalent to Poisson.
mu = pt.scalar("mu")
lam = pt.scalar("lam")
value = pt.vector("value", dtype="int64")

logp = pm.logp(GeneralizedPoisson.dist(mu, lam), value)
logp_fn = pytensor.function([value, mu, lam], logp)

test_value = np.array([0, 1, 2, 30])
for test_mu in (0.01, 0.1, 0.9, 1, 1.5, 20, 100):
np.testing.assert_allclose(
logp_fn(test_value, test_mu, lam=0),
scipy.stats.poisson.logpmf(test_value, test_mu),
rtol=1e-7 if config.floatX == "float64" else 1e-5,
)

# Check out-of-bounds values
value = pt.scalar("value")
logp = pm.logp(GeneralizedPoisson.dist(mu, lam), value)
logp_fn = pytensor.function([value, mu, lam], logp)

logp_fn(-1, mu=5, lam=0) == -np.inf
logp_fn(9, mu=5, lam=-1) == -np.inf

# Check mu/lam restrictions
with pytest.raises(ParameterValueError):
logp_fn(1, mu=1, lam=2)

with pytest.raises(ParameterValueError):
logp_fn(1, mu=0, lam=0)

with pytest.raises(ParameterValueError):
logp_fn(1, mu=1, lam=-1)

def test_logp_lam_expected_moments(self):
mu = 30
lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
with pm.Model():
x = GeneralizedPoisson("x", mu=mu, lam=lam)
trace = pm.sample(chains=1, draws=10_000, random_seed=96).posterior

expected_mean = mu / (1 - lam)
np.testing.assert_allclose(trace["x"].mean(("chain", "draw")), expected_mean, rtol=1e-1)

expected_std = np.sqrt(mu / (1 - lam) ** 3)
np.testing.assert_allclose(trace["x"].std(("chain", "draw")), expected_std, rtol=1e-1)

@pytest.mark.parametrize(
"mu, lam, size, expected",
[
(50, [-0.6, 0, 0.6], None, np.floor(50 / (1 - np.array([-0.6, 0, 0.6])))),
([5, 50], -0.1, (4, 2), np.full((4, 2), np.floor(np.array([5, 50]) / 1.1))),
],
)
def test_moment(self, mu, lam, size, expected):
with pm.Model() as model:
GeneralizedPoisson("x", mu=mu, lam=lam, size=size)
assert_moment_is_expected(model, expected)

0 comments on commit 1fab291

Please sign in to comment.