Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HyperGeometric Distribution to pymc3.distributions.discrete #4108 #4249

Merged
merged 11 commits into from
Dec 3, 2020
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ This new version of `Theano-PyMC` comes with an experimental JAX backend which,
- Change SMC metropolis kernel to independent metropolis kernel [#4115](https://github.com/pymc-devs/pymc3/pull/4115))
- Add alternative parametrization to NegativeBinomial distribution in terms of n and p (see [#4126](https://github.com/pymc-devs/pymc3/issues/4126))
- Added semantically meaningful `str` representations to PyMC3 objects for console, notebook, and GraphViz use (see [#4076](https://github.com/pymc-devs/pymc3/pull/4076), [#4065](https://github.com/pymc-devs/pymc3/pull/4065), [#4159](https://github.com/pymc-devs/pymc3/pull/4159), [#4217](https://github.com/pymc-devs/pymc3/pull/4217), and [#4243](https://github.com/pymc-devs/pymc3/pull/4243)).
- Add Discrete HyperGeometric Distribution (see [#4249](https://github.com/pymc-devs/pymc3/pull/#4249))

### Maintenance
- Switch the dependency of Theano to our own fork, [Theano-PyMC](https://github.com/pymc-devs/Theano-PyMC).
Expand Down
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from .discrete import ZeroInflatedBinomial
from .discrete import DiscreteUniform
from .discrete import Geometric
from .discrete import HyperGeometric
from .discrete import Categorical
from .discrete import OrderedLogistic

Expand Down Expand Up @@ -141,6 +142,7 @@
"ZeroInflatedBinomial",
"DiscreteUniform",
"Geometric",
"HyperGeometric",
"Categorical",
"OrderedLogistic",
"DensityDist",
Expand Down
113 changes: 113 additions & 0 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"ZeroInflatedNegativeBinomial",
"DiscreteUniform",
"Geometric",
"HyperGeometric",
"Categorical",
"OrderedLogistic",
]
Expand Down Expand Up @@ -809,6 +810,118 @@ def logp(self, value):
return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)


class HyperGeometric(Discrete):
R"""
Discrete hypergeometric distribution.

The probability of :math:`x` successes in a sequence of :math:`n` bernoulli
trials taken without replacement from a population of :math:`N` objects,
containing :math:`k` good (or successful or Type I) objects.
The pmf of this distribution is

.. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}

.. plot::

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
plt.style.use('seaborn-darkgrid')
x = np.arange(1, 15)
N = 50
k = 10
for n in [20, 25]:
pmf = st.hypergeom.pmf(x, N, k, n)
plt.plot(x, pmf, '-o', label='n = {}'.format(n))
plt.plot(x, pmf, '-o', label='N = {}'.format(N))
plt.plot(x, pmf, '-o', label='k = {}'.format(k))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()

======== =============================
Support :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]`
Mean :math:`\dfrac{nk}{N}`
Variance :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
======== =============================

Parameters
----------
N : integer
Total size of the population
k : integer
Number of successful individuals in the population
n : integer
Number of samples drawn from the population
"""

def __init__(self, N, k, n, *args, **kwargs):
super().__init__(*args, **kwargs)
self.N = intX(N)
self.k = intX(k)
self.n = intX(n)
self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))

def random(self, point=None, size=None):
r"""
Draw random values from HyperGeometric distribution.

Parameters
----------
point : dict, optional
Dict of variable values on which random values are to be
conditioned (uses default point if not specified).
size : int, optional
Desired size of random sample (returns one sample if not
specified).

Returns
-------
array
"""

N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size)
return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size)

def _random(self, M, n, N, size=None):
r"""Wrapper around scipy stat's hypergeom.rvs"""
try:
samples = stats.hypergeom.rvs(M=M, n=n, N=N, size=size)
return samples
except ValueError:
raise ValueError("Domain error in arguments")

def logp(self, value):
r"""
Calculate log-probability of HyperGeometric distribution at specified value.

Parameters
----------
value : numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
values are desired the values must be provided in a numpy array or theano tensor

Returns
-------
TensorVariable
"""
N = self.N
k = self.k
n = self.n
tot, good = N, k
bad = tot - good
result = (
betaln(good + 1, 1)
+ betaln(bad + 1, 1)
+ betaln(tot - n + 1, n + 1)
- betaln(value + 1, good - value + 1)
- betaln(n - value + 1, bad - n + value + 1)
- betaln(tot + 1, 1)
)
return result


class DiscreteUniform(Discrete):
R"""
Discrete uniform distribution.
Expand Down
9 changes: 9 additions & 0 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
Rice,
Kumaraswamy,
Moyal,
HyperGeometric,
)

from ..distributions import continuous
Expand Down Expand Up @@ -790,6 +791,14 @@ def test_geometric(self):
Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p))
)

def test_hypergeometric(self):
self.pymc3_matches_scipy(
HyperGeometric,
Nat,
{"N": NatSmall, "k": NatSmall, "n": NatSmall},
lambda value, N, k, n: sp.hypergeom.logpmf(value, N, k, n),
)

def test_negative_binomial(self):
def test_fun(value, mu, alpha):
return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha))
Expand Down
21 changes: 21 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,11 @@ class TestGeometric(BaseTestCases.BaseTestCase):
params = {"p": 0.5}


class TestHyperGeometric(BaseTestCases.BaseTestCase):
distribution = pm.HyperGeometric
params = {"N": 50, "k": 25, "n": 10}


class TestMoyal(BaseTestCases.BaseTestCase):
distribution = pm.Moyal
params = {"mu": 0.0, "sigma": 1.0}
Expand Down Expand Up @@ -739,6 +744,22 @@ def ref_rand(size, alpha, mu):
def test_geometric(self):
pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric)

def test_hypergeometric(self):
def ref_rand(size, N, k, n):
return st.hypergeom.rvs(M=N, n=k, N=n, size=size)

pymc3_random_discrete(
pm.HyperGeometric,
{
"N": Domain([10, 11, 12, 13], "int64"),
"k": Domain([4, 5, 6, 7], "int64"),
"n": Domain([6, 7, 8, 9], "int64"),
},
size=500,
fails=50,
ref_rand=ref_rand,
)

def test_discrete_uniform(self):
def ref_rand(size, lower, upper):
return st.randint.rvs(lower, upper + 1, size=size)
Expand Down