From 47021d4c096cc7896379667c6534c9f09d0a141b Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Thu, 17 Dec 2020 19:42:36 +0100
Subject: [PATCH 1/9] Implement logcdf method for discrete distributions

---
 pymc3/distributions/discrete.py   | 364 +++++++++++++++++++++++++++---
 pymc3/tests/test_distributions.py | 211 ++++++++++++++++-
 2 files changed, 532 insertions(+), 43 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index e639ba6684..786a55b27d 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -24,6 +24,7 @@
     binomln,
     bound,
     factln,
+    incomplete_beta,
     log_diff_normal_cdf,
     logpow,
     normal_lccdf,
@@ -32,7 +33,7 @@
 )
 from pymc3.distributions.distribution import Discrete, draw_values, generate_samples
 from pymc3.distributions.shape_utils import broadcast_distribution_samples
-from pymc3.math import log1pexp, logaddexp, logit, sigmoid, tround
+from pymc3.math import log1mexp, log1pexp, logaddexp, logit, logsumexp, sigmoid, tround
 from pymc3.theanof import floatX, intX, take_along_axis
 
 __all__ = [
@@ -148,6 +149,36 @@ def logp(self, value):
             p <= 1,
         )
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for Binomial distribution
+        at the specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value for which log CDF is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        # TODO: work with multiple values (see #4342)
+        n = self.n
+        p = self.p
+        value = tt.floor(value)
+
+        return tt.switch(
+            tt.lt(value, n),
+            bound(
+                tt.log(incomplete_beta(n - value, value + 1, 1 - p)),
+                n > 0,
+                0 <= p,
+                p <= 1,
+            ),
+            0,
+        )
+
 
 class BetaBinomial(Discrete):
     R"""
@@ -271,16 +302,40 @@ def logp(self, value):
         """
         alpha = self.alpha
         beta = self.beta
+        n = self.n
         return bound(
-            binomln(self.n, value)
-            + betaln(value + alpha, self.n - value + beta)
-            - betaln(alpha, beta),
+            binomln(n, value) + betaln(value + alpha, n - value + beta) - betaln(alpha, beta),
             value >= 0,
-            value <= self.n,
+            value <= n,
             alpha > 0,
             beta > 0,
         )
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for BetaBinomial distribution
+        at the specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value for which log CDF is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        # TODO: fix for multiple values
+        n = self.n
+
+        return tt.switch(
+            tt.lt(value, 0),
+            -np.inf,
+            tt.switch(
+                tt.lt(value, n), logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False), 0
+            ),
+        )
+
 
 class Bernoulli(Discrete):
     R"""Bernoulli log-likelihood
@@ -380,6 +435,31 @@ def logp(self, value):
                 tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1
             )
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for Bernoulli distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log CDF is calculated. If the log CDF for multiple
+            values are desired the values must be provided in a numpy array or theano tensor.
+        Returns
+        -------
+        TensorVariable
+        """
+        p = self.p
+
+        return tt.switch(
+            tt.lt(value, 0),
+            -np.inf,
+            tt.switch(
+                tt.lt(value, 1),
+                tt.log(1 - p),
+                0,
+            ),
+        )
+
     def _distr_parameters_for_repr(self):
         return ["p"]
 
@@ -426,36 +506,11 @@ def DiscreteWeibull(q, b, x):
     def __init__(self, q, beta, *args, **kwargs):
         super().__init__(*args, defaults=("median",), **kwargs)
 
-        self.q = q = tt.as_tensor_variable(floatX(q))
-        self.beta = beta = tt.as_tensor_variable(floatX(beta))
+        self.q = tt.as_tensor_variable(floatX(q))
+        self.beta = tt.as_tensor_variable(floatX(beta))
 
         self.median = self._ppf(0.5)
 
-    def logp(self, value):
-        r"""
-        Calculate log-probability of DiscreteWeibull distribution at specified value.
-
-        Parameters
-        ----------
-        value: numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or theano tensor
-
-        Returns
-        -------
-        TensorVariable
-        """
-        q = self.q
-        beta = self.beta
-
-        return bound(
-            tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
-            0 <= value,
-            0 < q,
-            q < 1,
-            0 < beta,
-        )
-
     def _ppf(self, p):
         r"""
         The percentile point function (the inverse of the cumulative
@@ -492,6 +547,57 @@ def random(self, point=None, size=None):
 
         return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size)
 
+    def logp(self, value):
+        r"""
+        Calculate log-probability of DiscreteWeibull distribution at specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or theano tensor
+
+        Returns
+        -------
+        TensorVariable
+        """
+        q = self.q
+        beta = self.beta
+        return bound(
+            tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
+            0 <= value,
+            0 < q,
+            q < 1,
+            0 < beta,
+        )
+
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for Discrete Weibull distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log CDF is calculated. If the log CDF for multiple
+            values are desired the values must be provided in a numpy array or theano tensor.
+        Returns
+        -------
+        TensorVariable
+        """
+        q = self.q
+        beta = self.beta
+
+        return tt.switch(
+            tt.lt(value, 0),
+            -np.inf,
+            bound(
+                tt.log(1 - tt.power(q, tt.power(value + 1, beta))),
+                0 < q,
+                q < 1,
+                0 < beta,
+            ),
+        )
+
 
 class Poisson(Discrete):
     R"""
@@ -581,6 +687,28 @@ def logp(self, value):
         # Return zero when mu and value are both zero
         return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for Poisson distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log CDF is calculated. If the log CDF for multiple
+            values are desired the values must be provided in a numpy array or theano tensor.
+        Returns
+        -------
+        TensorVariable
+        """
+        mu = self.mu
+        value = tt.floor(value)
+
+        return bound(
+            tt.log(tt.gammaincc(value + 1, mu)),
+            value >= 0,
+            mu >= 0,
+        )
+
 
 class NegativeBinomial(Discrete):
     R"""
@@ -737,6 +865,32 @@ def logp(self, value):
         # Return Poisson when alpha gets very large.
         return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for NegativeBinomial distribution
+        at the specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value for which log CDF is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        # TODO: work with multiple values (see #4342)
+        # TODO: avoid `p` recomputation if distribution was defined in terms of `p`
+        alpha = self.alpha
+        p = alpha / (self.mu + alpha)
+
+        return bound(
+            tt.log(incomplete_beta(alpha, tt.floor(value) + 1, p)),
+            alpha > 0,
+            0 <= p,
+            p <= 1,
+        )
+
     def _distr_parameters_for_repr(self):
         return self._param_type
 
@@ -820,6 +974,28 @@ def logp(self, value):
         p = self.p
         return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for Geometric distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log CDF is calculated. If the log CDF for multiple
+            values are desired the values must be provided in a numpy array or theano tensor.
+        Returns
+        -------
+        TensorVariable
+        """
+        p = self.p
+
+        return bound(
+            log1mexp(-tt.log1p(-p) * value),
+            value >= 0,
+            0 <= p,
+            p <= 1,
+        )
+
 
 class HyperGeometric(Discrete):
     R"""
@@ -935,6 +1111,40 @@ def logp(self, value):
         upper = tt.switch(tt.lt(k, n), k, n)
         return bound(result, lower <= value, value <= upper)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for HyperGeometric distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value for which log CDF is calculated.
+        Returns
+        -------
+        TensorVariable
+        """
+        # TODO: fix for multiple values
+        N = self.N
+        n = self.n
+        k = self.k
+
+        return tt.switch(
+            tt.lt(value, 0),
+            -np.inf,
+            tt.switch(
+                tt.lt(value, n),
+                logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False),
+                bound(
+                    0,
+                    N > 0,
+                    k >= 0,
+                    n >= 0,
+                    k <= N,
+                    n <= N,
+                ),
+            ),
+        )
+
 
 class DiscreteUniform(Discrete):
     R"""
@@ -1025,6 +1235,24 @@ def logp(self, value):
         lower = self.lower
         return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for Discrete uniform distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log CDF is calculated. If the log CDF for multiple
+            values are desired the values must be provided in a numpy array or theano tensor.
+        Returns
+        -------
+        TensorVariable
+        """
+        upper = self.upper
+        lower = self.lower
+
+        return tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1)
+
 
 class Categorical(Discrete):
     R"""
@@ -1263,7 +1491,7 @@ class ZeroInflatedPoisson(Discrete):
     def __init__(self, psi, theta, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.theta = theta = tt.as_tensor_variable(floatX(theta))
-        self.psi = psi = tt.as_tensor_variable(floatX(psi))
+        self.psi = tt.as_tensor_variable(floatX(psi))
         self.pois = Poisson.dist(theta)
         self.mode = self.pois.mode
 
@@ -1314,6 +1542,28 @@ def logp(self, value):
 
         return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for ZeroInflatedPoisson distribution
+        at the specified value.
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log CDF is calculated. If the log CDF for multiple
+            values are desired the values must be provided in a numpy array or theano tensor.
+        Returns
+        -------
+        TensorVariable
+        """
+        psi = self.psi
+
+        return bound(
+            tt.log(1 - psi + psi * tt.exp(self.pois.logcdf(value))),
+            0 <= value,
+            0 <= psi,
+            psi <= 1,
+        )
+
 
 class ZeroInflatedBinomial(Discrete):
     R"""
@@ -1422,6 +1672,30 @@ def logp(self, value):
 
         return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for ZeroInflatedBinomial distribution
+        at the specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value for which log CDF is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        # TODO: work with multiple values (see #4342)
+        psi = self.psi
+
+        return bound(
+            tt.log(1 - psi + psi * tt.exp(self.bin.logcdf(value))),
+            0 <= value,
+            0 <= psi,
+            psi <= 1,
+        )
+
 
 class ZeroInflatedNegativeBinomial(Discrete):
     R"""
@@ -1561,6 +1835,30 @@ def logp(self, value):
 
         return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0)
 
+    def logcdf(self, value):
+        """
+        Compute the log of the cumulative distribution function for ZeroInflatedNegativeBinomial distribution
+        at the specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value for which log CDF is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        # TODO: work with multiple values (see #4342)
+        psi = self.psi
+
+        return bound(
+            tt.log(1 - psi + psi * tt.exp(self.nb.logcdf(value))),
+            0 <= value,
+            0 <= psi,
+            psi <= 1,
+        )
+
 
 class OrderedLogistic(Categorical):
     R"""
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 16ed273488..1005b63c80 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -91,7 +91,7 @@
     ZeroInflatedPoisson,
     continuous,
 )
-from pymc3.math import kronecker
+from pymc3.math import kronecker, logsumexp
 from pymc3.model import Deterministic, Model, Point
 from pymc3.tests.helpers import SeededTest, select_by_precision
 from pymc3.theanof import floatX
@@ -575,6 +575,28 @@ def check_logcdf(
                 err_msg=str(pt),
             )
 
+    def check_selfconsistency_discrete_logcdf(
+        self, distribution, domain, paramdomains, decimal=None, n_samples=100
+    ):
+        """
+        Check that logcdf of discrete distributions matches sum of logps up to value
+        """
+        domains = paramdomains.copy()
+        domains["value"] = domain
+        if decimal is None:
+            decimal = select_by_precision(float64=6, float32=3)
+        for pt in product(domains, n_samples=n_samples):
+            params = dict(pt)
+            value = params.pop("value")
+            values = np.arange(0, value + 1)
+            dist = distribution.dist(**params)
+            assert_almost_equal(
+                dist.logcdf(value).tag.test_value,
+                logsumexp(dist.logp(values), keepdims=False).tag.test_value,
+                decimal=decimal,
+                err_msg=str(pt),
+            )
+
     def check_int_to_1(self, model, value, domain, paramdomains):
         pdf = model.fastfn(exp(model.logpt))
         for pt in product(paramdomains, n_samples=10):
@@ -642,6 +664,18 @@ def test_discrete_unif(self):
             {"lower": -Rplusdunif, "upper": Rplusdunif},
             lambda value, lower, upper: sp.randint.logpmf(value, lower, upper + 1),
         )
+        self.check_logcdf(
+            DiscreteUniform,
+            Rdunif,
+            {"lower": -Rplusdunif, "upper": Rplusdunif},
+            lambda value, lower, upper: sp.randint.logcdf(value, lower, upper + 1),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            DiscreteUniform,
+            Rdunif,
+            # Using lower = Bool, as this unittest assumes the distribution domain starts at zero.
+            {"lower": Bool, "upper": Rplusdunif},
+        )
 
     def test_flat(self):
         self.pymc3_matches_scipy(Flat, Runif, {}, lambda value: 0)
@@ -801,32 +835,95 @@ def test_exponential(self):
 
     def test_geometric(self):
         self.pymc3_matches_scipy(
-            Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p))
+            Geometric,
+            Nat,
+            {"p": Unit},
+            lambda value, p: np.log(sp.geom.pmf(value, p)),
+        )
+        self.check_logcdf(
+            Geometric,
+            Nat,
+            {"p": Unit},
+            lambda value, p: sp.geom.logcdf(value, p),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            Geometric,
+            Nat,
+            {"p": Unit},
         )
 
     def test_hypergeometric(self):
         def modified_scipy_hypergeom_logpmf(value, N, k, n):
+            # Convert nan to -np.inf
             original_res = sp.hypergeom.logpmf(value, N, k, n)
             return original_res if not np.isnan(original_res) else -np.inf
 
+        def modified_scipy_hypergeom_logcdf(value, N, k, n):
+            # Convert nan to -np.inf
+            original_res = sp.hypergeom.logcdf(value, N, k, n)
+
+            # Correct for scipy bug in logcdf method (see https://github.com/scipy/scipy/issues/13280)
+            if not np.isnan(original_res):
+                pmfs = sp.hypergeom.logpmf(np.arange(value + 1), N, k, n)
+                if np.all(np.isnan(pmfs)):
+                    original_res = np.nan
+
+            return original_res if not np.isnan(original_res) else -np.inf
+
         self.pymc3_matches_scipy(
             HyperGeometric,
             Nat,
             {"N": NatSmall, "k": NatSmall, "n": NatSmall},
-            lambda value, N, k, n: modified_scipy_hypergeom_logpmf(value, N, k, n),
+            modified_scipy_hypergeom_logpmf,
+        )
+        self.check_logcdf(
+            HyperGeometric,
+            Nat,
+            {"N": NatSmall, "k": NatSmall, "n": NatSmall},
+            modified_scipy_hypergeom_logcdf,
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            HyperGeometric,
+            Nat,
+            {"N": NatSmall, "k": NatSmall, "n": NatSmall},
         )
 
     def test_negative_binomial(self):
-        def test_fun(value, mu, alpha):
+        def scipy_mu_alpha_logpmf(value, mu, alpha):
             return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha))
 
-        self.pymc3_matches_scipy(NegativeBinomial, Nat, {"mu": Rplus, "alpha": Rplus}, test_fun)
+        def scipy_mu_alpha_logcdf(value, mu, alpha):
+            return sp.nbinom.logcdf(value, alpha, 1 - mu / (mu + alpha))
+
+        self.pymc3_matches_scipy(
+            NegativeBinomial,
+            Nat,
+            {"mu": Rplus, "alpha": Rplus},
+            scipy_mu_alpha_logpmf,
+        )
         self.pymc3_matches_scipy(
             NegativeBinomial,
             Nat,
             {"p": Unit, "n": Rplus},
             lambda value, p, n: sp.nbinom.logpmf(value, n, p),
         )
+        self.check_logcdf(
+            NegativeBinomial,
+            Nat,
+            {"mu": Rplus, "alpha": Rplus},
+            scipy_mu_alpha_logcdf,
+        )
+        self.check_logcdf(
+            NegativeBinomial,
+            Nat,
+            {"p": Unit, "n": Rplus},
+            lambda value, p, n: sp.nbinom.logcdf(value, n, p),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            NegativeBinomial,
+            Nat,
+            {"mu": Rplus, "alpha": Rplus},
+        )
 
     @pytest.mark.parametrize(
         "mu, p, alpha, n, expected",
@@ -1024,11 +1121,43 @@ def test_binomial(self):
             {"n": NatSmall, "p": Unit},
             lambda value, n, p: sp.binom.logpmf(value, n, p),
         )
+        self.check_logcdf(
+            Binomial,
+            Nat,
+            {"n": NatSmall, "p": Unit},
+            lambda value, n, p: sp.binom.logcdf(value, n, p),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            Binomial,
+            Nat,
+            {"n": NatSmall, "p": Unit},
+        )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
     @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
     def test_beta_binomial(self):
-        self.checkd(BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall})
+        self.checkd(
+            BetaBinomial,
+            Nat,
+            {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
+        )
+        self.pymc3_matches_scipy(
+            BetaBinomial,
+            Nat,
+            {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
+            lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n),
+        )
+        self.check_logcdf(
+            BetaBinomial,
+            Nat,
+            {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
+            lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            BetaBinomial,
+            Nat,
+            {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
+        )
 
     def test_bernoulli(self):
         self.pymc3_matches_scipy(
@@ -1038,7 +1167,27 @@ def test_bernoulli(self):
             lambda value, logit_p: sp.bernoulli.logpmf(value, scipy.special.expit(logit_p)),
         )
         self.pymc3_matches_scipy(
-            Bernoulli, Bool, {"p": Unit}, lambda value, p: sp.bernoulli.logpmf(value, p)
+            Bernoulli,
+            Bool,
+            {"p": Unit},
+            lambda value, p: sp.bernoulli.logpmf(value, p),
+        )
+        self.check_logcdf(
+            Bernoulli,
+            Bool,
+            {"p": Unit},
+            lambda value, p: sp.bernoulli.logcdf(value, p),
+        )
+        self.check_logcdf(
+            Bernoulli,
+            Bool,
+            {"logit_p": R},
+            lambda value, logit_p: sp.bernoulli.logcdf(value, scipy.special.expit(logit_p)),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            Bernoulli,
+            Bool,
+            {"p": Unit},
         )
 
     def test_discrete_weibull(self):
@@ -1048,10 +1197,29 @@ def test_discrete_weibull(self):
             {"q": Unit, "beta": Rplusdunif},
             discrete_weibull_logpmf,
         )
+        self.check_selfconsistency_discrete_logcdf(
+            DiscreteWeibull,
+            Nat,
+            {"q": Unit, "beta": Rplusdunif},
+        )
 
     def test_poisson(self):
         self.pymc3_matches_scipy(
-            Poisson, Nat, {"mu": Rplus}, lambda value, mu: sp.poisson.logpmf(value, mu)
+            Poisson,
+            Nat,
+            {"mu": Rplus},
+            lambda value, mu: sp.poisson.logpmf(value, mu),
+        )
+        self.check_logcdf(
+            Poisson,
+            Nat,
+            {"mu": Rplus},
+            lambda value, mu: sp.poisson.logcdf(value, mu),
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            Poisson,
+            Nat,
+            {"mu": Rplus},
         )
 
     def test_bound_poisson(self):
@@ -1073,7 +1241,16 @@ def test_constantdist(self):
     # Too lazy to propagate decimal parameter through the whole chain of deps
     @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
     def test_zeroinflatedpoisson(self):
-        self.checkd(ZeroInflatedPoisson, Nat, {"theta": Rplus, "psi": Unit})
+        self.checkd(
+            ZeroInflatedPoisson,
+            Nat,
+            {"theta": Rplus, "psi": Unit},
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            ZeroInflatedPoisson,
+            Nat,
+            {"theta": Rplus, "psi": Unit},
+        )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
     @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
@@ -1083,11 +1260,25 @@ def test_zeroinflatednegativebinomial(self):
             Nat,
             {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit},
         )
+        self.check_selfconsistency_discrete_logcdf(
+            ZeroInflatedNegativeBinomial,
+            Nat,
+            {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit},
+        )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
     @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
     def test_zeroinflatedbinomial(self):
-        self.checkd(ZeroInflatedBinomial, Nat, {"n": NatSmall, "p": Unit, "psi": Unit})
+        self.checkd(
+            ZeroInflatedBinomial,
+            Nat,
+            {"n": NatSmall, "p": Unit, "psi": Unit},
+        )
+        self.check_selfconsistency_discrete_logcdf(
+            ZeroInflatedBinomial,
+            Nat,
+            {"n": NatSmall, "p": Unit, "psi": Unit},
+        )
 
     @pytest.mark.parametrize("n", [1, 2, 3])
     def test_mvnormal(self, n):

From 0583d8eb21e38abc92c545f78b6f558750d15efa Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 28 Dec 2020 12:27:02 +0100
Subject: [PATCH 2/9] Add release note

---
 RELEASE-NOTES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 2cbff44fc3..f755ec145f 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -19,6 +19,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
 - Option to set `check_bounds=False` when instantiating `pymc3.Model()`. This turns off bounds checks that ensure that input parameters of distributions are valid. For correctly specified models, this is unneccessary as all parameters get automatically transformed so that all values are valid. Turning this off should lead to faster sampling (see [#4377](https://github.com/pymc-devs/pymc3/pull/4377)).
 - `OrderedProbit` distribution added (see [#4232](https://github.com/pymc-devs/pymc3/pull/4232)).
 - `plot_posterior_predictive_glm` now works with `arviz.InferenceData` as well (see [#4234](https://github.com/pymc-devs/pymc3/pull/4234))
+- Add `logcdf` method to all univariate discrete distributions (see [#4387](https://github.com/pymc-devs/pymc3/pull/4387)).
 
 ### Maintenance
 - Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)

From a09d93bd9d20c422ccb342a267a201edd6cf2c28 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 28 Dec 2020 13:30:50 +0100
Subject: [PATCH 3/9] Small reformatting

---
 pymc3/distributions/discrete.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 786a55b27d..77e10a1a8c 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -332,7 +332,9 @@ def logcdf(self, value):
             tt.lt(value, 0),
             -np.inf,
             tt.switch(
-                tt.lt(value, n), logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False), 0
+                tt.lt(value, n),
+                logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False),
+                0,
             ),
         )
 

From 831afbaf8ebf1356cd19bea1b811efc925f3936b Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 28 Dec 2020 14:12:42 +0100
Subject: [PATCH 4/9] Add more comprehensive bound check for impossible
 parameters

---
 pymc3/distributions/discrete.py | 83 +++++++++++++++++----------------
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 77e10a1a8c..5e09e8fd3a 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -168,15 +168,16 @@ def logcdf(self, value):
         p = self.p
         value = tt.floor(value)
 
-        return tt.switch(
-            tt.lt(value, n),
-            bound(
+        return bound(
+            tt.switch(
+                tt.lt(value, n),
                 tt.log(incomplete_beta(n - value, value + 1, 1 - p)),
-                n > 0,
-                0 <= p,
-                p <= 1,
+                0,
             ),
-            0,
+            0 <= value,
+            0 < n,
+            0 <= p,
+            p <= 1,
         )
 
 
@@ -326,16 +327,19 @@ def logcdf(self, value):
         TensorVariable
         """
         # TODO: fix for multiple values
+        alpha = self.alpha
+        beta = self.beta
         n = self.n
 
-        return tt.switch(
-            tt.lt(value, 0),
-            -np.inf,
+        return bound(
             tt.switch(
                 tt.lt(value, n),
                 logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False),
                 0,
             ),
+            0 <= value,
+            0 < alpha,
+            0 < beta,
         )
 
 
@@ -452,14 +456,15 @@ def logcdf(self, value):
         """
         p = self.p
 
-        return tt.switch(
-            tt.lt(value, 0),
-            -np.inf,
+        return bound(
             tt.switch(
                 tt.lt(value, 1),
-                tt.log(1 - p),
+                tt.log1p(-p),
                 0,
             ),
+            0 <= value,
+            0 <= p,
+            p <= 1,
         )
 
     def _distr_parameters_for_repr(self):
@@ -589,15 +594,12 @@ def logcdf(self, value):
         q = self.q
         beta = self.beta
 
-        return tt.switch(
-            tt.lt(value, 0),
-            -np.inf,
-            bound(
-                tt.log(1 - tt.power(q, tt.power(value + 1, beta))),
-                0 < q,
-                q < 1,
-                0 < beta,
-            ),
+        return bound(
+            tt.log(1 - tt.power(q, tt.power(value + 1, beta))),
+            0 <= value,
+            0 < q,
+            q < 1,
+            0 < beta,
         )
 
 
@@ -707,8 +709,8 @@ def logcdf(self, value):
 
         return bound(
             tt.log(tt.gammaincc(value + 1, mu)),
-            value >= 0,
-            mu >= 0,
+            0 <= value,
+            0 <= mu,
         )
 
 
@@ -888,7 +890,8 @@ def logcdf(self, value):
 
         return bound(
             tt.log(incomplete_beta(alpha, tt.floor(value) + 1, p)),
-            alpha > 0,
+            0 <= value,
+            0 < alpha,
             0 <= p,
             p <= 1,
         )
@@ -993,7 +996,7 @@ def logcdf(self, value):
 
         return bound(
             log1mexp(-tt.log1p(-p) * value),
-            value >= 0,
+            0 <= value,
             0 <= p,
             p <= 1,
         )
@@ -1126,25 +1129,23 @@ def logcdf(self, value):
         TensorVariable
         """
         # TODO: fix for multiple values
+        # TODO: Use lower upper in locgdf for smarter logsumexp?
         N = self.N
         n = self.n
         k = self.k
 
-        return tt.switch(
-            tt.lt(value, 0),
-            -np.inf,
+        return bound(
             tt.switch(
                 tt.lt(value, n),
                 logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False),
-                bound(
-                    0,
-                    N > 0,
-                    k >= 0,
-                    n >= 0,
-                    k <= N,
-                    n <= N,
-                ),
+                0,
             ),
+            0 <= value,
+            0 < N,
+            0 <= k,
+            0 <= n,
+            k <= N,
+            n <= N,
         )
 
 
@@ -1253,7 +1254,11 @@ def logcdf(self, value):
         upper = self.upper
         lower = self.lower
 
-        return tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1)
+        return tt.switch(
+            tt.lt(upper, lower),
+            -np.inf,
+            tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1),
+        )
 
 
 class Categorical(Discrete):

From 450a6f78d4a2b62d5eddef66aa54ebc00bc0aa26 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 28 Dec 2020 16:23:34 +0100
Subject: [PATCH 5/9] Fix bounds for logcdf of DiscreteUniform

---
 pymc3/distributions/discrete.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 5e09e8fd3a..c5809464ae 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1254,10 +1254,10 @@ def logcdf(self, value):
         upper = self.upper
         lower = self.lower
 
-        return tt.switch(
-            tt.lt(upper, lower),
-            -np.inf,
+        return bound(
             tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1),
+            lower <= value,
+            lower <= upper,
         )
 
 

From 2c3c2a0ce3a8d35355f4513b4e1846ab4e997c84 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 29 Dec 2020 13:22:13 +0100
Subject: [PATCH 6/9] Add safe values/ parameters for `BetaBinomial`,
 `Poisson`, and `HyperGeometric` to avoid errors when evaluating negative
 logcdfs.

---
 pymc3/distributions/discrete.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index c5809464ae..705e98e99b 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -330,11 +330,12 @@ def logcdf(self, value):
         alpha = self.alpha
         beta = self.beta
         n = self.n
+        safe_lower = tt.switch(tt.lt(value, 0), value, 0)
 
         return bound(
             tt.switch(
                 tt.lt(value, n),
-                logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False),
+                logsumexp(self.logp(tt.arange(safe_lower, value + 1)), keepdims=False),
                 0,
             ),
             0 <= value,
@@ -706,9 +707,12 @@ def logcdf(self, value):
         """
         mu = self.mu
         value = tt.floor(value)
+        # To avoid issue with #4340
+        safe_mu = tt.switch(tt.lt(mu, 0), 0, mu)
+        safe_value = tt.switch(tt.lt(value, 0), 0, value)
 
         return bound(
-            tt.log(tt.gammaincc(value + 1, mu)),
+            tt.log(tt.gammaincc(safe_value + 1, safe_mu)),
             0 <= value,
             0 <= mu,
         )
@@ -1133,11 +1137,12 @@ def logcdf(self, value):
         N = self.N
         n = self.n
         k = self.k
+        safe_lower = tt.switch(tt.lt(value, 0), value, 0)
 
         return bound(
             tt.switch(
                 tt.lt(value, n),
-                logsumexp(self.logp(tt.arange(0, value + 1)), keepdims=False),
+                logsumexp(self.logp(tt.arange(safe_lower, value + 1)), keepdims=False),
                 0,
             ),
             0 <= value,

From 670a65292bc6bad424b836ec9b13d2c2218810df Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 29 Dec 2020 14:08:47 +0100
Subject: [PATCH 7/9] Change `check_selfconsistency_discrete_logcdf` to work
 with negative domains, such as the DiscreteUniform.

---
 pymc3/tests/test_distributions.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 1005b63c80..9a21e00c15 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -588,7 +588,7 @@ def check_selfconsistency_discrete_logcdf(
         for pt in product(domains, n_samples=n_samples):
             params = dict(pt)
             value = params.pop("value")
-            values = np.arange(0, value + 1)
+            values = np.arange(domain.lower, value + 1)
             dist = distribution.dist(**params)
             assert_almost_equal(
                 dist.logcdf(value).tag.test_value,
@@ -673,8 +673,7 @@ def test_discrete_unif(self):
         self.check_selfconsistency_discrete_logcdf(
             DiscreteUniform,
             Rdunif,
-            # Using lower = Bool, as this unittest assumes the distribution domain starts at zero.
-            {"lower": Bool, "upper": Rplusdunif},
+            {"lower": -Rplusdunif, "upper": Rplusdunif},
         )
 
     def test_flat(self):

From bcf87a2656c54eb95b454e5f0773f941007f38c7 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Thu, 31 Dec 2020 11:29:45 +0100
Subject: [PATCH 8/9] Raise TypeError in logcdf methods that only accept scalar
 values. Fix docstring formatting.

---
 pymc3/distributions/discrete.py | 67 ++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 705e98e99b..b665a86597 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -163,7 +163,14 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
-        # TODO: work with multiple values (see #4342)
+        # incomplete_beta function can only handle scalar values (see #4342)
+        if np.ndim(value):
+            raise TypeError(
+                "Binomial.logcdf expects a scalar value but received a {}-dimensional object.".format(
+                    np.ndim(value)
+                )
+            )
+
         n = self.n
         p = self.p
         value = tt.floor(value)
@@ -326,7 +333,14 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
-        # TODO: fix for multiple values
+        # logcdf can only handle scalar values at the moment
+        if np.ndim(value):
+            raise TypeError(
+                "BetaBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format(
+                    np.ndim(value)
+                )
+            )
+
         alpha = self.alpha
         beta = self.beta
         n = self.n
@@ -446,11 +460,13 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for Bernoulli distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
+
         Returns
         -------
         TensorVariable
@@ -583,11 +599,13 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for Discrete Weibull distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
+
         Returns
         -------
         TensorVariable
@@ -696,11 +714,13 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for Poisson distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
+
         Returns
         -------
         TensorVariable
@@ -887,7 +907,14 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
-        # TODO: work with multiple values (see #4342)
+        # incomplete_beta function can only handle scalar values (see #4342)
+        if np.ndim(value):
+            raise TypeError(
+                "NegativeBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format(
+                    np.ndim(value)
+                )
+            )
+
         # TODO: avoid `p` recomputation if distribution was defined in terms of `p`
         alpha = self.alpha
         p = alpha / (self.mu + alpha)
@@ -987,11 +1014,13 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for Geometric distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
+
         Returns
         -------
         TensorVariable
@@ -1124,15 +1153,24 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for HyperGeometric distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value for which log CDF is calculated.
+
         Returns
         -------
         TensorVariable
         """
-        # TODO: fix for multiple values
+        # logcdf can only handle scalar values at the moment
+        if np.ndim(value):
+            raise TypeError(
+                "BetaBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format(
+                    np.ndim(value)
+                )
+            )
+
         # TODO: Use lower upper in locgdf for smarter logsumexp?
         N = self.N
         n = self.n
@@ -1247,11 +1285,13 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for Discrete uniform distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
+
         Returns
         -------
         TensorVariable
@@ -1558,11 +1598,13 @@ def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for ZeroInflatedPoisson distribution
         at the specified value.
+
         Parameters
         ----------
         value: numeric
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
+
         Returns
         -------
         TensorVariable
@@ -1698,7 +1740,14 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
-        # TODO: work with multiple values (see #4342)
+        # logcdf can only handle scalar values due to limitation in Binomial.logcdf
+        if np.ndim(value):
+            raise TypeError(
+                "ZeroInflatedBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format(
+                    np.ndim(value)
+                )
+            )
+
         psi = self.psi
 
         return bound(
@@ -1861,7 +1910,13 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
-        # TODO: work with multiple values (see #4342)
+        # logcdf can only handle scalar values due to limitation in NegativeBinomial.logcdf
+        if np.ndim(value):
+            raise TypeError(
+                "ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format(
+                    np.ndim(value)
+                )
+            )
         psi = self.psi
 
         return bound(

From dc4ce4a131e2bbf55ed53805b6b9c40d52dd34ca Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Thu, 31 Dec 2020 12:01:29 +0100
Subject: [PATCH 9/9] Small change to logcdf methods of `DiscreteWeibull` and
 `ZeroInflated` making use of `tt.log1p` and `logaddexp`. More informative
 comment on workaround for `Poisson.logcdf`.

---
 pymc3/distributions/discrete.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index b665a86597..d1dc858f9c 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -614,7 +614,7 @@ def logcdf(self, value):
         beta = self.beta
 
         return bound(
-            tt.log(1 - tt.power(q, tt.power(value + 1, beta))),
+            tt.log1p(-tt.power(q, tt.power(value + 1, beta))),
             0 <= value,
             0 < q,
             q < 1,
@@ -727,7 +727,7 @@ def logcdf(self, value):
         """
         mu = self.mu
         value = tt.floor(value)
-        # To avoid issue with #4340
+        # To avoid gammaincc C-assertion when given invalid values (#4340)
         safe_mu = tt.switch(tt.lt(mu, 0), 0, mu)
         safe_value = tt.switch(tt.lt(value, 0), 0, value)
 
@@ -1612,7 +1612,7 @@ def logcdf(self, value):
         psi = self.psi
 
         return bound(
-            tt.log(1 - psi + psi * tt.exp(self.pois.logcdf(value))),
+            logaddexp(tt.log1p(-psi), tt.log(psi) + self.pois.logcdf(value)),
             0 <= value,
             0 <= psi,
             psi <= 1,
@@ -1751,7 +1751,7 @@ def logcdf(self, value):
         psi = self.psi
 
         return bound(
-            tt.log(1 - psi + psi * tt.exp(self.bin.logcdf(value))),
+            logaddexp(tt.log1p(-psi), tt.log(psi) + self.bin.logcdf(value)),
             0 <= value,
             0 <= psi,
             psi <= 1,
@@ -1920,7 +1920,7 @@ def logcdf(self, value):
         psi = self.psi
 
         return bound(
-            tt.log(1 - psi + psi * tt.exp(self.nb.logcdf(value))),
+            logaddexp(tt.log1p(-psi), tt.log(psi) + self.nb.logcdf(value)),
             0 <= value,
             0 <= psi,
             psi <= 1,