From b7492d2305dbd65fc3f4ffa1b36857704b0aadbb Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 1 Oct 2019 16:04:00 -0700
Subject: [PATCH 01/56] Add implementation of DM distribution.

---
 pymc3/distributions/multivariate.py | 69 +++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index a90d239389..f3f186650a 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -690,6 +690,75 @@ def logp(self, x):
         )
 
 
+class DirichletMultinomial(pm.Discrete):
+    R"""Dirichlet Multinomial log-likelihood.
+
+    Dirichlet mixture of multinomials distribution, with a marginalized PMF.
+
+    .. math::
+
+    f(x \mid n, \alpha) = \frac{\Gamma(n + 1)\Gamma(\sum\alpha_k)}
+                              {\Gamma(\n + \sum\alpha_k)}
+                         \prod_{k=1}^K
+                         \frac{\Gamma(x_k + \alpha_k)}
+                              {\Gamma(x_k + 1)\Gamma(alpha_k)}
+
+    ==========  ===========================================
+    Support     :math:`x \in \{0, 1, \ldots, n\}` such that
+                :math:`\sum x_i = n`
+    Mean        :math:`n \frac{\alpha_i}{\sum{\alpha_k}}`
+    ==========  ===========================================
+
+    Parameters
+    ----------
+    alpha : one- or two-dimensional array
+        Dirichlet parameter.  Elements must be non-negative.
+        Dimension of each element of the distribution is the length
+        of the last dimension of alpha.
+    n     : one-dimensional array
+        Number of items sampled.
+
+    """
+
+    def __init__(self, alpha, n, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.alpha = tt.as_tensor_variable(alpha)
+        self.n = tt.as_tensor_variable(n)
+
+    def logp(self, x):
+        alpha = self.alpha
+        n = self.n
+        sum_alpha = alpha.sum(axis=-1)
+
+        const = (gammaln(n + 1) + gammaln(sum_alpha)) - gammaln(n + sum_alpha)
+        series = gammaln(x + alpha) - (gammaln(x + 1) + gammaln(alpha))
+        result = const + series.sum(axis=-1)
+        return bound(result,
+                     tt.all(tt.ge(x, 0)),
+                     tt.all(tt.gt(alpha, 0)),
+                     tt.all(tt.ge(n, 0)),
+                     tt.all(tt.eq(x.sum(axis=-1), n)),
+                     broadcast_conditions=False)
+
+    def random(self, point=None, size=None, repeat=None):
+        alpha, n = draw_values([self.alpha, self.n], point=point)
+        out = np.empty_like(alpha)
+        for i in range(len(n)):
+            p = np.random.dirichlet(alpha[i, :])
+            x = np.random.dirichlet(n[i], p)
+            out[i, :] = x
+        return out
+
+    def _repr_latex_(self, name=None, dist=None):
+        if dist is None:
+            dist = self
+        n = dist.n
+        alpha = dist.alpha
+        return (r'${} \sim \text{{DirichletMultinomial}}('
+                r'\matit{{n}}={} \mathit{{\alpha}}={})$'
+                ).format(name, get_variable_name(n), get_variable_name(alpha))
+
+
 def posdef(AA):
     try:
         linalg.cholesky(AA)

From 2106f7cd52da261b41726eb79610595fc2a6024d Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Wed, 2 Oct 2019 11:47:10 -0700
Subject: [PATCH 02/56] Fix class name mistake.

---
 pymc3/distributions/multivariate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index f3f186650a..38d43b2af4 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -51,6 +51,7 @@
     "MvStudentT",
     "Dirichlet",
     "Multinomial",
+    "DirichletMultinomial",
     "Wishart",
     "WishartBartlett",
     "LKJCorr",

From 487fc8a480404eacd6b9d64c9522517405d3550a Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Wed, 2 Oct 2019 11:47:31 -0700
Subject: [PATCH 03/56] Add DM dist to exported multivariate distributions.

---
 pymc3/distributions/multivariate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 38d43b2af4..576a31d23f 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -691,7 +691,7 @@ def logp(self, x):
         )
 
 
-class DirichletMultinomial(pm.Discrete):
+class DirichletMultinomial(Discrete):
     R"""Dirichlet Multinomial log-likelihood.
 
     Dirichlet mixture of multinomials distribution, with a marginalized PMF.

From 24d7ec8b3b8cd73bb35fc6af0eeccd55ab82c4be Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Fri, 6 Dec 2019 16:28:39 -0800
Subject: [PATCH 04/56] Export DirichletMultinomial in pymc3.distributions

As suggested in
https://github.com/pymc-devs/pymc3/pull/3639#issuecomment-559822430

Also see:
https://github.com/pymc-devs/pymc3/pull/3639#issuecomment-559840453
but this seems to be part of a broader discussion.
---
 pymc3/distributions/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index ade7c9ef00..e0b33887f3 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -154,6 +154,7 @@
     "MvStudentT",
     "Dirichlet",
     "Multinomial",
+    "DirichletMultinomial",
     "Wishart",
     "WishartBartlett",
     "LKJCholeskyCov",

From 4fbd1d9ec04562a3f12821c167459f6eb9b97475 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 14:09:48 -0800
Subject: [PATCH 05/56] Attempt at matching Multinomial initialization.

---
 pymc3/distributions/multivariate.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 576a31d23f..c2399511d6 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -712,20 +712,30 @@ class DirichletMultinomial(Discrete):
 
     Parameters
     ----------
-    alpha : one- or two-dimensional array
+    alpha : two-dimensional array
         Dirichlet parameter.  Elements must be non-negative.
         Dimension of each element of the distribution is the length
-        of the last dimension of alpha.
+        of the second dimension of alpha.
     n     : one-dimensional array
-        Number of items sampled.
+        Total counts in each replicate.
 
     """
 
-    def __init__(self, alpha, n, *args, **kwargs):
+    def __init__(self, n, alpha, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.alpha = tt.as_tensor_variable(alpha)
         self.n = tt.as_tensor_variable(n)
 
+        p = self.alpha / self.alpha.sum(-1, keepdims=True)
+        self.mean = self.n * p
+
+        mode = tt.cast(tt.round(self.mean), 'int32')
+        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
+        inc_bool_arr = tt.abs_(diff) > 0
+        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
+                                diff[inc_bool_arr.nonzero()])
+        self.mode = mode
+
     def logp(self, x):
         alpha = self.alpha
         n = self.n

From 685a428b6c7a82a554e1a861f1aad21d741f4ae9 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 14:10:22 -0800
Subject: [PATCH 06/56] Add some simple tests for DM.

---
 pymc3/tests/test_distributions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index a2d00a2d60..9eb39a959f 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -44,6 +44,7 @@
     Constant,
     DensityDist,
     Dirichlet,
+    DirichletMultinomial,
     DiscreteUniform,
     DiscreteWeibull,
     ExGaussian,

From ad8e77ec649033143e2f35b91caeb743bb3e7ca9 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 15:12:30 -0800
Subject: [PATCH 07/56] Correctly deal with 1d n and 2d alpha.

---
 pymc3/distributions/multivariate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index c2399511d6..8230554be6 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -727,10 +727,10 @@ def __init__(self, n, alpha, *args, **kwargs):
         self.n = tt.as_tensor_variable(n)
 
         p = self.alpha / self.alpha.sum(-1, keepdims=True)
-        self.mean = self.n * p
+        self.mean = tt.shape_padright(self.n) * p
 
         mode = tt.cast(tt.round(self.mean), 'int32')
-        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
+        diff = tt.shape_padright(self.n) - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
         mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                 diff[inc_bool_arr.nonzero()])

From 8fa717a890b3fbb0145206f6be5518495cc584d6 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 15:12:41 -0800
Subject: [PATCH 08/56] Fix typo in DM random.

---
 pymc3/distributions/multivariate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 8230554be6..e60536ab34 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -756,7 +756,7 @@ def random(self, point=None, size=None, repeat=None):
         out = np.empty_like(alpha)
         for i in range(len(n)):
             p = np.random.dirichlet(alpha[i, :])
-            x = np.random.dirichlet(n[i], p)
+            x = np.random.multinomial(n[i], p)
             out[i, :] = x
         return out
 

From 4db6b1c93a72a89ead3e64040e2ff60f0062a6de Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 15:14:46 -0800
Subject: [PATCH 09/56] Fix faulty tests for DM.

---
 pymc3/tests/test_distributions.py | 32 +++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 9eb39a959f..b06c7786d4 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1725,6 +1725,38 @@ def test_batch_multinomial(self):
         sample = dist.random(size=2)
         assert_allclose(sample, np.stack([vals, vals], axis=0))
 
+    @pytest.mark.parametrize('alpha,n', [
+        [[[.25, .25, .25, .25]], [1]],
+        [[[.3, .6, .05, .05]], [2]],
+        [[[.3, .6, .05, .05]], [10]],
+        [[[.3, .6, .05, .05],
+          [.25, .25, .25, .25]],
+         [10, 2]],
+    ])
+    def test_dirichlet_multinomial_mode(self, alpha, n):
+        alpha = np.array(alpha)
+        n = np.array(n)
+        with Model() as model:
+            m = DirichletMultinomial('m', n, alpha,
+                                     shape=alpha.shape)
+        assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
+
+    @pytest.mark.parametrize('alpha,n', [
+        [[[.25, .25, .25, .25]], [1]],
+        [[[.3, .6, .05, .05]], [2]],
+        [[[.3, .6, .05, .05]], [10]],
+        [[[.3, .6, .05, .05],
+          [.25, .25, .25, .25]],
+         [10, 2]],
+    ])
+    def test_dirichlet_multinomial_random(self, alpha, n):
+        alpha = np.array(alpha)
+        n = np.array(n)
+        with Model() as model:
+            m = DirichletMultinomial('m', n=n, alpha=alpha,
+                                     shape=alpha.shape)
+        m.random()
+
     def test_categorical_bounds(self):
         with Model():
             x = Categorical("x", p=np.array([0.2, 0.3, 0.5]))

From 01d359b9745360e9669b435c0823b3869f503114 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 15:15:06 -0800
Subject: [PATCH 10/56] Drop redundant initialization test for DM.

---
 pymc3/tests/test_distributions.py | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index b06c7786d4..56ebb12b93 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1703,27 +1703,9 @@ def test_multinomial_vec_2d_p(self):
             decimal=4,
         )
 
-    def test_batch_multinomial(self):
-        n = 10
-        vals = np.zeros((4, 5, 3), dtype="int32")
-        p = np.zeros_like(vals, dtype=theano.config.floatX)
-        inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
-        np.put_along_axis(vals, inds, n, axis=-1)
-        np.put_along_axis(p, inds, 1, axis=-1)
-
-        dist = Multinomial.dist(n=n, p=p, shape=vals.shape)
-        value = tt.tensor3(dtype="int32")
-        value.tag.test_value = np.zeros_like(vals, dtype="int32")
-        logp = tt.exp(dist.logp(value))
-        f = theano.function(inputs=[value], outputs=logp)
-        assert_almost_equal(
-            f(vals),
-            np.ones(vals.shape[:-1] + (1,)),
-            decimal=select_by_precision(float64=6, float32=3),
-        )
-
-        sample = dist.random(size=2)
-        assert_allclose(sample, np.stack([vals, vals], axis=0))
+        assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]),
+                            model.fastlogp({'m': vals}),
+                            decimal=4)
 
     @pytest.mark.parametrize('alpha,n', [
         [[[.25, .25, .25, .25]], [1]],

From 4892355e055a8fb9ef04262f1b295e3baa562a41 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 15:16:50 -0800
Subject: [PATCH 11/56] Add test that DM is normalized for n=1 case.

---
 pymc3/tests/test_distributions.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 56ebb12b93..e32bab3c81 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1723,6 +1723,22 @@ def test_dirichlet_multinomial_mode(self, alpha, n):
                                      shape=alpha.shape)
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
+    @pytest.mark.parametrize('alpha,n,enum', [
+        [[[.25, .25, .25, .25]], [1], [[1, 0, 0, 0],
+                                       [0, 1, 0, 0],
+                                       [0, 0, 1, 0],
+                                       [0, 0, 0, 1]]]
+    ])
+    def test_dirichlet_multinomial_pmf(self, alpha, n, enum):
+        alpha = np.array(alpha)
+        n = np.array(n)
+        with Model() as model:
+            m = DirichletMultinomial('m', n=n, alpha=alpha,
+                                     shape=alpha.shape)
+        logp = lambda x: m.distribution.logp(np.array([x])).eval()
+        p_all_poss = [np.exp(logp(x)) for x in enum]
+        assert_almost_equal(np.sum(p_all_poss), 1)
+
     @pytest.mark.parametrize('alpha,n', [
         [[[.25, .25, .25, .25]], [1]],
         [[[.3, .6, .05, .05]], [2]],

From bc5f3bfa590a176e6802e7a15347867bafb05df2 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 16 Dec 2019 15:17:32 -0800
Subject: [PATCH 12/56] Add DM test case based on BetaBinomial.

---
 pymc3/tests/test_distributions_random.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 5e735b6aed..c3927af80d 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -455,6 +455,11 @@ class TestBetaBinomial(BaseTestCases.BaseTestCase):
     params = {"n": 5, "alpha": 1.0, "beta": 1.0}
 
 
+class TestDirichletMultinomial(BaseTestCases.BaseTestCase):
+    distribution = pm.DirichletMultinomial
+    params = {'n': [5], 'alpha': [[1., 1., 1., 1.]]}
+
+
 class TestBernoulli(BaseTestCases.BaseTestCase):
     distribution = pm.Bernoulli
     params = {"p": 0.5}

From ffa705cdabaae95c008b259dba6dc5c1aa299d61 Mon Sep 17 00:00:00 2001
From: Colin <ColCarroll@users.noreply.github.com>
Date: Sat, 19 Sep 2020 14:04:30 -0400
Subject: [PATCH 13/56] Update pymc3/distributions/multivariate.py

---
 pymc3/distributions/multivariate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index e60536ab34..61399c5f2a 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -752,7 +752,7 @@ def logp(self, x):
                      broadcast_conditions=False)
 
     def random(self, point=None, size=None, repeat=None):
-        alpha, n = draw_values([self.alpha, self.n], point=point)
+        alpha, n = draw_values([self.alpha, self.n], point=point, size=size)
         out = np.empty_like(alpha)
         for i in range(len(n)):
             p = np.random.dirichlet(alpha[i, :])

From c801ef1459f38b3bfc5b77869d06749835608bdd Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 22 Dec 2020 17:37:23 +0100
Subject: [PATCH 14/56] - Infer shape by default (copied code from Dirichlet
 Distribution) - Add default shape in `test_distributions_random.py`

---
 pymc3/distributions/__init__.py          |  1 +
 pymc3/distributions/multivariate.py      | 15 +++++++++++++++
 pymc3/tests/test_distributions_random.py |  1 +
 3 files changed, 17 insertions(+)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index e0b33887f3..4e47dab38d 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -80,6 +80,7 @@
 from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture
 from pymc3.distributions.multivariate import (
     Dirichlet,
+    DirichletMultinomial,
     KroneckerNormal,
     LKJCholeskyCov,
     LKJCorr,
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 61399c5f2a..d7e19e431f 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -722,6 +722,21 @@ class DirichletMultinomial(Discrete):
     """
 
     def __init__(self, n, alpha, *args, **kwargs):
+
+        if kwargs.get("shape") is None:
+            warnings.warn(
+                (
+                    "Shape not explicitly set. "
+                    "Please, set the value using the `shape` keyword argument. "
+                    "Using the test value to infer the shape."
+                ),
+                DeprecationWarning,
+            )
+            try:
+                kwargs["shape"] = np.shape(get_test_value(alpha))
+            except TestValueError:
+                pass
+
         super().__init__(*args, **kwargs)
         self.alpha = tt.as_tensor_variable(alpha)
         self.n = tt.as_tensor_variable(n)
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index c3927af80d..f37efcbdfd 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -458,6 +458,7 @@ class TestBetaBinomial(BaseTestCases.BaseTestCase):
 class TestDirichletMultinomial(BaseTestCases.BaseTestCase):
     distribution = pm.DirichletMultinomial
     params = {'n': [5], 'alpha': [[1., 1., 1., 1.]]}
+    default_shape = (1, 4)
 
 
 class TestBernoulli(BaseTestCases.BaseTestCase):

From c8921eeeda1006075a9bb481737bec7628770af6 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 22 Dec 2020 19:40:15 +0100
Subject: [PATCH 15/56] - Use size information in random method - Change random
 unittests

---
 pymc3/distributions/multivariate.py      | 19 ++++++++++++++-----
 pymc3/tests/test_distributions_random.py | 23 +++++++++++++++++------
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index d7e19e431f..f29f47a929 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -768,11 +768,20 @@ def logp(self, x):
 
     def random(self, point=None, size=None, repeat=None):
         alpha, n = draw_values([self.alpha, self.n], point=point, size=size)
-        out = np.empty_like(alpha)
-        for i in range(len(n)):
-            p = np.random.dirichlet(alpha[i, :])
-            x = np.random.multinomial(n[i], p)
-            out[i, :] = x
+        if size is not None:
+            out = np.empty((size, *alpha.shape))
+            for j in range(size):
+                for i in range(len(n)):
+                    p = np.random.dirichlet(alpha[i, :])
+                    x = np.random.multinomial(n[i], p)
+                    out[j, i, :] = x
+        else:
+            out = np.empty_like(alpha)
+            for i in range(len(n)):
+                p = np.random.dirichlet(alpha[i, :])
+                x = np.random.multinomial(n[i], p)
+                out[i, :] = x
+
         return out
 
     def _repr_latex_(self, name=None, dist=None):
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index f37efcbdfd..3c23be3cfa 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -455,12 +455,6 @@ class TestBetaBinomial(BaseTestCases.BaseTestCase):
     params = {"n": 5, "alpha": 1.0, "beta": 1.0}
 
 
-class TestDirichletMultinomial(BaseTestCases.BaseTestCase):
-    distribution = pm.DirichletMultinomial
-    params = {'n': [5], 'alpha': [[1., 1., 1., 1.]]}
-    default_shape = (1, 4)
-
-
 class TestBernoulli(BaseTestCases.BaseTestCase):
     distribution = pm.Bernoulli
     params = {"p": 0.5}
@@ -995,6 +989,23 @@ def ref_rand(size, a):
                 ref_rand=ref_rand,
             )
 
+    def test_dirichletmultinomial(self):
+        def ref_rand(size, n, alpha):
+            p = st.dirichlet.rvs(alpha, size=size)
+            res = np.empty((size, *alpha.shape))
+            for i in range(size):
+                res[i, :] = st.multinomial(p=p[i], n=n).rvs()
+            return res
+
+        for n in [2, 3]:
+            pymc3_random_discrete(
+                pm.DirichletMultinomial,
+                {"n": Vector(Nat, 1), "alpha": Vector(Rplus, np.array([1, n]))},
+                valuedomain=Vector(Nat, np.array([1, n])),
+                size=100,
+                ref_rand=ref_rand,
+            )
+
     def test_multinomial(self):
         def ref_rand(size, p, n):
             return nr.multinomial(pvals=p, n=n, size=size)

From e801568654f5de44b037fab813674195e50daa41 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 22 Dec 2020 19:50:19 +0100
Subject: [PATCH 16/56] - Restore merge accidental deletions

---
 pymc3/tests/test_distributions.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index e32bab3c81..9ba3671f50 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1703,9 +1703,33 @@ def test_multinomial_vec_2d_p(self):
             decimal=4,
         )
 
-        assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]),
-                            model.fastlogp({'m': vals}),
-                            decimal=4)
+        assert_almost_equal(
+            sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]),
+            model.fastlogp({'m': vals}),
+            decimal=4
+        )
+
+    def test_batch_multinomial(self):
+        n = 10
+        vals = np.zeros((4, 5, 3), dtype="int32")
+        p = np.zeros_like(vals, dtype=theano.config.floatX)
+        inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
+        np.put_along_axis(vals, inds, n, axis=-1)
+        np.put_along_axis(p, inds, 1, axis=-1)
+
+        dist = Multinomial.dist(n=n, p=p, shape=vals.shape)
+        value = tt.tensor3(dtype="int32")
+        value.tag.test_value = np.zeros_like(vals, dtype="int32")
+        logp = tt.exp(dist.logp(value))
+        f = theano.function(inputs=[value], outputs=logp)
+        assert_almost_equal(
+            f(vals),
+            np.ones(vals.shape[:-1] + (1,)),
+            decimal=select_by_precision(float64=6, float32=3),
+        )
+
+        sample = dist.random(size=2)
+        assert_allclose(sample, np.stack([vals, vals], axis=0))
 
     @pytest.mark.parametrize('alpha,n', [
         [[[.25, .25, .25, .25]], [1]],

From 3483ab5f9653c50afe413569cc59e2842477deb4 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 22 Dec 2020 19:52:42 +0100
Subject: [PATCH 17/56] - Underscore missing

---
 pymc3/tests/test_distributions_random.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 3c23be3cfa..bb941ba285 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -989,7 +989,7 @@ def ref_rand(size, a):
                 ref_rand=ref_rand,
             )
 
-    def test_dirichletmultinomial(self):
+    def test_dirichlet_multinomial(self):
         def ref_rand(size, n, alpha):
             p = st.dirichlet.rvs(alpha, size=size)
             res = np.empty((size, *alpha.shape))

From 23ba2e4706b22e20c0764e04a6c95cea3dc35eb2 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 22 Dec 2020 19:57:15 +0100
Subject: [PATCH 18/56] - More merge cleaning

---
 pymc3/tests/test_distributions.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 9ba3671f50..843a9124dd 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1703,12 +1703,6 @@ def test_multinomial_vec_2d_p(self):
             decimal=4,
         )
 
-        assert_almost_equal(
-            sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]),
-            model.fastlogp({'m': vals}),
-            decimal=4
-        )
-
     def test_batch_multinomial(self):
         n = 10
         vals = np.zeros((4, 5, 3), dtype="int32")

From fe018ecf6826f7ab99cb420ada5438fae262cc40 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 29 Dec 2020 10:16:43 -0800
Subject: [PATCH 19/56] Bring DirichletMultinomial initialization into
 alignment with Multinomial.

---
 pymc3/distributions/multivariate.py | 32 +++++++++++------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index f29f47a929..2951f3f2f3 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -722,30 +722,22 @@ class DirichletMultinomial(Discrete):
     """
 
     def __init__(self, n, alpha, *args, **kwargs):
-
-        if kwargs.get("shape") is None:
-            warnings.warn(
-                (
-                    "Shape not explicitly set. "
-                    "Please, set the value using the `shape` keyword argument. "
-                    "Using the test value to infer the shape."
-                ),
-                DeprecationWarning,
-            )
-            try:
-                kwargs["shape"] = np.shape(get_test_value(alpha))
-            except TestValueError:
-                pass
-
         super().__init__(*args, **kwargs)
-        self.alpha = tt.as_tensor_variable(alpha)
-        self.n = tt.as_tensor_variable(n)
+        n = tt.as_tensor_variable(n)
+        alpha = tt.as_tensor_variable(alpha)
+        p = alpha / alpha.sum(-1, keepdims=True)
 
-        p = self.alpha / self.alpha.sum(-1, keepdims=True)
-        self.mean = tt.shape_padright(self.n) * p
+        if len(self.shape) > 1:
+            self.n = tt.shape_padright(n)
+            self.alpha = alpha if alpha.ndim > 1 else tt.shape_padleft(alpha)
+        else:
+            # n is a scalar, p is a 1d array
+            self.n = n
+            self.alpha = alpha
 
+        self.mean = self.n * p
         mode = tt.cast(tt.round(self.mean), 'int32')
-        diff = tt.shape_padright(self.n) - tt.sum(mode, axis=-1, keepdims=True)
+        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
         mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                 diff[inc_bool_arr.nonzero()])

From 25fd41a515143b610f2b4e0b4813322cc27c49f2 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Fri, 1 Jan 2021 09:53:21 -0800
Subject: [PATCH 20/56] Align all DM tests with Multinomial.

---
 pymc3/tests/test_distributions.py        | 199 ++++++++++++++++++-----
 pymc3/tests/test_distributions_random.py |  27 +--
 2 files changed, 175 insertions(+), 51 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 843a9124dd..b23bbb9410 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -266,6 +266,21 @@ def multinomial_logpdf(value, n, p):
         return -inf
 
 
+def dirichlet_multinomial_logpmf(value, n, alpha):
+    value, n, alpha = [np.asarray(x) for x in [value, n, alpha]]
+    assert value.ndim == 1
+    assert n.ndim == 0
+    assert alpha.shape == value.shape
+    gammaln = scipy.special.gammaln
+    if value.sum() == n and (0 <= value).all() and (value <= n).all():
+        sum_alpha = alpha.sum(axis=-1)
+        const = gammaln(n + 1) + gammaln(sum_alpha) - gammaln(n + sum_alpha)
+        series = gammaln(value + alpha) - gammaln(value + 1) - gammaln(alpha)
+        return const + series.sum(axis=-1)
+    else:
+        return -inf
+
+
 def beta_mu_sigma(value, mu, sigma):
     kappa = mu * (1 - mu) / sigma ** 2 - 1
     if kappa > 0:
@@ -1725,54 +1740,156 @@ def test_batch_multinomial(self):
         sample = dist.random(size=2)
         assert_allclose(sample, np.stack([vals, vals], axis=0))
 
-    @pytest.mark.parametrize('alpha,n', [
-        [[[.25, .25, .25, .25]], [1]],
-        [[[.3, .6, .05, .05]], [2]],
-        [[[.3, .6, .05, .05]], [10]],
-        [[[.3, .6, .05, .05],
-          [.25, .25, .25, .25]],
-         [10, 2]],
-    ])
+    @pytest.mark.parametrize("n", [2, 3])
+    def test_dirichlet_multinomial(self, n):
+        self.pymc3_matches_scipy(
+            DirichletMultinomial, Vector(Nat, n), {"alpha": Vector(Rplus, n), "n": Nat}, dirichlet_multinomial_logpmf
+        )
+
+    @pytest.mark.parametrize(
+        "alpha, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 1],
+            [[0.3, 0.6, 0.05, 0.05], 2],
+            [[0.3, 0.6, 0.05, 0.05], 10],
+        ],
+    )
     def test_dirichlet_multinomial_mode(self, alpha, n):
-        alpha = np.array(alpha)
-        n = np.array(n)
+        _alpha = np.array(alpha)
+        with Model() as model:
+            m = DirichletMultinomial("m", n, _alpha, _alpha.shape)
+        assert_allclose(m.distribution.mode.eval().sum(), n)
+        _alpha = np.array([alpha, alpha])
         with Model() as model:
-            m = DirichletMultinomial('m', n, alpha,
-                                     shape=alpha.shape)
+            m = DirichletMultinomial("m", n, _alpha, _alpha.shape)
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
-    @pytest.mark.parametrize('alpha,n,enum', [
-        [[[.25, .25, .25, .25]], [1], [[1, 0, 0, 0],
-                                       [0, 1, 0, 0],
-                                       [0, 0, 1, 0],
-                                       [0, 0, 0, 1]]]
-    ])
-    def test_dirichlet_multinomial_pmf(self, alpha, n, enum):
-        alpha = np.array(alpha)
-        n = np.array(n)
-        with Model() as model:
-            m = DirichletMultinomial('m', n=n, alpha=alpha,
-                                     shape=alpha.shape)
-        logp = lambda x: m.distribution.logp(np.array([x])).eval()
-        p_all_poss = [np.exp(logp(x)) for x in enum]
-        assert_almost_equal(np.sum(p_all_poss), 1)
-
-    @pytest.mark.parametrize('alpha,n', [
-        [[[.25, .25, .25, .25]], [1]],
-        [[[.3, .6, .05, .05]], [2]],
-        [[[.3, .6, .05, .05]], [10]],
-        [[[.3, .6, .05, .05],
-          [.25, .25, .25, .25]],
-         [10, 2]],
-    ])
-    def test_dirichlet_multinomial_random(self, alpha, n):
-        alpha = np.array(alpha)
-        n = np.array(n)
+    @pytest.mark.parametrize(
+        "alpha, shape, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 4, 2],
+            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
+            # 3: expect to fail
+            # [[.25, .25, .25, .25], (10, 4)],
+            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
+            # 5: expect to fail
+            # [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
+            [
+                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
+                (10, 2, 4),
+                [31, 37],
+            ],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
+        ],
+    )
+    def test_dirichlet_multinomial_random(self, alpha, shape, n):
+        alpha = np.asarray(alpha)
         with Model() as model:
-            m = DirichletMultinomial('m', n=n, alpha=alpha,
-                                     shape=alpha.shape)
+            m = DirichletMultinomial("m", n=n, alpha=alpha, shape=shape)
         m.random()
 
+    def test_dirichlet_multinomial_mode_with_shape(self):
+        n = [1, 10]
+        alpha = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
+        with Model() as model:
+            m = DirichletMultinomial("m", n=n, alpha=alpha, shape=(2, 4))
+        assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
+
+    def test_dirichlet_multinomial_vec(self):
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        alpha = np.array([0.2, 0.3, 0.5])
+        n = 10
+
+        with Model() as model_single:
+            DirichletMultinomial("m", n=n, alpha=alpha, shape=len(alpha))
+
+        with Model() as model_many:
+            DirichletMultinomial("m", n=n, alpha=alpha, shape=vals.shape)
+
+        assert_almost_equal(
+            np.asarray([dirichlet_multinomial_logpmf(v, n, alpha) for v in vals]),
+            np.asarray([model_single.fastlogp({"m": val}) for val in vals]),
+            decimal=4,
+        )
+
+        assert_almost_equal(
+            np.asarray([dirichlet_multinomial_logpmf(v, n, alpha) for v in vals]),
+            model_many.free_RVs[0].logp_elemwise({"m": vals}).squeeze(),
+            decimal=4,
+        )
+
+        assert_almost_equal(
+            sum([model_single.fastlogp({"m": val}) for val in vals]),
+            model_many.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_dirichlet_multinomial_vec_1d_n(self):
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        alpha = np.array([0.2, 0.3, 0.5])
+        ns = np.array([10, 11])
+
+        with Model() as model:
+            DirichletMultinomial("m", n=ns, alpha=alpha, shape=vals.shape)
+
+        assert_almost_equal(
+            sum([dirichlet_multinomial_logpmf(val, n, alpha) for val, n in zip(vals, ns)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_dirichlet_multinomial_vec_1d_n_2d_alpha(self):
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        alphas = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]])
+        ns = np.array([10, 11])
+
+        with Model() as model:
+            DirichletMultinomial("m", n=ns, alpha=alphas, shape=vals.shape)
+
+        assert_almost_equal(
+            sum([dirichlet_multinomial_logpmf(val, n, alpha) for val, n, alpha in zip(vals, ns, alphas)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_dirichlet_multinomial_vec_2d_alpha(self):
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        alphas = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]])
+        n = 10
+
+        with Model() as model:
+            DirichletMultinomial("m", n=n, alpha=alphas, shape=vals.shape)
+
+        assert_almost_equal(
+            sum([dirichlet_multinomial_logpmf(val, n, alpha) for val, alpha in zip(vals, alphas)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_batch_dirichlet_multinomial(self):
+        n = 10
+        vals = np.zeros((4, 5, 3), dtype="int32")
+        alpha = np.zeros_like(vals, dtype=theano.config.floatX)
+        inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
+        np.put_along_axis(vals, inds, n, axis=-1)
+        np.put_along_axis(alpha, inds, 1, axis=-1)
+
+        dist = DirichletMultinomial.dist(n=n, alpha=alpha, shape=vals.shape)
+        value = tt.tensor3(dtype="int32")
+        value.tag.test_value = np.zeros_like(vals, dtype="int32")
+        logp = tt.exp(dist.logp(value))
+        f = theano.function(inputs=[value], outputs=logp)
+        assert_almost_equal(
+            f(vals),
+            np.ones(vals.shape[:-1] + (1,)),
+            decimal=select_by_precision(float64=6, float32=3),
+        )
+
+        sample = dist.random(size=2)
+        assert_allclose(sample, np.stack([vals, vals], axis=0))
+
     def test_categorical_bounds(self):
         with Model():
             x = Categorical("x", p=np.array([0.2, 0.3, 0.5]))
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index bb941ba285..2087124ed1 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -89,9 +89,10 @@ def pymc3_random(
 
 
 def pymc3_random_discrete(
-    dist, paramdomains, valuedomain=Domain([0]), ref_rand=None, size=100000, alpha=0.05, fails=20
+    dist, paramdomains, valuedomain=Domain([0]), ref_rand=None, size=100000, alpha=0.05, fails=20,
+    extra_args=None,
 ):
-    model = build_model(dist, valuedomain, paramdomains)
+    model = build_model(dist, valuedomain, paramdomains, extra_args=extra_args)
     domains = paramdomains.copy()
     for pt in product(domains, n_samples=100):
         pt = pm.Point(pt, model=model)
@@ -990,19 +991,25 @@ def ref_rand(size, a):
             )
 
     def test_dirichlet_multinomial(self):
-        def ref_rand(size, n, alpha):
-            p = st.dirichlet.rvs(alpha, size=size)
-            res = np.empty((size, *alpha.shape))
+        def ref_rand(size, alpha, n):
+            k = alpha.shape[-1]
+            out = np.empty((size, k), dtype=int)
+            # debug_p = np.empty((size, k))
+            # FIXME: Vectorize this?
             for i in range(size):
-                res[i, :] = st.multinomial(p=p[i], n=n).rvs()
-            return res
+                p = nr.dirichlet(alpha)
+                x = nr.multinomial(n=n, pvals=p)
+                out[i, :] = x
+                # debug_p[i, :] = p
+            # breakpoint()
+            return out
 
         for n in [2, 3]:
             pymc3_random_discrete(
                 pm.DirichletMultinomial,
-                {"n": Vector(Nat, 1), "alpha": Vector(Rplus, np.array([1, n]))},
-                valuedomain=Vector(Nat, np.array([1, n])),
-                size=100,
+                {"alpha": Vector(Rplus, n), "n": Nat},
+                valuedomain=Vector(Nat, n),
+                size=1000,
                 ref_rand=ref_rand,
             )
 

From 28b0a6243254330c10432e2fe6157da8dc6de3a6 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Fri, 1 Jan 2021 09:54:04 -0800
Subject: [PATCH 21/56] Align DirichletMultinomial random implementation with
 Multinomial.

---
 pymc3/distributions/multivariate.py | 93 ++++++++++++++++++++++-------
 1 file changed, 70 insertions(+), 23 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 2951f3f2f3..73671b193c 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -723,17 +723,16 @@ class DirichletMultinomial(Discrete):
 
     def __init__(self, n, alpha, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        n = tt.as_tensor_variable(n)
-        alpha = tt.as_tensor_variable(alpha)
-        p = alpha / alpha.sum(-1, keepdims=True)
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
-            self.alpha = alpha if alpha.ndim > 1 else tt.shape_padleft(alpha)
+            self.alpha = tt.as_tensor_variable(alpha) if alpha.ndim > 1 else tt.shape_padleft(alpha)
         else:
             # n is a scalar, p is a 1d array
-            self.n = n
-            self.alpha = alpha
+            self.n = tt.as_tensor_variable(n)
+            self.alpha = tt.as_tensor_variable(alpha)
+
+        p = self.alpha / self.alpha.sum(-1, keepdims=True)
 
         self.mean = self.n * p
         mode = tt.cast(tt.round(self.mean), 'int32')
@@ -746,36 +745,84 @@ def __init__(self, n, alpha, *args, **kwargs):
     def logp(self, x):
         alpha = self.alpha
         n = self.n
-        sum_alpha = alpha.sum(axis=-1)
+        sum_alpha = alpha.sum(axis=-1, keepdims=True)
 
         const = (gammaln(n + 1) + gammaln(sum_alpha)) - gammaln(n + sum_alpha)
         series = gammaln(x + alpha) - (gammaln(x + 1) + gammaln(alpha))
-        result = const + series.sum(axis=-1)
+        result = const + series.sum(axis=-1, keepdims=True)
         return bound(result,
                      tt.all(tt.ge(x, 0)),
                      tt.all(tt.gt(alpha, 0)),
                      tt.all(tt.ge(n, 0)),
-                     tt.all(tt.eq(x.sum(axis=-1), n)),
+                     tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
                      broadcast_conditions=False)
 
-    def random(self, point=None, size=None, repeat=None):
+    def random0(self, point=None, size=None, repeat=None):
         alpha, n = draw_values([self.alpha, self.n], point=point, size=size)
-        if size is not None:
-            out = np.empty((size, *alpha.shape))
-            for j in range(size):
-                for i in range(len(n)):
-                    p = np.random.dirichlet(alpha[i, :])
-                    x = np.random.multinomial(n[i], p)
-                    out[j, i, :] = x
-        else:
-            out = np.empty_like(alpha)
-            for i in range(len(n)):
-                p = np.random.dirichlet(alpha[i, :])
-                x = np.random.multinomial(n[i], p)
-                out[i, :] = x
+        if size is None:
+            size = 1
+
+        out = np.empty((size, alpha.shape[-1]))
+        # FIXME: Vectorize this?
+        for i in range(size):
+            p = np.random.dirichlet(alpha)
+            x = np.random.multinomial(n, p)
+            out[i, :] = x
 
         return out
 
+    def _random(self, n, alpha, size=None):
+        original_dtype = alpha.dtype
+        # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
+        alpha = alpha.astype("float64")
+        # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
+        alpha /= np.sum(alpha, axis=-1, keepdims=True)
+
+        size_ = size if len(size) > 1 else (1, size[0])
+        # Thanks to the default shape handling done in generate_values, the last
+        # axis of n is a dummy axis that allows it to broadcast well with p
+        n = np.broadcast_to(n, size_)
+        alpha = np.broadcast_to(alpha, size_)
+        n = n[..., 0]
+
+        # Unlike the multinomial random_, here we need to draw values
+        # of p from np.random.dirichlet.
+        p = np.array([np.random.dirichlet(aa) for aa in alpha])
+
+        samples = np.array(
+            [np.random.multinomial(nn, pp) for nn, pp in zip(n, p)]
+        )
+        # We cast back to the original dtype
+        return samples.astype(original_dtype)
+
+    def random(self, point=None, size=None):
+        """
+        Draw random values from Dirichlet-Multinomial distribution.
+
+        Parameters
+        ----------
+        point: dict, optional
+            Dict of variable values on which random values are to be
+            conditioned (uses default point if not specified).
+        size: int, optional
+            Desired size of random sample (returns one sample if not
+            specified).
+
+        Returns
+        -------
+        array
+        """
+        n, alpha = draw_values([self.n, self.alpha], point=point, size=size)
+        samples = generate_samples(
+            self._random,
+            n,
+            alpha,
+            dist_shape=self.shape,
+            # not_broadcast_kwargs={"raw_size": size},
+            size=size,
+        )
+        return samples
+
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self

From d363f9610b106eff51f3137f3d8d8ef564a92982 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Sun, 3 Jan 2021 11:54:17 -0800
Subject: [PATCH 22/56] Match DM random method to Multinomial implementation.

---
 pymc3/distributions/multivariate.py | 54 +++++++++++++----------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 73671b193c..5d85184b44 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -757,41 +757,37 @@ def logp(self, x):
                      tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
                      broadcast_conditions=False)
 
-    def random0(self, point=None, size=None, repeat=None):
-        alpha, n = draw_values([self.alpha, self.n], point=point, size=size)
-        if size is None:
-            size = 1
-
-        out = np.empty((size, alpha.shape[-1]))
-        # FIXME: Vectorize this?
-        for i in range(size):
-            p = np.random.dirichlet(alpha)
-            x = np.random.multinomial(n, p)
-            out[i, :] = x
-
-        return out
-
-    def _random(self, n, alpha, size=None):
+    def _random(self, n, alpha, size=None, raw_size=None):
         original_dtype = alpha.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
         alpha = alpha.astype("float64")
-        # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
-        alpha /= np.sum(alpha, axis=-1, keepdims=True)
 
-        size_ = size if len(size) > 1 else (1, size[0])
         # Thanks to the default shape handling done in generate_values, the last
-        # axis of n is a dummy axis that allows it to broadcast well with p
-        n = np.broadcast_to(n, size_)
-        alpha = np.broadcast_to(alpha, size_)
+        # axis of n is a dummy axis that allows it to broadcast well with alpha
+        n = np.broadcast_to(n, size)
+        alpha = np.broadcast_to(alpha, size)
         n = n[..., 0]
 
-        # Unlike the multinomial random_, here we need to draw values
-        # of p from np.random.dirichlet.
-        p = np.array([np.random.dirichlet(aa) for aa in alpha])
-
-        samples = np.array(
-            [np.random.multinomial(nn, pp) for nn, pp in zip(n, p)]
-        )
+        # np.random.multinomial needs `n` to be a scalar int and `alpha` a
+        # sequence so we semi flatten them and iterate over them
+        size_ = to_tuple(raw_size)
+        if alpha.ndim > len(size_) and alpha.shape[: len(size_)] == size_:
+            # alpha and n have the size_ prepend so we don't need it in np.random
+            n_ = n.reshape([-1])
+            alpha_ = alpha.reshape([-1, alpha.shape[-1]])
+            p_ = np.array([np.random.dirichlet(aa) for aa in alpha_])
+            samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
+            samples = samples.reshape(alpha.shape)
+        else:
+            # alpha and n don't have the size prepend
+            n_ = n.reshape([-1])
+            alpha_ = alpha.reshape([-1, alpha.shape[-1]])
+            p_ = np.array([np.random.dirichlet(aa) for aa in alpha_])
+            samples = np.array(
+                [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)]
+            )
+            samples = np.moveaxis(samples, 0, -1)
+            samples = samples.reshape(size + alpha.shape)
         # We cast back to the original dtype
         return samples.astype(original_dtype)
 
@@ -818,7 +814,7 @@ def random(self, point=None, size=None):
             n,
             alpha,
             dist_shape=self.shape,
-            # not_broadcast_kwargs={"raw_size": size},
+            not_broadcast_kwargs={"raw_size": size},
             size=size,
         )
         return samples

From 9b6828cf42c7dc97f3cf19d4b9369c9705e52bb9 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 4 Jan 2021 09:30:32 +0100
Subject: [PATCH 23/56] Change alpha -> a Remove _repr_latex_

---
 pymc3/distributions/multivariate.py      | 62 ++++++++++-----------
 pymc3/tests/test_distributions.py        | 70 ++++++++++++------------
 pymc3/tests/test_distributions_random.py |  8 +--
 3 files changed, 67 insertions(+), 73 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 5d85184b44..df10d0ba85 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -712,27 +712,27 @@ class DirichletMultinomial(Discrete):
 
     Parameters
     ----------
-    alpha : two-dimensional array
+    a : two-dimensional array
         Dirichlet parameter.  Elements must be non-negative.
         Dimension of each element of the distribution is the length
         of the second dimension of alpha.
-    n     : one-dimensional array
+    n : one-dimensional array
         Total counts in each replicate.
 
     """
 
-    def __init__(self, n, alpha, *args, **kwargs):
+    def __init__(self, n, a, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
-            self.alpha = tt.as_tensor_variable(alpha) if alpha.ndim > 1 else tt.shape_padleft(alpha)
+            self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
         else:
             # n is a scalar, p is a 1d array
             self.n = tt.as_tensor_variable(n)
-            self.alpha = tt.as_tensor_variable(alpha)
+            self.a = tt.as_tensor_variable(a)
 
-        p = self.alpha / self.alpha.sum(-1, keepdims=True)
+        p = self.a / self.a.sum(-1, keepdims=True)
 
         self.mean = self.n * p
         mode = tt.cast(tt.round(self.mean), 'int32')
@@ -743,51 +743,51 @@ def __init__(self, n, alpha, *args, **kwargs):
         self.mode = mode
 
     def logp(self, x):
-        alpha = self.alpha
+        a = self.a
         n = self.n
-        sum_alpha = alpha.sum(axis=-1, keepdims=True)
+        sum_a = a.sum(axis=-1, keepdims=True)
 
-        const = (gammaln(n + 1) + gammaln(sum_alpha)) - gammaln(n + sum_alpha)
-        series = gammaln(x + alpha) - (gammaln(x + 1) + gammaln(alpha))
+        const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
+        series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
         result = const + series.sum(axis=-1, keepdims=True)
         return bound(result,
                      tt.all(tt.ge(x, 0)),
-                     tt.all(tt.gt(alpha, 0)),
+                     tt.all(tt.gt(a, 0)),
                      tt.all(tt.ge(n, 0)),
                      tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
                      broadcast_conditions=False)
 
-    def _random(self, n, alpha, size=None, raw_size=None):
-        original_dtype = alpha.dtype
+    def _random(self, n, a, size=None, raw_size=None):
+        original_dtype = a.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
-        alpha = alpha.astype("float64")
+        a = a.astype("float64")
 
         # Thanks to the default shape handling done in generate_values, the last
         # axis of n is a dummy axis that allows it to broadcast well with alpha
         n = np.broadcast_to(n, size)
-        alpha = np.broadcast_to(alpha, size)
+        a = np.broadcast_to(a, size)
         n = n[..., 0]
 
         # np.random.multinomial needs `n` to be a scalar int and `alpha` a
         # sequence so we semi flatten them and iterate over them
         size_ = to_tuple(raw_size)
-        if alpha.ndim > len(size_) and alpha.shape[: len(size_)] == size_:
-            # alpha and n have the size_ prepend so we don't need it in np.random
+        if a.ndim > len(size_) and a.shape[: len(size_)] == size_:
+            # a and n have the size_ prepend so we don't need it in np.random
             n_ = n.reshape([-1])
-            alpha_ = alpha.reshape([-1, alpha.shape[-1]])
-            p_ = np.array([np.random.dirichlet(aa) for aa in alpha_])
+            a_ = a.reshape([-1, a.shape[-1]])
+            p_ = np.array([np.random.dirichlet(aa) for aa in a_])
             samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
-            samples = samples.reshape(alpha.shape)
+            samples = samples.reshape(a.shape)
         else:
-            # alpha and n don't have the size prepend
+            # a and n don't have the size prepend
             n_ = n.reshape([-1])
-            alpha_ = alpha.reshape([-1, alpha.shape[-1]])
-            p_ = np.array([np.random.dirichlet(aa) for aa in alpha_])
+            a_ = a.reshape([-1, a.shape[-1]])
+            p_ = np.array([np.random.dirichlet(aa) for aa in a_])
             samples = np.array(
                 [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)]
             )
             samples = np.moveaxis(samples, 0, -1)
-            samples = samples.reshape(size + alpha.shape)
+            samples = samples.reshape(size + a.shape)
         # We cast back to the original dtype
         return samples.astype(original_dtype)
 
@@ -808,25 +808,19 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        n, alpha = draw_values([self.n, self.alpha], point=point, size=size)
+        n, a = draw_values([self.n, self.a], point=point, size=size)
         samples = generate_samples(
             self._random,
             n,
-            alpha,
+            a,
             dist_shape=self.shape,
             not_broadcast_kwargs={"raw_size": size},
             size=size,
         )
         return samples
 
-    def _repr_latex_(self, name=None, dist=None):
-        if dist is None:
-            dist = self
-        n = dist.n
-        alpha = dist.alpha
-        return (r'${} \sim \text{{DirichletMultinomial}}('
-                r'\matit{{n}}={} \mathit{{\alpha}}={})$'
-                ).format(name, get_variable_name(n), get_variable_name(alpha))
+    def _distr_parameters_for_repr(self):
+        return ["n", "a"]
 
 
 def posdef(AA):
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index b23bbb9410..a1425f1402 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -266,16 +266,16 @@ def multinomial_logpdf(value, n, p):
         return -inf
 
 
-def dirichlet_multinomial_logpmf(value, n, alpha):
-    value, n, alpha = [np.asarray(x) for x in [value, n, alpha]]
+def dirichlet_multinomial_logpmf(value, n, a):
+    value, n, a = [np.asarray(x) for x in [value, n, a]]
     assert value.ndim == 1
     assert n.ndim == 0
-    assert alpha.shape == value.shape
+    assert a.shape == value.shape
     gammaln = scipy.special.gammaln
     if value.sum() == n and (0 <= value).all() and (value <= n).all():
-        sum_alpha = alpha.sum(axis=-1)
-        const = gammaln(n + 1) + gammaln(sum_alpha) - gammaln(n + sum_alpha)
-        series = gammaln(value + alpha) - gammaln(value + 1) - gammaln(alpha)
+        sum_a = a.sum(axis=-1)
+        const = gammaln(n + 1) + gammaln(sum_a) - gammaln(n + sum_a)
+        series = gammaln(value + a) - gammaln(value + 1) - gammaln(a)
         return const + series.sum(axis=-1)
     else:
         return -inf
@@ -1743,29 +1743,29 @@ def test_batch_multinomial(self):
     @pytest.mark.parametrize("n", [2, 3])
     def test_dirichlet_multinomial(self, n):
         self.pymc3_matches_scipy(
-            DirichletMultinomial, Vector(Nat, n), {"alpha": Vector(Rplus, n), "n": Nat}, dirichlet_multinomial_logpmf
+            DirichletMultinomial, Vector(Nat, n), {"a": Vector(Rplus, n), "n": Nat}, dirichlet_multinomial_logpmf
         )
 
     @pytest.mark.parametrize(
-        "alpha, n",
+        "a, n",
         [
             [[0.25, 0.25, 0.25, 0.25], 1],
             [[0.3, 0.6, 0.05, 0.05], 2],
             [[0.3, 0.6, 0.05, 0.05], 10],
         ],
     )
-    def test_dirichlet_multinomial_mode(self, alpha, n):
-        _alpha = np.array(alpha)
+    def test_dirichlet_multinomial_mode(self, a, n):
+        _a = np.array(a)
         with Model() as model:
-            m = DirichletMultinomial("m", n, _alpha, _alpha.shape)
+            m = DirichletMultinomial("m", n, _a, _a.shape)
         assert_allclose(m.distribution.mode.eval().sum(), n)
-        _alpha = np.array([alpha, alpha])
+        _a = np.array([a, a])
         with Model() as model:
-            m = DirichletMultinomial("m", n, _alpha, _alpha.shape)
+            m = DirichletMultinomial("m", n, _a, _a.shape)
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
     @pytest.mark.parametrize(
-        "alpha, shape, n",
+        "a, shape, n",
         [
             [[0.25, 0.25, 0.25, 0.25], 4, 2],
             [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
@@ -1784,38 +1784,38 @@ def test_dirichlet_multinomial_mode(self, alpha, n):
             [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
         ],
     )
-    def test_dirichlet_multinomial_random(self, alpha, shape, n):
-        alpha = np.asarray(alpha)
+    def test_dirichlet_multinomial_random(self, a, shape, n):
+        a = np.asarray(a)
         with Model() as model:
-            m = DirichletMultinomial("m", n=n, alpha=alpha, shape=shape)
+            m = DirichletMultinomial("m", n=n, a=a, shape=shape)
         m.random()
 
     def test_dirichlet_multinomial_mode_with_shape(self):
         n = [1, 10]
-        alpha = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
+        a = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
         with Model() as model:
-            m = DirichletMultinomial("m", n=n, alpha=alpha, shape=(2, 4))
+            m = DirichletMultinomial("m", n=n, a=a, shape=(2, 4))
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
     def test_dirichlet_multinomial_vec(self):
         vals = np.array([[2, 4, 4], [3, 3, 4]])
-        alpha = np.array([0.2, 0.3, 0.5])
+        a = np.array([0.2, 0.3, 0.5])
         n = 10
 
         with Model() as model_single:
-            DirichletMultinomial("m", n=n, alpha=alpha, shape=len(alpha))
+            DirichletMultinomial("m", n=n, a=a, shape=len(a))
 
         with Model() as model_many:
-            DirichletMultinomial("m", n=n, alpha=alpha, shape=vals.shape)
+            DirichletMultinomial("m", n=n, a=a, shape=vals.shape)
 
         assert_almost_equal(
-            np.asarray([dirichlet_multinomial_logpmf(v, n, alpha) for v in vals]),
+            np.asarray([dirichlet_multinomial_logpmf(v, n, a) for v in vals]),
             np.asarray([model_single.fastlogp({"m": val}) for val in vals]),
             decimal=4,
         )
 
         assert_almost_equal(
-            np.asarray([dirichlet_multinomial_logpmf(v, n, alpha) for v in vals]),
+            np.asarray([dirichlet_multinomial_logpmf(v, n, a) for v in vals]),
             model_many.free_RVs[0].logp_elemwise({"m": vals}).squeeze(),
             decimal=4,
         )
@@ -1828,42 +1828,42 @@ def test_dirichlet_multinomial_vec(self):
 
     def test_dirichlet_multinomial_vec_1d_n(self):
         vals = np.array([[2, 4, 4], [4, 3, 4]])
-        alpha = np.array([0.2, 0.3, 0.5])
+        a = np.array([0.2, 0.3, 0.5])
         ns = np.array([10, 11])
 
         with Model() as model:
-            DirichletMultinomial("m", n=ns, alpha=alpha, shape=vals.shape)
+            DirichletMultinomial("m", n=ns, a=a, shape=vals.shape)
 
         assert_almost_equal(
-            sum([dirichlet_multinomial_logpmf(val, n, alpha) for val, n in zip(vals, ns)]),
+            sum([dirichlet_multinomial_logpmf(val, n, a) for val, n in zip(vals, ns)]),
             model.fastlogp({"m": vals}),
             decimal=4,
         )
 
-    def test_dirichlet_multinomial_vec_1d_n_2d_alpha(self):
+    def test_dirichlet_multinomial_vec_1d_n_2d_a(self):
         vals = np.array([[2, 4, 4], [4, 3, 4]])
-        alphas = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]])
+        as_ = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]])
         ns = np.array([10, 11])
 
         with Model() as model:
-            DirichletMultinomial("m", n=ns, alpha=alphas, shape=vals.shape)
+            DirichletMultinomial("m", n=ns, a=as_, shape=vals.shape)
 
         assert_almost_equal(
-            sum([dirichlet_multinomial_logpmf(val, n, alpha) for val, n, alpha in zip(vals, ns, alphas)]),
+            sum([dirichlet_multinomial_logpmf(val, n, a) for val, n, a in zip(vals, ns, as_)]),
             model.fastlogp({"m": vals}),
             decimal=4,
         )
 
-    def test_dirichlet_multinomial_vec_2d_alpha(self):
+    def test_dirichlet_multinomial_vec_2d_a(self):
         vals = np.array([[2, 4, 4], [3, 3, 4]])
-        alphas = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]])
+        as_ = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]])
         n = 10
 
         with Model() as model:
-            DirichletMultinomial("m", n=n, alpha=alphas, shape=vals.shape)
+            DirichletMultinomial("m", n=n, a=as_, shape=vals.shape)
 
         assert_almost_equal(
-            sum([dirichlet_multinomial_logpmf(val, n, alpha) for val, alpha in zip(vals, alphas)]),
+            sum([dirichlet_multinomial_logpmf(val, n, a) for val, a in zip(vals, as_)]),
             model.fastlogp({"m": vals}),
             decimal=4,
         )
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 2087124ed1..ba442f8dd7 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -991,13 +991,13 @@ def ref_rand(size, a):
             )
 
     def test_dirichlet_multinomial(self):
-        def ref_rand(size, alpha, n):
-            k = alpha.shape[-1]
+        def ref_rand(size, a, n):
+            k = a.shape[-1]
             out = np.empty((size, k), dtype=int)
             # debug_p = np.empty((size, k))
             # FIXME: Vectorize this?
             for i in range(size):
-                p = nr.dirichlet(alpha)
+                p = nr.dirichlet(a)
                 x = nr.multinomial(n=n, pvals=p)
                 out[i, :] = x
                 # debug_p[i, :] = p
@@ -1007,7 +1007,7 @@ def ref_rand(size, alpha, n):
         for n in [2, 3]:
             pymc3_random_discrete(
                 pm.DirichletMultinomial,
-                {"alpha": Vector(Rplus, n), "n": Nat},
+                {"a": Vector(Rplus, n), "n": Nat},
                 valuedomain=Vector(Nat, n),
                 size=1000,
                 ref_rand=ref_rand,

From d438dfcc2b4f68207b9d0654d3bfd5a664264546 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 4 Jan 2021 09:35:46 +0100
Subject: [PATCH 24/56] Run pre-commit

---
 pymc3/distributions/multivariate.py      | 19 ++++++++++---------
 pymc3/tests/test_distributions.py        |  5 ++++-
 pymc3/tests/test_distributions_random.py |  8 +++++++-
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index df10d0ba85..93a0d49f1f 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -735,11 +735,10 @@ def __init__(self, n, a, *args, **kwargs):
         p = self.a / self.a.sum(-1, keepdims=True)
 
         self.mean = self.n * p
-        mode = tt.cast(tt.round(self.mean), 'int32')
+        mode = tt.cast(tt.round(self.mean), "int32")
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
-        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
-                                diff[inc_bool_arr.nonzero()])
+        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
         self.mode = mode
 
     def logp(self, x):
@@ -750,12 +749,14 @@ def logp(self, x):
         const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
         series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
         result = const + series.sum(axis=-1, keepdims=True)
-        return bound(result,
-                     tt.all(tt.ge(x, 0)),
-                     tt.all(tt.gt(a, 0)),
-                     tt.all(tt.ge(n, 0)),
-                     tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
-                     broadcast_conditions=False)
+        return bound(
+            result,
+            tt.all(tt.ge(x, 0)),
+            tt.all(tt.gt(a, 0)),
+            tt.all(tt.ge(n, 0)),
+            tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
+            broadcast_conditions=False,
+        )
 
     def _random(self, n, a, size=None, raw_size=None):
         original_dtype = a.dtype
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index a1425f1402..057324d731 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1743,7 +1743,10 @@ def test_batch_multinomial(self):
     @pytest.mark.parametrize("n", [2, 3])
     def test_dirichlet_multinomial(self, n):
         self.pymc3_matches_scipy(
-            DirichletMultinomial, Vector(Nat, n), {"a": Vector(Rplus, n), "n": Nat}, dirichlet_multinomial_logpmf
+            DirichletMultinomial,
+            Vector(Nat, n),
+            {"a": Vector(Rplus, n), "n": Nat},
+            dirichlet_multinomial_logpmf,
         )
 
     @pytest.mark.parametrize(
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index ba442f8dd7..56a4795188 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -89,7 +89,13 @@ def pymc3_random(
 
 
 def pymc3_random_discrete(
-    dist, paramdomains, valuedomain=Domain([0]), ref_rand=None, size=100000, alpha=0.05, fails=20,
+    dist,
+    paramdomains,
+    valuedomain=Domain([0]),
+    ref_rand=None,
+    size=100000,
+    alpha=0.05,
+    fails=20,
     extra_args=None,
 ):
     model = build_model(dist, valuedomain, paramdomains, extra_args=extra_args)

From dde5c4513eaa0249968fdb1915413a875a958b70 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 4 Jan 2021 09:54:15 +0100
Subject: [PATCH 25/56] Keep standard order of methods random and logp

---
 pymc3/distributions/multivariate.py | 34 ++++++++++++++---------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 93a0d49f1f..adc8d010ea 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -741,23 +741,6 @@ def __init__(self, n, a, *args, **kwargs):
         mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
         self.mode = mode
 
-    def logp(self, x):
-        a = self.a
-        n = self.n
-        sum_a = a.sum(axis=-1, keepdims=True)
-
-        const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
-        series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
-        result = const + series.sum(axis=-1, keepdims=True)
-        return bound(
-            result,
-            tt.all(tt.ge(x, 0)),
-            tt.all(tt.gt(a, 0)),
-            tt.all(tt.ge(n, 0)),
-            tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
-            broadcast_conditions=False,
-        )
-
     def _random(self, n, a, size=None, raw_size=None):
         original_dtype = a.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
@@ -820,6 +803,23 @@ def random(self, point=None, size=None):
         )
         return samples
 
+    def logp(self, x):
+        a = self.a
+        n = self.n
+        sum_a = a.sum(axis=-1, keepdims=True)
+
+        const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
+        series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
+        result = const + series.sum(axis=-1, keepdims=True)
+        return bound(
+            result,
+            tt.all(tt.ge(x, 0)),
+            tt.all(tt.gt(a, 0)),
+            tt.all(tt.ge(n, 0)),
+            tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
+            broadcast_conditions=False,
+        )
+
     def _distr_parameters_for_repr(self):
         return ["n", "a"]
 

From 49b432de5bf888f0da2369fcfc3a7d8d88b02d0f Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 4 Jan 2021 11:10:38 +0100
Subject: [PATCH 26/56] Update docstrings for valid input types. Progress on
 batch test.

---
 pymc3/distributions/multivariate.py |  9 ++++----
 pymc3/tests/test_distributions.py   | 34 +++++++++++++++++------------
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index adc8d010ea..4be9b370dc 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -712,13 +712,14 @@ class DirichletMultinomial(Discrete):
 
     Parameters
     ----------
-    a : two-dimensional array
+    n : int or array
+        Total counts in each replicate. If n is an array its shape must be (N,)
+        with N = a.shape[0]
+
+    a : one- or two-dimensional array
         Dirichlet parameter.  Elements must be non-negative.
         Dimension of each element of the distribution is the length
         of the second dimension of alpha.
-    n : one-dimensional array
-        Total counts in each replicate.
-
     """
 
     def __init__(self, n, a, *args, **kwargs):
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 057324d731..1c89d21522 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1872,24 +1872,30 @@ def test_dirichlet_multinomial_vec_2d_a(self):
         )
 
     def test_batch_dirichlet_multinomial(self):
+        # Test that DM can handle a 3d array for `a`
         n = 10
+        # Create an almost deterministic DM by setting a to 0.001, everywehere
+        # except for one category / dimensions which is given the value fo 100
         vals = np.zeros((4, 5, 3), dtype="int32")
-        alpha = np.zeros_like(vals, dtype=theano.config.floatX)
+        a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001
         inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
         np.put_along_axis(vals, inds, n, axis=-1)
-        np.put_along_axis(alpha, inds, 1, axis=-1)
-
-        dist = DirichletMultinomial.dist(n=n, alpha=alpha, shape=vals.shape)
-        value = tt.tensor3(dtype="int32")
-        value.tag.test_value = np.zeros_like(vals, dtype="int32")
-        logp = tt.exp(dist.logp(value))
-        f = theano.function(inputs=[value], outputs=logp)
-        assert_almost_equal(
-            f(vals),
-            np.ones(vals.shape[:-1] + (1,)),
-            decimal=select_by_precision(float64=6, float32=3),
-        )
-
+        np.put_along_axis(a, inds, 100, axis=-1)
+
+        dist = DirichletMultinomial.dist(n=n, a=a, shape=vals.shape)
+
+        # TODO: Test logp is as expected (not as simple as the Multinomial case)
+        # value = tt.tensor3(dtype="int32")
+        # value.tag.test_value = np.zeros_like(vals, dtype="int32")
+        # logp = tt.exp(dist.logp(value))
+        # f = theano.function(inputs=[value], outputs=logp)
+        # assert_almost_equal(
+        #     f(vals),
+        #     np.ones(vals.shape[:-1] + (1,)),
+        #     decimal=select_by_precision(float64=6, float32=3),
+        # )
+
+        # Samples should be equal given the almost deterministic DM
         sample = dist.random(size=2)
         assert_allclose(sample, np.stack([vals, vals], axis=0))
 

From 83fbda6e55c37328a4e740296cac1665627c7b14 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 4 Jan 2021 11:45:25 +0100
Subject: [PATCH 27/56] Add new test to ensure DM matches BetaBinom

---
 pymc3/tests/test_distributions.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 1c89d21522..7391e30181 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1749,6 +1749,15 @@ def test_dirichlet_multinomial(self, n):
             dirichlet_multinomial_logpmf,
         )
 
+    def test_dirichlet_multinomial_matches_beta_binomial(self):
+        a, b, n = 2, 1, 5
+        ns = np.arange(n + 1)
+        ns_dm = np.vstack((ns, n - ns)).T  # covert ns=1 to ns_dm=[1, 4], for all ns...
+        bb_logp = pm.BetaBinomial.dist(n=n, alpha=a, beta=b).logp(ns).tag.test_value
+        dm_logp = pm.DirichletMultinomial.dist(n=n, a=[a, b]).logp(ns_dm).tag.test_value
+        dm_logp = dm_logp.ravel()
+        assert_allclose(bb_logp, dm_logp)
+
     @pytest.mark.parametrize(
         "a, n",
         [

From 9748a9dbfe3f6b66f3c726471f7e6e9f3eeb3941 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 4 Jan 2021 09:36:36 -0800
Subject: [PATCH 28/56] Change DM alpha -> a in docstrings.

---
 pymc3/distributions/multivariate.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 4be9b370dc..6044aaedc9 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -698,16 +698,16 @@ class DirichletMultinomial(Discrete):
 
     .. math::
 
-    f(x \mid n, \alpha) = \frac{\Gamma(n + 1)\Gamma(\sum\alpha_k)}
-                              {\Gamma(\n + \sum\alpha_k)}
+    f(x \mid n, a) = \frac{\Gamma(n + 1)\Gamma(\sum a_k)}
+                              {\Gamma(\n + \sum a_k)}
                          \prod_{k=1}^K
-                         \frac{\Gamma(x_k + \alpha_k)}
-                              {\Gamma(x_k + 1)\Gamma(alpha_k)}
+                         \frac{\Gamma(x_k +  a_k)}
+                              {\Gamma(x_k + 1)\Gamma(a_k)}
 
     ==========  ===========================================
     Support     :math:`x \in \{0, 1, \ldots, n\}` such that
                 :math:`\sum x_i = n`
-    Mean        :math:`n \frac{\alpha_i}{\sum{\alpha_k}}`
+    Mean        :math:`n \frac{a_i}{\sum{a_k}}`
     ==========  ===========================================
 
     Parameters
@@ -719,7 +719,7 @@ class DirichletMultinomial(Discrete):
     a : one- or two-dimensional array
         Dirichlet parameter.  Elements must be non-negative.
         Dimension of each element of the distribution is the length
-        of the second dimension of alpha.
+        of the second dimension of *a*.
     """
 
     def __init__(self, n, a, *args, **kwargs):
@@ -748,12 +748,12 @@ def _random(self, n, a, size=None, raw_size=None):
         a = a.astype("float64")
 
         # Thanks to the default shape handling done in generate_values, the last
-        # axis of n is a dummy axis that allows it to broadcast well with alpha
+        # axis of n is a dummy axis that allows it to broadcast well with `a`
         n = np.broadcast_to(n, size)
         a = np.broadcast_to(a, size)
         n = n[..., 0]
 
-        # np.random.multinomial needs `n` to be a scalar int and `alpha` a
+        # np.random.multinomial needs `n` to be a scalar int and `a` a
         # sequence so we semi flatten them and iterate over them
         size_ = to_tuple(raw_size)
         if a.ndim > len(size_) and a.shape[: len(size_)] == size_:

From 7b20680084258f61a43b67acca03bd1240a251e8 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 4 Jan 2021 09:43:38 -0800
Subject: [PATCH 29/56] Test two additional parameterization shapes in
 `test_dirichlet_multinomial_random`.

---
 pymc3/tests/test_distributions.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 7391e30181..c6baa11cc7 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1781,11 +1781,9 @@ def test_dirichlet_multinomial_mode(self, a, n):
         [
             [[0.25, 0.25, 0.25, 0.25], 4, 2],
             [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
-            # 3: expect to fail
-            # [[.25, .25, .25, .25], (10, 4)],
+            [[.25, .25, .25, .25], (10, 4), [2]*10],
             [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
-            # 5: expect to fail
-            # [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
+            [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
             [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
             [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
             [

From 66c83b0e8da3823eda42113a792a56c1f4c8b9c3 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 4 Jan 2021 09:55:34 -0800
Subject: [PATCH 30/56] Revert debugging comments.

---
 pymc3/tests/test_distributions_random.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 56a4795188..03a15aacee 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -1000,14 +1000,10 @@ def test_dirichlet_multinomial(self):
         def ref_rand(size, a, n):
             k = a.shape[-1]
             out = np.empty((size, k), dtype=int)
-            # debug_p = np.empty((size, k))
-            # FIXME: Vectorize this?
             for i in range(size):
                 p = nr.dirichlet(a)
                 x = nr.multinomial(n=n, pvals=p)
                 out[i, :] = x
-                # debug_p[i, :] = p
-            # breakpoint()
             return out
 
         for n in [2, 3]:

From 672ef562077f8f29be1f3dfab66e6f3d51152de7 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 4 Jan 2021 09:55:52 -0800
Subject: [PATCH 31/56] Revert unrelated changes.

---
 pymc3/tests/test_distributions_random.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 03a15aacee..e34d1328ef 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -89,16 +89,9 @@ def pymc3_random(
 
 
 def pymc3_random_discrete(
-    dist,
-    paramdomains,
-    valuedomain=Domain([0]),
-    ref_rand=None,
-    size=100000,
-    alpha=0.05,
-    fails=20,
-    extra_args=None,
+    dist, paramdomains, valuedomain=Domain([0]), ref_rand=None, size=100000, alpha=0.05, fails=20
 ):
-    model = build_model(dist, valuedomain, paramdomains, extra_args=extra_args)
+    model = build_model(dist, valuedomain, paramdomains)
     domains = paramdomains.copy()
     for pt in product(domains, n_samples=100):
         pt = pm.Point(pt, model=model)

From 2d5d20e95bdbe31400d92d7cd7e8191c86eeb747 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Mon, 4 Jan 2021 10:10:03 -0800
Subject: [PATCH 32/56] Fix minor Black inconsistency.

---
 pymc3/tests/test_distributions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index c6baa11cc7..2166d9041f 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1781,9 +1781,9 @@ def test_dirichlet_multinomial_mode(self, a, n):
         [
             [[0.25, 0.25, 0.25, 0.25], 4, 2],
             [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
-            [[.25, .25, .25, .25], (10, 4), [2]*10],
+            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
             [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
-            [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
             [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
             [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
             [

From 922515bfb21d3af415049a3da78d8749045b5d8e Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 5 Jan 2021 15:26:43 -0800
Subject: [PATCH 33/56] Drop no-longer-functional reshaping code.

---
 pymc3/distributions/multivariate.py | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 6044aaedc9..e7db3d1a40 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -756,23 +756,12 @@ def _random(self, n, a, size=None, raw_size=None):
         # np.random.multinomial needs `n` to be a scalar int and `a` a
         # sequence so we semi flatten them and iterate over them
         size_ = to_tuple(raw_size)
-        if a.ndim > len(size_) and a.shape[: len(size_)] == size_:
-            # a and n have the size_ prepend so we don't need it in np.random
-            n_ = n.reshape([-1])
-            a_ = a.reshape([-1, a.shape[-1]])
-            p_ = np.array([np.random.dirichlet(aa) for aa in a_])
-            samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
-            samples = samples.reshape(a.shape)
-        else:
-            # a and n don't have the size prepend
-            n_ = n.reshape([-1])
-            a_ = a.reshape([-1, a.shape[-1]])
-            p_ = np.array([np.random.dirichlet(aa) for aa in a_])
-            samples = np.array(
-                [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)]
-            )
-            samples = np.moveaxis(samples, 0, -1)
-            samples = samples.reshape(size + a.shape)
+        # a and n have the size_ prepend so we don't need it in np.random
+        n_ = n.reshape([-1])
+        a_ = a.reshape([-1, a.shape[-1]])
+        p_ = np.array([np.random.dirichlet(aa) for aa in a_])
+        samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
+        samples = samples.reshape(a.shape)
         # We cast back to the original dtype
         return samples.astype(original_dtype)
 

From aa89d0ad15e74d62f92176b5a48ea8ea0acb0e70 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 5 Jan 2021 15:28:02 -0800
Subject: [PATCH 34/56] Assert shape of random samples is as expected.

---
 pymc3/distributions/multivariate.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index e7db3d1a40..7fef0cb60d 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -791,6 +791,13 @@ def random(self, point=None, size=None):
             not_broadcast_kwargs={"raw_size": size},
             size=size,
         )
+
+        if size is not None:
+            expect_shape = (size, *self.shape)
+        else:
+            expect_shape = self.shape
+        assert tuple(samples.shape) == tuple(expect_shape)
+
         return samples
 
     def logp(self, x):

From 2343004efe7550936c9844a5b2f182ddf72f3997 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 5 Jan 2021 15:41:00 -0800
Subject: [PATCH 35/56] Explicitly test random sample shapes, including batch
 dimensions.

---
 pymc3/tests/test_distributions.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 2166d9041f..0ab5f114e9 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -97,6 +97,7 @@
 from pymc3.tests.helpers import SeededTest, select_by_precision
 from pymc3.theanof import floatX
 from pymc3.vartypes import continuous_types
+from pymc3.distributions.shape_utils import to_tuple
 
 
 def get_lkj_cases():
@@ -1798,7 +1799,14 @@ def test_dirichlet_multinomial_random(self, a, shape, n):
         a = np.asarray(a)
         with Model() as model:
             m = DirichletMultinomial("m", n=n, a=a, shape=shape)
-        m.random()
+        samp0 = m.random()
+        samp1 = m.random(size=1)
+        samp2 = m.random(size=2)
+
+        shape_ = to_tuple(shape)
+        assert to_tuple(samp0.shape) == shape_
+        assert to_tuple(samp1.shape) == (1, *shape_)
+        assert to_tuple(samp2.shape) == (2, *shape_)
 
     def test_dirichlet_multinomial_mode_with_shape(self):
         n = [1, 10]

From a08bc517b956f6067dbd61b80a7bc7c03046db77 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 5 Jan 2021 15:44:27 -0800
Subject: [PATCH 36/56] Sort imports.

---
 pymc3/tests/test_distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 0ab5f114e9..8e9864cbc7 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -92,12 +92,12 @@
     ZeroInflatedPoisson,
     continuous,
 )
+from pymc3.distributions.shape_utils import to_tuple
 from pymc3.math import kronecker, logsumexp
 from pymc3.model import Deterministic, Model, Point
 from pymc3.tests.helpers import SeededTest, select_by_precision
 from pymc3.theanof import floatX
 from pymc3.vartypes import continuous_types
-from pymc3.distributions.shape_utils import to_tuple
 
 
 def get_lkj_cases():

From 22beeadb2815931e536dbfaca4414bc168b50df3 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Wed, 6 Jan 2021 09:25:37 +0100
Subject: [PATCH 37/56] Simplify _random

It should be okay to not explicitly change the input dtype as in the multinomial, because the input to the np.random.dirichlet should be safe (it's fine to have float32 to float64 overflow from 1.00 to 1.01..., underflow from 0.01, to 0.0 would still be problematic, but we don't know if this is an issue yet...). The output of the numpy.random.dirichlet to numpy.random.multinomial should be safe since it is already in float64 by then. We still need to convert to the previous dtype, since numpy changes it by default.

size_ argument was no longer being used.
---
 pymc3/distributions/multivariate.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 7fef0cb60d..10cd2abbf3 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -743,9 +743,8 @@ def __init__(self, n, a, *args, **kwargs):
         self.mode = mode
 
     def _random(self, n, a, size=None, raw_size=None):
+        # numpy will cast dirichlet and multinomial samples to float64 by default
         original_dtype = a.dtype
-        # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
-        a = a.astype("float64")
 
         # Thanks to the default shape handling done in generate_values, the last
         # axis of n is a dummy axis that allows it to broadcast well with `a`
@@ -755,13 +754,12 @@ def _random(self, n, a, size=None, raw_size=None):
 
         # np.random.multinomial needs `n` to be a scalar int and `a` a
         # sequence so we semi flatten them and iterate over them
-        size_ = to_tuple(raw_size)
-        # a and n have the size_ prepend so we don't need it in np.random
         n_ = n.reshape([-1])
         a_ = a.reshape([-1, a.shape[-1]])
         p_ = np.array([np.random.dirichlet(aa) for aa in a_])
         samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
         samples = samples.reshape(a.shape)
+
         # We cast back to the original dtype
         return samples.astype(original_dtype)
 

From 7bad831c6658a4955cab60789204655b9c157fee Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Wed, 6 Jan 2021 09:26:33 +0100
Subject: [PATCH 38/56] Reorder tests more logically

---
 pymc3/tests/test_distributions.py | 62 +++++++++++++++----------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 8e9864cbc7..8388c57563 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1777,37 +1777,6 @@ def test_dirichlet_multinomial_mode(self, a, n):
             m = DirichletMultinomial("m", n, _a, _a.shape)
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
-    @pytest.mark.parametrize(
-        "a, shape, n",
-        [
-            [[0.25, 0.25, 0.25, 0.25], 4, 2],
-            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
-            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
-            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
-            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
-            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
-            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
-            [
-                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
-                (10, 2, 4),
-                [31, 37],
-            ],
-            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
-        ],
-    )
-    def test_dirichlet_multinomial_random(self, a, shape, n):
-        a = np.asarray(a)
-        with Model() as model:
-            m = DirichletMultinomial("m", n=n, a=a, shape=shape)
-        samp0 = m.random()
-        samp1 = m.random(size=1)
-        samp2 = m.random(size=2)
-
-        shape_ = to_tuple(shape)
-        assert to_tuple(samp0.shape) == shape_
-        assert to_tuple(samp1.shape) == (1, *shape_)
-        assert to_tuple(samp2.shape) == (2, *shape_)
-
     def test_dirichlet_multinomial_mode_with_shape(self):
         n = [1, 10]
         a = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
@@ -1886,6 +1855,37 @@ def test_dirichlet_multinomial_vec_2d_a(self):
             decimal=4,
         )
 
+    @pytest.mark.parametrize(
+        "a, shape, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 4, 2],
+            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
+            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
+            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
+            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
+            [
+                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
+                (10, 2, 4),
+                [31, 37],
+            ],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
+        ],
+    )
+    def test_dirichlet_multinomial_random(self, a, shape, n):
+        a = np.asarray(a)
+        with Model() as model:
+            m = DirichletMultinomial("m", n=n, a=a, shape=shape)
+        samp0 = m.random()
+        samp1 = m.random(size=1)
+        samp2 = m.random(size=2)
+
+        shape_ = to_tuple(shape)
+        assert to_tuple(samp0.shape) == shape_
+        assert to_tuple(samp1.shape) == (1, *shape_)
+        assert to_tuple(samp2.shape) == (2, *shape_)
+
     def test_batch_dirichlet_multinomial(self):
         # Test that DM can handle a 3d array for `a`
         n = 10

From 9bbddbaf4ed0943f63afa8b33d0c23738af52fb7 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Wed, 6 Jan 2021 09:38:54 +0100
Subject: [PATCH 39/56] Refactor tests

Merged mode tests since shape must be given explicitly anyway
Moved test_dirichlet_multinomial_random to test_distributions_random.py and renamed it to test_dirichlet_multinomial_shapes
---
 pymc3/tests/test_distributions.py        | 59 ++++--------------------
 pymc3/tests/test_distributions_random.py | 31 +++++++++++++
 2 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 8388c57563..74de118ea7 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1760,28 +1760,20 @@ def test_dirichlet_multinomial_matches_beta_binomial(self):
         assert_allclose(bb_logp, dm_logp)
 
     @pytest.mark.parametrize(
-        "a, n",
+        "a, n, shape",
         [
-            [[0.25, 0.25, 0.25, 0.25], 1],
-            [[0.3, 0.6, 0.05, 0.05], 2],
-            [[0.3, 0.6, 0.05, 0.05], 10],
+            [[0.25, 0.25, 0.25, 0.25], 1, (1, 4)],
+            [[0.3, 0.6, 0.05, 0.05], 2, (1, 4)],
+            [[0.3, 0.6, 0.05, 0.05], 10, (1, 4)],
+            [[0.25, 0.25, 0.25, 0.25], 1, (2, 4)],
+            [[0.3, 0.6, 0.05, 0.05], 2, (3, 4)],
+            [[[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]], [1, 10], (2, 4)],
         ],
     )
-    def test_dirichlet_multinomial_mode(self, a, n):
-        _a = np.array(a)
-        with Model() as model:
-            m = DirichletMultinomial("m", n, _a, _a.shape)
-        assert_allclose(m.distribution.mode.eval().sum(), n)
-        _a = np.array([a, a])
-        with Model() as model:
-            m = DirichletMultinomial("m", n, _a, _a.shape)
-        assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
-
-    def test_dirichlet_multinomial_mode_with_shape(self):
-        n = [1, 10]
-        a = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
+    def test_dirichlet_multinomial_mode(self, a, n, shape):
+        a = np.asarray(a)
         with Model() as model:
-            m = DirichletMultinomial("m", n=n, a=a, shape=(2, 4))
+            m = DirichletMultinomial("m", n=n, a=a, shape=shape)
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
     def test_dirichlet_multinomial_vec(self):
@@ -1855,37 +1847,6 @@ def test_dirichlet_multinomial_vec_2d_a(self):
             decimal=4,
         )
 
-    @pytest.mark.parametrize(
-        "a, shape, n",
-        [
-            [[0.25, 0.25, 0.25, 0.25], 4, 2],
-            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
-            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
-            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
-            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
-            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
-            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
-            [
-                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
-                (10, 2, 4),
-                [31, 37],
-            ],
-            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
-        ],
-    )
-    def test_dirichlet_multinomial_random(self, a, shape, n):
-        a = np.asarray(a)
-        with Model() as model:
-            m = DirichletMultinomial("m", n=n, a=a, shape=shape)
-        samp0 = m.random()
-        samp1 = m.random(size=1)
-        samp2 = m.random(size=2)
-
-        shape_ = to_tuple(shape)
-        assert to_tuple(samp0.shape) == shape_
-        assert to_tuple(samp1.shape) == (1, *shape_)
-        assert to_tuple(samp2.shape) == (2, *shape_)
-
     def test_batch_dirichlet_multinomial(self):
         # Test that DM can handle a 3d array for `a`
         n = 10
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index e34d1328ef..e08328a416 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -1008,6 +1008,37 @@ def ref_rand(size, a, n):
                 ref_rand=ref_rand,
             )
 
+    @pytest.mark.parametrize(
+        "a, shape, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 4, 2],
+            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
+            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
+            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
+            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
+            [
+                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
+                (10, 2, 4),
+                [31, 37],
+            ],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
+        ],
+    )
+    def test_dirichlet_multinomial_shapes(self, a, shape, n):
+        a = np.asarray(a)
+        with pm.Model() as model:
+            m = pm.DirichletMultinomial("m", n=n, a=a, shape=shape)
+        samp0 = m.random()
+        samp1 = m.random(size=1)
+        samp2 = m.random(size=2)
+
+        shape_ = to_tuple(shape)
+        assert to_tuple(samp0.shape) == shape_
+        assert to_tuple(samp1.shape) == (1, *shape_)
+        assert to_tuple(samp2.shape) == (2, *shape_)
+
     def test_multinomial(self):
         def ref_rand(size, p, n):
             return nr.multinomial(pvals=p, n=n, size=size)

From 086459f9d706222c989daa388b79e963cc7d74f4 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Wed, 6 Jan 2021 10:00:34 +0100
Subject: [PATCH 40/56] Require shape argument

Also allow more forgiveness if user passes lists instead of arrays (WIP/suggestion only)
---
 pymc3/distributions/multivariate.py | 15 ++++++++++++---
 pymc3/tests/test_distributions.py   |  4 +++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 10cd2abbf3..462e107e3f 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -720,14 +720,23 @@ class DirichletMultinomial(Discrete):
         Dirichlet parameter.  Elements must be non-negative.
         Dimension of each element of the distribution is the length
         of the second dimension of *a*.
+
+    shape : numerical tuple
+        Describes shape of distribution. For example if n=array([5, 10]), and
+        p=array([1, 1, 1]), shape should be (2, 3).
     """
 
-    def __init__(self, n, a, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, n, a, shape, *args, **kwargs):
+        super().__init__(shape, *args, **kwargs)
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
-            self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
+            # Be forgiving if users pass a list instead of np.array
+            # TODO: Find more elegant apprach or simply remove forgiveness
+            try:
+                self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
+            except AttributeError:
+                self.a = tt.as_tensor_variable(a) if np.asarray(a).ndim > 1 else tt.shape_padleft(a)
         else:
             # n is a scalar, p is a 1d array
             self.n = tt.as_tensor_variable(n)
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 74de118ea7..661af092a5 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1755,7 +1755,9 @@ def test_dirichlet_multinomial_matches_beta_binomial(self):
         ns = np.arange(n + 1)
         ns_dm = np.vstack((ns, n - ns)).T  # covert ns=1 to ns_dm=[1, 4], for all ns...
         bb_logp = pm.BetaBinomial.dist(n=n, alpha=a, beta=b).logp(ns).tag.test_value
-        dm_logp = pm.DirichletMultinomial.dist(n=n, a=[a, b]).logp(ns_dm).tag.test_value
+        dm_logp = (
+            pm.DirichletMultinomial.dist(n=n, a=[a, b], shape=(1, 2)).logp(ns_dm).tag.test_value
+        )
         dm_logp = dm_logp.ravel()
         assert_allclose(bb_logp, dm_logp)
 

From f8499d38b5efc0be276e1215a6a6fff6465450b1 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Wed, 6 Jan 2021 10:06:23 +0100
Subject: [PATCH 41/56] Remove unused import `to_tuple`

---
 pymc3/tests/test_distributions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 661af092a5..2f89da81cf 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -92,7 +92,6 @@
     ZeroInflatedPoisson,
     continuous,
 )
-from pymc3.distributions.shape_utils import to_tuple
 from pymc3.math import kronecker, logsumexp
 from pymc3.model import Deterministic, Model, Point
 from pymc3.tests.helpers import SeededTest, select_by_precision

From 1cd2a9f86efcd581b01dfc77928e2d17b283cb1b Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Wed, 6 Jan 2021 10:34:56 +0100
Subject: [PATCH 42/56] Simplify logic to handle list as input for `a`

---
 pymc3/distributions/multivariate.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 462e107e3f..14ae9dbb0b 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -731,12 +731,7 @@ def __init__(self, n, a, shape, *args, **kwargs):
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
-            # Be forgiving if users pass a list instead of np.array
-            # TODO: Find more elegant apprach or simply remove forgiveness
-            try:
-                self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
-            except AttributeError:
-                self.a = tt.as_tensor_variable(a) if np.asarray(a).ndim > 1 else tt.shape_padleft(a)
+            self.a = tt.as_tensor_variable(a) if np.ndim(a) > 1 else tt.shape_padleft(a)
         else:
             # n is a scalar, p is a 1d array
             self.n = tt.as_tensor_variable(n)

From ef00fe193ab84ad3a7ceb28c5c621830cfb1d728 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Sun, 10 Jan 2021 10:41:04 +0100
Subject: [PATCH 43/56] Raise ShapeError in random()

---
 pymc3/distributions/multivariate.py      | 19 +++++++++++--------
 pymc3/tests/test_distributions_random.py | 17 +++++++++++++++++
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 14ae9dbb0b..250e54a684 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -42,6 +42,7 @@
 )
 from pymc3.distributions.shape_utils import broadcast_dist_samples_to, to_tuple
 from pymc3.distributions.special import gammaln, multigammaln
+from pymc3.exceptions import ShapeError
 from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker
 from pymc3.model import Deterministic
 from pymc3.theanof import floatX
@@ -717,9 +718,8 @@ class DirichletMultinomial(Discrete):
         with N = a.shape[0]
 
     a : one- or two-dimensional array
-        Dirichlet parameter.  Elements must be non-negative.
-        Dimension of each element of the distribution is the length
-        of the second dimension of *a*.
+        Dirichlet parameter. Elements must be non-negative.
+        The number of categories is given by the length of the last axis.
 
     shape : numerical tuple
         Describes shape of distribution. For example if n=array([5, 10]), and
@@ -794,11 +794,14 @@ def random(self, point=None, size=None):
             size=size,
         )
 
-        if size is not None:
-            expect_shape = (size, *self.shape)
-        else:
-            expect_shape = self.shape
-        assert tuple(samples.shape) == tuple(expect_shape)
+        # If distribution is initialized with .dist(), valid init shape is not asserted.
+        # Under normal use in a model context valid init shape is asserted at start.
+        expected_shape = tuple(self.shape) if size is None else (size, *self.shape)
+        sample_shape = tuple(samples.shape)
+        if sample_shape != expected_shape:
+            raise ShapeError(
+                f"Expected sample shape was {expected_shape} but got {sample_shape}. This may reflect an invalid initialization shape."
+            )
 
         return samples
 
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index e08328a416..9b4c235a97 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -15,6 +15,8 @@
 import itertools
 import sys
 
+from contextlib import ExitStack as does_not_raise
+
 import numpy as np
 import numpy.random as nr
 import numpy.testing as npt
@@ -34,6 +36,7 @@
     draw_values,
     to_tuple,
 )
+from pymc3.exceptions import ShapeError
 from pymc3.tests.helpers import SeededTest
 from pymc3.tests.test_distributions import (
     Domain,
@@ -1039,6 +1042,20 @@ def test_dirichlet_multinomial_shapes(self, a, shape, n):
         assert to_tuple(samp1.shape) == (1, *shape_)
         assert to_tuple(samp2.shape) == (2, *shape_)
 
+    @pytest.mark.parametrize(
+        "n, a, shape, expectation",
+        [
+            ([5], [[1000, 1, 1], [1, 1, 1000]], (2, 3), does_not_raise()),
+            ([5, 3], [[1000, 1, 1], [1, 1, 1000]], (2, 3), does_not_raise()),
+            ([[5]], [[1000, 1, 1], [1, 1, 1000]], (2, 3), pytest.raises(ShapeError)),
+            ([[5], [3]], [[1000, 1, 1], [1, 1, 1000]], (2, 3), pytest.raises(ShapeError)),
+        ],
+    )
+    def test_dirichlet_multinomial_dist_shapes_raise(self, n, a, shape, expectation):
+        m = pm.DirichletMultinomial.dist(n=n, a=a, shape=shape)
+        with expectation:
+            m.random()
+
     def test_multinomial(self):
         def ref_rand(size, p, n):
             return nr.multinomial(pvals=p, n=n, size=size)

From f2ac8e9f0ef36686643a8c121e855fd3db0de967 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Sun, 10 Jan 2021 10:41:26 +0100
Subject: [PATCH 44/56] Finish batch and repr unittests

---
 pymc3/tests/test_distributions.py | 32 ++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 2f89da81cf..4a2d0eb780 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1850,27 +1850,26 @@ def test_dirichlet_multinomial_vec_2d_a(self):
 
     def test_batch_dirichlet_multinomial(self):
         # Test that DM can handle a 3d array for `a`
-        n = 10
+
         # Create an almost deterministic DM by setting a to 0.001, everywehere
-        # except for one category / dimensions which is given the value fo 100
+        # except for one category / dimensions which is given the value of 1000
+        n = 5
         vals = np.zeros((4, 5, 3), dtype="int32")
         a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001
         inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
         np.put_along_axis(vals, inds, n, axis=-1)
-        np.put_along_axis(a, inds, 100, axis=-1)
+        np.put_along_axis(a, inds, 1000, axis=-1)
 
         dist = DirichletMultinomial.dist(n=n, a=a, shape=vals.shape)
 
-        # TODO: Test logp is as expected (not as simple as the Multinomial case)
-        # value = tt.tensor3(dtype="int32")
-        # value.tag.test_value = np.zeros_like(vals, dtype="int32")
-        # logp = tt.exp(dist.logp(value))
-        # f = theano.function(inputs=[value], outputs=logp)
-        # assert_almost_equal(
-        #     f(vals),
-        #     np.ones(vals.shape[:-1] + (1,)),
-        #     decimal=select_by_precision(float64=6, float32=3),
-        # )
+        # Logp should be approx -9.924431e-06
+        dist_logp = dist.logp(vals).tag.test_value
+        expected_logp = np.full(shape=vals.shape[:-1] + (1,), fill_value=-9.924431e-06)
+        assert_almost_equal(
+            dist_logp,
+            expected_logp,
+            decimal=select_by_precision(float64=6, float32=3),
+        )
 
         # Samples should be equal given the almost deterministic DM
         sample = dist.random(size=2)
@@ -2218,6 +2217,9 @@ def setup_class(self):
                 shape=(n, n),
             )
 
+            # DirichletMultinomial
+            dm = DirichletMultinomial("dm", n=5, a=[1, 1, 1], shape=(2, 3))
+
             # Likelihood (sampling distribution) of observations
             Y_obs = Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)
 
@@ -2235,6 +2237,7 @@ def setup_class(self):
                 r"$\text{bound_var} \sim \text{Bound}$ -- \text{Normal}$",
                 r"$\text{kron_normal} \sim \text{KroneckerNormal}$",
                 r"$\text{mat_normal} \sim \text{MatrixNormal}$",
+                r"$\text{dm} \sim \text{DirichletMultinomial}$",
             ),
             "plain": (
                 r"alpha ~ Normal",
@@ -2248,6 +2251,7 @@ def setup_class(self):
                 r"bound_var ~ Bound-Normal",
                 r"kron_normal ~ KroneckerNormal",
                 r"mat_normal ~ MatrixNormal",
+                r"dm ~ DirichletMultinomial",
             ),
             "latex_with_params": (
                 r"$\text{alpha} \sim \text{Normal}(\mathit{mu}=0.0,~\mathit{sigma}=10.0)$",
@@ -2261,6 +2265,7 @@ def setup_class(self):
                 r"$\text{bound_var} \sim \text{Bound}(\mathit{lower}=1.0,~\mathit{upper}=\text{None})$ -- \text{Normal}(\mathit{mu}=0.0,~\mathit{sigma}=10.0)$",
                 r"$\text{kron_normal} \sim \text{KroneckerNormal}(\mathit{mu}=array)$",
                 r"$\text{mat_normal} \sim \text{MatrixNormal}(\mathit{mu}=array,~\mathit{rowcov}=array,~\mathit{colchol_cov}=array)$",
+                r"$\text{dm} \sim \text{DirichletMultinomial}(\mathit{n}=5,~\mathit{a}=array)$",
             ),
             "plain_with_params": (
                 r"alpha ~ Normal(mu=0.0, sigma=10.0)",
@@ -2274,6 +2279,7 @@ def setup_class(self):
                 r"bound_var ~ Bound(lower=1.0, upper=None)-Normal(mu=0.0, sigma=10.0)",
                 r"kron_normal ~ KroneckerNormal(mu=array)",
                 r"mat_normal ~ MatrixNormal(mu=array, rowcov=array, colchol_cov=array)",
+                r"dm∼DirichletMultinomial(n=5, a=array)",
             ),
         }
 

From f5dcdc37c38df79b0ec8a045341eea6d9d6dd029 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Sun, 10 Jan 2021 10:44:36 +0100
Subject: [PATCH 45/56] Add note about mode

---
 pymc3/distributions/multivariate.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 250e54a684..f178c5114a 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -596,6 +596,8 @@ def __init__(self, n, p, *args, **kwargs):
             self.p = tt.as_tensor_variable(p)
 
         self.mean = self.n * self.p
+        # Mode is only an approximation. Exact computation requires a complex
+        # iterative algorithm as described in https://doi.org/10.1016/j.spl.2009.09.013
         mode = tt.cast(tt.round(self.mean), "int32")
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
@@ -740,6 +742,8 @@ def __init__(self, n, a, shape, *args, **kwargs):
         p = self.a / self.a.sum(-1, keepdims=True)
 
         self.mean = self.n * p
+        # Mode is only an approximation. Exact computation requires a complex
+        # iterative algorithm as described in https://doi.org/10.1016/j.spl.2009.09.013
         mode = tt.cast(tt.round(self.mean), "int32")
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0

From c4e017a6d85a422404df004419cfd1adad8ac15b Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Sun, 10 Jan 2021 10:48:21 +0100
Subject: [PATCH 46/56] Tiny rewording

---
 pymc3/tests/test_distributions.py        | 2 +-
 pymc3/tests/test_distributions_random.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 4a2d0eb780..5ad91e01bc 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1852,7 +1852,7 @@ def test_batch_dirichlet_multinomial(self):
         # Test that DM can handle a 3d array for `a`
 
         # Create an almost deterministic DM by setting a to 0.001, everywehere
-        # except for one category / dimensions which is given the value of 1000
+        # except for one category / dimension which is given the value of 1000
         n = 5
         vals = np.zeros((4, 5, 3), dtype="int32")
         a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 9b4c235a97..f83df4eb2d 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -1029,7 +1029,7 @@ def ref_rand(size, a, n):
             [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
         ],
     )
-    def test_dirichlet_multinomial_shapes(self, a, shape, n):
+    def test_dirichlet_multinomial_shape(self, a, shape, n):
         a = np.asarray(a)
         with pm.Model() as model:
             m = pm.DirichletMultinomial("m", n=n, a=a, shape=shape)
@@ -1051,7 +1051,7 @@ def test_dirichlet_multinomial_shapes(self, a, shape, n):
             ([[5], [3]], [[1000, 1, 1], [1, 1, 1000]], (2, 3), pytest.raises(ShapeError)),
         ],
     )
-    def test_dirichlet_multinomial_dist_shapes_raise(self, n, a, shape, expectation):
+    def test_dirichlet_multinomial_dist_shape_raise(self, n, a, shape, expectation):
         m = pm.DirichletMultinomial.dist(n=n, a=a, shape=shape)
         with expectation:
             m.random()

From d46dd50577f970bcfab5eafe76ff1b1e96bdff0e Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 12 Jan 2021 09:11:58 +0100
Subject: [PATCH 47/56] Change mode to _defaultval

---
 pymc3/distributions/multivariate.py | 7 ++++---
 pymc3/tests/test_distributions.py   | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index f178c5114a..07547142ce 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -723,13 +723,14 @@ class DirichletMultinomial(Discrete):
         Dirichlet parameter. Elements must be non-negative.
         The number of categories is given by the length of the last axis.
 
-    shape : numerical tuple
+    shape : integer tuple
         Describes shape of distribution. For example if n=array([5, 10]), and
         p=array([1, 1, 1]), shape should be (2, 3).
     """
 
     def __init__(self, n, a, shape, *args, **kwargs):
-        super().__init__(shape, *args, **kwargs)
+
+        super().__init__(shape=shape, defaults=("_defaultval",), *args, **kwargs)
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
@@ -748,7 +749,7 @@ def __init__(self, n, a, shape, *args, **kwargs):
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
         mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
-        self.mode = mode
+        self._defaultval = mode
 
     def _random(self, n, a, size=None, raw_size=None):
         # numpy will cast dirichlet and multinomial samples to float64 by default
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 5ad91e01bc..99d99975e8 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1771,11 +1771,11 @@ def test_dirichlet_multinomial_matches_beta_binomial(self):
             [[[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]], [1, 10], (2, 4)],
         ],
     )
-    def test_dirichlet_multinomial_mode(self, a, n, shape):
+    def test_dirichlet_multinomial_defaultval(self, a, n, shape):
         a = np.asarray(a)
         with Model() as model:
             m = DirichletMultinomial("m", n=n, a=a, shape=shape)
-        assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
+        assert_allclose(m.distribution._defaultval.eval().sum(axis=-1), n)
 
     def test_dirichlet_multinomial_vec(self):
         vals = np.array([[2, 4, 4], [3, 3, 4]])

From 3ab518dc0486a9193890410af11ade55d5f917bb Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 12 Jan 2021 09:12:24 +0100
Subject: [PATCH 48/56] Revert comment for Multinomial mode

---
 pymc3/distributions/multivariate.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 07547142ce..dac46ae588 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -596,8 +596,6 @@ def __init__(self, n, p, *args, **kwargs):
             self.p = tt.as_tensor_variable(p)
 
         self.mean = self.n * self.p
-        # Mode is only an approximation. Exact computation requires a complex
-        # iterative algorithm as described in https://doi.org/10.1016/j.spl.2009.09.013
         mode = tt.cast(tt.round(self.mean), "int32")
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0

From cdd6d27c19d2000236945906d9fc883c6f5e07f8 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 12 Jan 2021 13:39:07 +0100
Subject: [PATCH 49/56] Update shape check logic

---
 pymc3/distributions/multivariate.py      | 2 +-
 pymc3/tests/test_distributions_random.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index dac46ae588..99cae61b6b 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -799,7 +799,7 @@ def random(self, point=None, size=None):
 
         # If distribution is initialized with .dist(), valid init shape is not asserted.
         # Under normal use in a model context valid init shape is asserted at start.
-        expected_shape = tuple(self.shape) if size is None else (size, *self.shape)
+        expected_shape = to_tuple(size) + to_tuple(self.shape)
         sample_shape = tuple(samples.shape)
         if sample_shape != expected_shape:
             raise ShapeError(
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index f83df4eb2d..b55c8eda4f 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -1051,7 +1051,7 @@ def test_dirichlet_multinomial_shape(self, a, shape, n):
             ([[5], [3]], [[1000, 1, 1], [1, 1, 1000]], (2, 3), pytest.raises(ShapeError)),
         ],
     )
-    def test_dirichlet_multinomial_dist_shape_raise(self, n, a, shape, expectation):
+    def test_dirichlet_multinomial_dist_ShapeError(self, n, a, shape, expectation):
         m = pm.DirichletMultinomial.dist(n=n, a=a, shape=shape)
         with expectation:
             m.random()

From 24447a4c88c4cd1f8c2b823090012a2b024562b9 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Tue, 12 Jan 2021 10:03:07 -0800
Subject: [PATCH 50/56] Add DM to release notes.

---
 RELEASE-NOTES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index a313d6cc33..f6fd67d4fe 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -21,6 +21,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
 - `plot_posterior_predictive_glm` now works with `arviz.InferenceData` as well (see [#4234](https://github.com/pymc-devs/pymc3/pull/4234))
 - Add `logcdf` method to all univariate discrete distributions (see [#4387](https://github.com/pymc-devs/pymc3/pull/4387)).
 - Add `random` method to `MvGaussianRandomWalk` (see [#4388](https://github.com/pymc-devs/pymc3/pull/4388))
+- `DirichletMultinomial` distribution added (see [#4373](https://github.com/pymc-devs/pymc3/pull/4373)).
 
 ### Maintenance
 - Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)

From 0bd6c3d81f636eec16ca5479aff17ef94e6ac9dc Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Thu, 14 Jan 2021 06:12:54 -0800
Subject: [PATCH 51/56] Minor docstring revisions as suggested by @AlexAndorra.

---
 pymc3/distributions/multivariate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 7dae5143d3..bd461e3205 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -695,7 +695,7 @@ def logp(self, x):
 class DirichletMultinomial(Discrete):
     R"""Dirichlet Multinomial log-likelihood.
 
-    Dirichlet mixture of multinomials distribution, with a marginalized PMF.
+    Dirichlet mixture of Multinomials distribution, with a marginalized PMF.
 
     .. math::
 
@@ -718,7 +718,7 @@ class DirichletMultinomial(Discrete):
         with N = a.shape[0]
 
     a : one- or two-dimensional array
-        Dirichlet parameter. Elements must be non-negative.
+        Dirichlet parameter. Elements are strictly positive.
         The number of categories is given by the length of the last axis.
 
     shape : integer tuple

From f919456502df3bd89e95d800b70b80e3e1f2a95f Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Thu, 14 Jan 2021 06:15:21 -0800
Subject: [PATCH 52/56] Revise the revision.

---
 pymc3/distributions/multivariate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index bd461e3205..ff493eb08a 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -718,7 +718,7 @@ class DirichletMultinomial(Discrete):
         with N = a.shape[0]
 
     a : one- or two-dimensional array
-        Dirichlet parameter. Elements are strictly positive.
+        Dirichlet parameter. Elements must be strictly positive.
         The number of categories is given by the length of the last axis.
 
     shape : integer tuple

From c082f0061a4f191c96770524975d5b5137e9666d Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Thu, 14 Jan 2021 15:24:25 -0800
Subject: [PATCH 53/56] Add comment clarifying bounds checking in logp()

---
 pymc3/distributions/multivariate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index ff493eb08a..980d5918e2 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -816,6 +816,8 @@ def logp(self, x):
         const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
         series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
         result = const + series.sum(axis=-1, keepdims=True)
+        # Bounds checking to confirm parameters and data meet all constraints
+        # and that each observation x_i sums to n_i.
         return bound(
             result,
             tt.all(tt.ge(x, 0)),

From ea0ae59ba42ed673bcc4f64ca8b9ea8a5c3e052a Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 15 Jan 2021 11:47:02 +0100
Subject: [PATCH 54/56] Address review suggestions

---
 pymc3/distributions/multivariate.py | 37 ++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 980d5918e2..e5c73179d9 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -45,7 +45,7 @@
 from pymc3.exceptions import ShapeError
 from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker
 from pymc3.model import Deterministic
-from pymc3.theanof import floatX
+from pymc3.theanof import floatX, intX
 
 __all__ = [
     "MvNormal",
@@ -723,16 +723,18 @@ class DirichletMultinomial(Discrete):
 
     shape : integer tuple
         Describes shape of distribution. For example if n=array([5, 10]), and
-        p=array([1, 1, 1]), shape should be (2, 3).
+        a=array([1, 1, 1]), shape should be (2, 3).
     """
 
     def __init__(self, n, a, shape, *args, **kwargs):
 
         super().__init__(shape=shape, defaults=("_defaultval",), *args, **kwargs)
 
+        n = intX(n)
+        a = floatX(a)
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
-            self.a = tt.as_tensor_variable(a) if np.ndim(a) > 1 else tt.shape_padleft(a)
+            self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
         else:
             # n is a scalar, p is a 1d array
             self.n = tt.as_tensor_variable(n)
@@ -749,7 +751,7 @@ def __init__(self, n, a, shape, *args, **kwargs):
         mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
         self._defaultval = mode
 
-    def _random(self, n, a, size=None, raw_size=None):
+    def _random(self, n, a, size=None):
         # numpy will cast dirichlet and multinomial samples to float64 by default
         original_dtype = a.dtype
 
@@ -793,7 +795,6 @@ def random(self, point=None, size=None):
             n,
             a,
             dist_shape=self.shape,
-            not_broadcast_kwargs={"raw_size": size},
             size=size,
         )
 
@@ -803,27 +804,41 @@ def random(self, point=None, size=None):
         sample_shape = tuple(samples.shape)
         if sample_shape != expected_shape:
             raise ShapeError(
-                f"Expected sample shape was {expected_shape} but got {sample_shape}. This may reflect an invalid initialization shape."
+                f"Expected sample shape was {expected_shape} but got {sample_shape}. "
+                "This may reflect an invalid initialization shape."
             )
 
         return samples
 
-    def logp(self, x):
+    def logp(self, value):
+        """
+        Calculate log-probability of DirichletMultinomial distribution
+        at specified value.
+
+        Parameters
+        ----------
+        value: integer array
+            Value for which log-probability is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
         a = self.a
         n = self.n
         sum_a = a.sum(axis=-1, keepdims=True)
 
         const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
-        series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
+        series = gammaln(value + a) - (gammaln(value + 1) + gammaln(a))
         result = const + series.sum(axis=-1, keepdims=True)
         # Bounds checking to confirm parameters and data meet all constraints
-        # and that each observation x_i sums to n_i.
+        # and that each observation value_i sums to n_i.
         return bound(
             result,
-            tt.all(tt.ge(x, 0)),
+            tt.all(tt.ge(value, 0)),
             tt.all(tt.gt(a, 0)),
             tt.all(tt.ge(n, 0)),
-            tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
+            tt.all(tt.eq(value.sum(axis=-1, keepdims=True), n)),
             broadcast_conditions=False,
         )
 

From b451967f3a0620894ab772b1cc8a6fea180430df Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 15 Jan 2021 12:41:01 +0100
Subject: [PATCH 55/56] Update `matches_beta_binomial` to take into
 consideration float precision

---
 pymc3/tests/test_distributions.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 896395cc15..e15996e7ab 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1782,7 +1782,11 @@ def test_dirichlet_multinomial_matches_beta_binomial(self):
             pm.DirichletMultinomial.dist(n=n, a=[a, b], shape=(1, 2)).logp(ns_dm).tag.test_value
         )
         dm_logp = dm_logp.ravel()
-        assert_allclose(bb_logp, dm_logp)
+        assert_almost_equal(
+            dm_logp,
+            bb_logp,
+            decimal=select_by_precision(float64=6, float32=3),
+        )
 
     @pytest.mark.parametrize(
         "a, n, shape",

From 128d5cfbbdfd4c8cc029e907339b057d9f0c3e45 Mon Sep 17 00:00:00 2001
From: Byron Smith <me@byronjsmith.com>
Date: Fri, 15 Jan 2021 22:54:05 -0800
Subject: [PATCH 56/56] Add DM to multivariate distributions docs.

---
 docs/source/api/distributions/multivariate.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/api/distributions/multivariate.rst b/docs/source/api/distributions/multivariate.rst
index 0e91f1b499..54122ae1db 100644
--- a/docs/source/api/distributions/multivariate.rst
+++ b/docs/source/api/distributions/multivariate.rst
@@ -14,6 +14,7 @@ Multivariate
    LKJCorr
    Multinomial
    Dirichlet
+   DirichletMultinomial
 
 .. automodule:: pymc3.distributions.multivariate
    :members: