From d9b67bb300c03fd96d7e9fb42e6c7a239fc77e15 Mon Sep 17 00:00:00 2001
From: Ali Akbar Septiandri <mail.septiandri@gmail.com>
Date: Sun, 27 Sep 2020 12:09:19 +0100
Subject: [PATCH 1/7] Run black on distributions/ and */__init__.py

---
 pymc3/distributions/__init__.py     | 151 ++++----
 pymc3/distributions/bound.py        |  41 +--
 pymc3/distributions/continuous.py   |  12 +-
 pymc3/distributions/discrete.py     | 251 ++++++-------
 pymc3/distributions/multivariate.py | 543 +++++++++++++++-------------
 pymc3/distributions/shape_utils.py  |  12 +-
 pymc3/distributions/simulator.py    |   1 -
 pymc3/distributions/special.py      |  38 +-
 pymc3/distributions/timeseries.py   |  44 +--
 pymc3/distributions/transforms.py   |   7 +-
 pymc3/plots/__init__.py             |  86 +++--
 pymc3/stats/__init__.py             |   1 +
 pymc3/variational/__init__.py       |  27 +-
 13 files changed, 586 insertions(+), 628 deletions(-)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index bfecad95ef..d396d61dd6 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -100,78 +100,79 @@
 
 from .bound import Bound
 
-__all__ = ['Uniform',
-           'Flat',
-           'HalfFlat',
-           'TruncatedNormal',
-           'Normal',
-           'Beta',
-           'Kumaraswamy',
-           'Exponential',
-           'Laplace',
-           'StudentT',
-           'Cauchy',
-           'HalfCauchy',
-           'Gamma',
-           'Weibull',
-           'Bound',
-           'Lognormal',
-           'HalfStudentT',
-           'ChiSquared',
-           'HalfNormal',
-           'Wald',
-           'Pareto',
-           'InverseGamma',
-           'ExGaussian',
-           'VonMises',
-           'Binomial',
-           'BetaBinomial',
-           'Bernoulli',
-           'Poisson',
-           'NegativeBinomial',
-           'ConstantDist',
-           'Constant',
-           'ZeroInflatedPoisson',
-           'ZeroInflatedNegativeBinomial',
-           'ZeroInflatedBinomial',
-           'DiscreteUniform',
-           'Geometric',
-           'Categorical',
-           'OrderedLogistic',
-           'DensityDist',
-           'Distribution',
-           'Continuous',
-           'Discrete',
-           'NoDistribution',
-           'TensorType',
-           'MvNormal',
-           'MatrixNormal',
-           'KroneckerNormal',
-           'MvStudentT',
-           'Dirichlet',
-           'Multinomial',
-           'Wishart',
-           'WishartBartlett',
-           'LKJCholeskyCov',
-           'LKJCorr',
-           'AR1',
-           'AR',
-           'GaussianRandomWalk',
-           'MvGaussianRandomWalk',
-           'MvStudentTRandomWalk',
-           'GARCH11',
-           'SkewNormal',
-           'Mixture',
-           'NormalMixture',
-           'Triangular',
-           'DiscreteWeibull',
-           'Gumbel',
-           'Logistic',
-           'LogitNormal',
-           'Interpolated',
-           'Bound',
-           'Rice',
-           'Moyal',
-           'Simulator',
-           'fast_sample_posterior_predictive'
-           ]
+__all__ = [
+    "Uniform",
+    "Flat",
+    "HalfFlat",
+    "TruncatedNormal",
+    "Normal",
+    "Beta",
+    "Kumaraswamy",
+    "Exponential",
+    "Laplace",
+    "StudentT",
+    "Cauchy",
+    "HalfCauchy",
+    "Gamma",
+    "Weibull",
+    "Bound",
+    "Lognormal",
+    "HalfStudentT",
+    "ChiSquared",
+    "HalfNormal",
+    "Wald",
+    "Pareto",
+    "InverseGamma",
+    "ExGaussian",
+    "VonMises",
+    "Binomial",
+    "BetaBinomial",
+    "Bernoulli",
+    "Poisson",
+    "NegativeBinomial",
+    "ConstantDist",
+    "Constant",
+    "ZeroInflatedPoisson",
+    "ZeroInflatedNegativeBinomial",
+    "ZeroInflatedBinomial",
+    "DiscreteUniform",
+    "Geometric",
+    "Categorical",
+    "OrderedLogistic",
+    "DensityDist",
+    "Distribution",
+    "Continuous",
+    "Discrete",
+    "NoDistribution",
+    "TensorType",
+    "MvNormal",
+    "MatrixNormal",
+    "KroneckerNormal",
+    "MvStudentT",
+    "Dirichlet",
+    "Multinomial",
+    "Wishart",
+    "WishartBartlett",
+    "LKJCholeskyCov",
+    "LKJCorr",
+    "AR1",
+    "AR",
+    "GaussianRandomWalk",
+    "MvGaussianRandomWalk",
+    "MvStudentTRandomWalk",
+    "GARCH11",
+    "SkewNormal",
+    "Mixture",
+    "NormalMixture",
+    "Triangular",
+    "DiscreteWeibull",
+    "Gumbel",
+    "Logistic",
+    "LogitNormal",
+    "Interpolated",
+    "Bound",
+    "Rice",
+    "Moyal",
+    "Simulator",
+    "fast_sample_posterior_predictive",
+]
diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py
index 573b578957..b52dc905b1 100644
--- a/pymc3/distributions/bound.py
+++ b/pymc3/distributions/bound.py
@@ -82,16 +82,13 @@ def _random(self, lower, upper, point=None, size=None):
         upper = np.asarray(upper)
         if lower.size > 1 or upper.size > 1:
             raise ValueError(
-                "Drawing samples from distributions with "
-                "array-valued bounds is not supported."
+                "Drawing samples from distributions with " "array-valued bounds is not supported."
             )
         total_size = np.prod(size).astype(np.int)
         samples = []
         s = 0
         while s < total_size:
-            sample = np.atleast_1d(
-                self._wrapped.random(point=point, size=total_size)
-            ).flatten()
+            sample = np.atleast_1d(self._wrapped.random(point=point, size=total_size)).flatten()
 
             select = sample[np.logical_and(sample >= lower, sample <= upper)]
             samples.append(select)
@@ -128,7 +125,7 @@ def random(self, point=None, size=None):
                 upper,
                 dist_shape=self.shape,
                 size=size,
-                not_broadcast_kwargs={'point': point},
+                not_broadcast_kwargs={"point": point},
             )
         elif self.lower is not None:
             lower = draw_values([self.lower], point=point, size=size)
@@ -138,7 +135,7 @@ def random(self, point=None, size=None):
                 np.inf,
                 dist_shape=self.shape,
                 size=size,
-                not_broadcast_kwargs={'point': point},
+                not_broadcast_kwargs={"point": point},
             )
         else:
             upper = draw_values([self.upper], point=point, size=size)
@@ -148,7 +145,7 @@ def random(self, point=None, size=None):
                 upper,
                 dist_shape=self.shape,
                 size=size,
-                not_broadcast_kwargs={'point': point},
+                not_broadcast_kwargs={"point": point},
             )
 
 
@@ -168,9 +165,7 @@ def __init__(self, distribution, lower, upper, transform="infer", *args, **kwarg
         if lower is not None:
             default = lower + 1
 
-        super().__init__(
-            distribution, lower, upper, default, *args, transform=transform, **kwargs
-        )
+        super().__init__(distribution, lower, upper, default, *args, transform=transform, **kwargs)
 
 
 class _ContinuousBounded(_Bounded, Continuous):
@@ -215,9 +210,7 @@ def __init__(self, distribution, lower, upper, transform="infer", *args, **kwarg
         else:
             default = None
 
-        super().__init__(
-            distribution, lower, upper, default, *args, transform=transform, **kwargs
-        )
+        super().__init__(distribution, lower, upper, default, *args, transform=transform, **kwargs)
 
 
 class Bound:
@@ -283,23 +276,11 @@ def __call__(self, name, *args, **kwargs):
         transform = kwargs.pop("transform", "infer")
         if issubclass(self.distribution, Continuous):
             return _ContinuousBounded(
-                name,
-                self.distribution,
-                self.lower,
-                self.upper,
-                transform,
-                *args,
-                **kwargs
+                name, self.distribution, self.lower, self.upper, transform, *args, **kwargs
             )
         elif issubclass(self.distribution, Discrete):
             return _DiscreteBounded(
-                name,
-                self.distribution,
-                self.lower,
-                self.upper,
-                transform,
-                *args,
-                **kwargs
+                name, self.distribution, self.lower, self.upper, transform, *args, **kwargs
             )
         else:
             raise ValueError("Distribution is neither continuous nor discrete.")
@@ -311,8 +292,6 @@ def dist(self, *args, **kwargs):
             )
 
         elif issubclass(self.distribution, Discrete):
-            return _DiscreteBounded.dist(
-                self.distribution, self.lower, self.upper, *args, **kwargs
-            )
+            return _DiscreteBounded.dist(self.distribution, self.lower, self.upper, *args, **kwargs)
         else:
             raise ValueError("Distribution is neither continuous nor discrete.")
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 5ccdb7f5d2..80fc6a0b3e 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -134,9 +134,7 @@ def assert_negative_support(var, label, distname, value=-1e-6):
             support = False
 
     if np.any(support):
-        msg = "The variable specified for {} has negative support for {}, ".format(
-            label, distname
-        )
+        msg = "The variable specified for {} has negative support for {}, ".format(label, distname)
         msg += "likely making it unsuitable for this parameter."
         warnings.warn(msg)
 
@@ -712,7 +710,7 @@ def random(self, point=None, size=None):
         )
 
     def _random(self, mu, sigma, lower, upper, size):
-        """ Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's
+        """Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's
         parametrization to scipy.truncnorm. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -3447,7 +3445,7 @@ def random(self, point=None, size=None):
         )
 
     def _random(self, c, lower, upper, size):
-        """ Wrapper around stats.triang.rvs that converts Triangular's
+        """Wrapper around stats.triang.rvs that converts Triangular's
         parametrization to scipy.triang. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -3706,7 +3704,7 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs):
         self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma))
         self.b = b = tt.as_tensor_variable(floatX(b))
 
-        nu_sigma_ratio = -nu ** 2 / (2 * sigma ** 2)
+        nu_sigma_ratio = -(nu ** 2) / (2 * sigma ** 2)
         self.mean = (
             sigma
             * np.sqrt(np.pi / 2)
@@ -3762,7 +3760,7 @@ def random(self, point=None, size=None):
         return generate_samples(self._random, nu=nu, sigma=sigma, dist_shape=self.shape, size=size)
 
     def _random(self, nu, sigma, size):
-        """ Wrapper around stats.rice.rvs that converts Rice's
+        """Wrapper around stats.rice.rvs that converts Rice's
         parametrization to scipy.rice. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 090949c905..cfe87ed3d4 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -24,10 +24,23 @@
 from ..theanof import floatX, intX, take_along_axis
 
 
-__all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'DiscreteWeibull',
-           'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
-           'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
-           'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic']
+__all__ = [
+    "Binomial",
+    "BetaBinomial",
+    "Bernoulli",
+    "DiscreteWeibull",
+    "Poisson",
+    "NegativeBinomial",
+    "ConstantDist",
+    "Constant",
+    "ZeroInflatedPoisson",
+    "ZeroInflatedBinomial",
+    "ZeroInflatedNegativeBinomial",
+    "DiscreteUniform",
+    "Geometric",
+    "Categorical",
+    "OrderedLogistic",
+]
 
 
 class Binomial(Discrete):
@@ -96,9 +109,7 @@ def random(self, point=None, size=None):
         array
         """
         n, p = draw_values([self.n, self.p], point=point, size=size)
-        return generate_samples(stats.binom.rvs, n=n, p=p,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(stats.binom.rvs, n=n, p=p, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         r"""
@@ -119,8 +130,11 @@ def logp(self, value):
 
         return bound(
             binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value),
-            0 <= value, value <= n,
-            0 <= p, p <= 1)
+            0 <= value,
+            value <= n,
+            0 <= p,
+            p <= 1,
+        )
 
 
 class BetaBinomial(Discrete):
@@ -183,7 +197,7 @@ def __init__(self, alpha, beta, n, *args, **kwargs):
         self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
         self.beta = beta = tt.as_tensor_variable(floatX(beta))
         self.n = n = tt.as_tensor_variable(intX(n))
-        self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8')
+        self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8")
 
     def _random(self, alpha, beta, n, size=None):
         size = size or 1
@@ -197,8 +211,11 @@ def _random(self, alpha, beta, n, size=None):
 
         quotient, remainder = divmod(_p.shape[0], _n.shape[0])
         if remainder != 0:
-            raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format(
-                _n.shape[0], _p.shape[0]))
+            raise TypeError(
+                "n has a bad size! Was cast to {}, must evenly divide {}".format(
+                    _n.shape[0], _p.shape[0]
+                )
+            )
         if quotient != 1:
             _n = np.tile(_n, quotient)
         samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size)
@@ -221,11 +238,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta, n = \
-            draw_values([self.alpha, self.beta, self.n], point=point, size=size)
-        return generate_samples(self._random, alpha=alpha, beta=beta, n=n,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size)
+        return generate_samples(
+            self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         r"""
@@ -243,11 +259,15 @@ def logp(self, value):
         """
         alpha = self.alpha
         beta = self.beta
-        return bound(binomln(self.n, value)
-                     + betaln(value + alpha, self.n - value + beta)
-                     - betaln(alpha, beta),
-                     value >= 0, value <= self.n,
-                     alpha > 0, beta > 0)
+        return bound(
+            binomln(self.n, value)
+            + betaln(value + alpha, self.n - value + beta)
+            - betaln(alpha, beta),
+            value >= 0,
+            value <= self.n,
+            alpha > 0,
+            beta > 0,
+        )
 
 
 class Bernoulli(Discrete):
@@ -293,7 +313,7 @@ class Bernoulli(Discrete):
     def __init__(self, p=None, logit_p=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if sum(int(var is None) for var in [p, logit_p]) != 1:
-            raise ValueError('Specify one of p and logit_p')
+            raise ValueError("Specify one of p and logit_p")
         if p is not None:
             self._is_logit = False
             self.p = p = tt.as_tensor_variable(floatX(p))
@@ -303,7 +323,7 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs):
             self.p = tt.nnet.sigmoid(floatX(logit_p))
             self._logit_p = tt.as_tensor_variable(logit_p)
 
-        self.mode = tt.cast(tround(self.p), 'int8')
+        self.mode = tt.cast(tround(self.p), "int8")
 
     def random(self, point=None, size=None):
         r"""
@@ -323,9 +343,7 @@ def random(self, point=None, size=None):
         array
         """
         p = draw_values([self.p], point=point, size=size)[0]
-        return generate_samples(stats.bernoulli.rvs, p,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(stats.bernoulli.rvs, p, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         r"""
@@ -347,9 +365,8 @@ def logp(self, value):
         else:
             p = self.p
             return bound(
-                tt.switch(value, tt.log(p), tt.log(1 - p)),
-                value >= 0, value <= 1,
-                p >= 0, p <= 1)
+                tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1
+            )
 
     def _distr_parameters_for_repr(self):
         return ["p"]
@@ -393,8 +410,9 @@ def DiscreteWeibull(q, b, x):
     Variance  :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2`
     ========  ======================
     """
+
     def __init__(self, q, beta, *args, **kwargs):
-        super().__init__(*args, defaults=('median',), **kwargs)
+        super().__init__(*args, defaults=("median",), **kwargs)
 
         self.q = q = tt.as_tensor_variable(floatX(q))
         self.beta = beta = tt.as_tensor_variable(floatX(beta))
@@ -418,10 +436,13 @@ def logp(self, value):
         q = self.q
         beta = self.beta
 
-        return bound(tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
-                     0 <= value,
-                     0 < q, q < 1,
-                     0 < beta)
+        return bound(
+            tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
+            0 <= value,
+            0 < q,
+            q < 1,
+            0 < beta,
+        )
 
     def _ppf(self, p):
         r"""
@@ -431,12 +452,12 @@ def _ppf(self, p):
         q = self.q
         beta = self.beta
 
-        return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1. / beta)) - 1).astype('int64')
+        return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype("int64")
 
     def _random(self, q, beta, size=None):
         p = np.random.uniform(size=size)
 
-        return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1
+        return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1
 
     def random(self, point=None, size=None):
         r"""
@@ -457,9 +478,7 @@ def random(self, point=None, size=None):
         """
         q, beta = draw_values([self.q, self.beta], point=point, size=size)
 
-        return generate_samples(self._random, q, beta,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size)
 
 
 class Poisson(Discrete):
@@ -529,9 +548,7 @@ def random(self, point=None, size=None):
         array
         """
         mu = draw_values([self.mu], point=point, size=size)[0]
-        return generate_samples(stats.poisson.rvs, mu,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         r"""
@@ -548,12 +565,9 @@ def logp(self, value):
         TensorVariable
         """
         mu = self.mu
-        log_prob = bound(
-            logpow(mu, value) - factln(value) - mu,
-            mu >= 0, value >= 0)
+        log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0)
         # Return zero when mu and value are both zero
-        return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0),
-                         0, log_prob)
+        return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob)
 
 
 class NegativeBinomial(Discrete):
@@ -630,14 +644,12 @@ def random(self, point=None, size=None):
         array
         """
         mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size)
-        g = generate_samples(self._random, mu=mu, alpha=alpha,
-                             dist_shape=self.shape,
-                             size=size)
+        g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
         return np.asarray(stats.poisson.rvs(g)).reshape(g.shape)
 
     def _random(self, mu, alpha, size):
-        r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's
+        r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's
         parametrization to scipy.gamma. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -664,15 +676,17 @@ def logp(self, value):
         """
         mu = self.mu
         alpha = self.alpha
-        negbinom = bound(binomln(value + alpha - 1, value)
-                         + logpow(mu / (mu + alpha), value)
-                         + logpow(alpha / (mu + alpha), alpha),
-                         value >= 0, mu > 0, alpha > 0)
+        negbinom = bound(
+            binomln(value + alpha - 1, value)
+            + logpow(mu / (mu + alpha), value)
+            + logpow(alpha / (mu + alpha), alpha),
+            value >= 0,
+            mu > 0,
+            alpha > 0,
+        )
 
         # Return Poisson when alpha gets very large.
-        return tt.switch(tt.gt(alpha, 1e10),
-                         Poisson.dist(self.mu).logp(value),
-                         negbinom)
+        return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom)
 
 
 class Geometric(Discrete):
@@ -735,9 +749,7 @@ def random(self, point=None, size=None):
         array
         """
         p = draw_values([self.p], point=point, size=size)[0]
-        return generate_samples(np.random.geometric, p,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         r"""
@@ -754,8 +766,7 @@ def logp(self, value):
         TensorVariable
         """
         p = self.p
-        return bound(tt.log(p) + logpow(1 - p, value - 1),
-                     0 <= p, p <= 1, value >= 1)
+        return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
 
 
 class DiscreteUniform(Discrete):
@@ -801,8 +812,7 @@ def __init__(self, lower, upper, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.lower = intX(tt.floor(lower))
         self.upper = intX(tt.floor(upper))
-        self.mode = tt.maximum(
-            intX(tt.floor((upper + lower) / 2.)), self.lower)
+        self.mode = tt.maximum(intX(tt.floor((upper + lower) / 2.0)), self.lower)
 
     def _random(self, lower, upper, size=None):
         # This way seems to be the only to deal with lower and upper
@@ -828,10 +838,7 @@ def random(self, point=None, size=None):
         array
         """
         lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
-        return generate_samples(self._random,
-                                lower, upper,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         r"""
@@ -849,8 +856,7 @@ def logp(self, value):
         """
         upper = self.upper
         lower = self.lower
-        return bound(-tt.log(upper - lower + 1),
-                     lower <= value, value <= upper)
+        return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper)
 
 
 class Categorical(Discrete):
@@ -923,11 +929,13 @@ def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
         p = p / np.sum(p, axis=-1, keepdims=True)
 
-        return generate_samples(random_choice,
-                                p=p,
-                                broadcast_shape=p.shape[:-1] or (1,),
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            random_choice,
+            p=p,
+            broadcast_shape=p.shape[:-1] or (1,),
+            dist_shape=self.shape,
+            size=size,
+        )
 
     def logp(self, value):
         r"""
@@ -953,13 +961,9 @@ def logp(self, value):
 
         if p.ndim > 1:
             if p.ndim > value_clip.ndim:
-                value_clip = tt.shape_padleft(
-                    value_clip, p_.ndim - value_clip.ndim
-                )
+                value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
             elif p.ndim < value_clip.ndim:
-                p = tt.shape_padleft(
-                    p, value_clip.ndim - p_.ndim
-                )
+                p = tt.shape_padleft(p, value_clip.ndim - p_.ndim)
             pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
             a = tt.log(
                 take_along_axis(
@@ -970,8 +974,9 @@ def logp(self, value):
         else:
             a = tt.log(p[value_clip])
 
-        return bound(a, value >= 0, value <= (k - 1),
-                     tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
+        return bound(
+            a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1)
+        )
 
 
 class Constant(Discrete):
@@ -985,8 +990,10 @@ class Constant(Discrete):
     """
 
     def __init__(self, c, *args, **kwargs):
-        warnings.warn("Constant has been deprecated. We recommend using a Deterministic object instead.",
-                    DeprecationWarning)
+        warnings.warn(
+            "Constant has been deprecated. We recommend using a Deterministic object instead.",
+            DeprecationWarning,
+        )
         super().__init__(*args, **kwargs)
         self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c)
 
@@ -1013,8 +1020,7 @@ def random(self, point=None, size=None):
         def _random(c, dtype=dtype, size=None):
             return np.full(size, fill_value=c, dtype=dtype)
 
-        return generate_samples(_random, c=c, dist_shape=self.shape,
-                                size=size).astype(dtype)
+        return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype)
 
     def logp(self, value):
         r"""
@@ -1112,9 +1118,7 @@ def random(self, point=None, size=None):
         array
         """
         theta, psi = draw_values([self.theta, self.psi], point=point, size=size)
-        g = generate_samples(stats.poisson.rvs, theta,
-                             dist_shape=self.shape,
-                             size=size)
+        g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size)
         g, psi = broadcast_distribution_samples([g, psi], size=size)
         return g * (np.random.random(g.shape) < psi)
 
@@ -1138,13 +1142,10 @@ def logp(self, value):
         logp_val = tt.switch(
             tt.gt(value, 0),
             tt.log(psi) + self.pois.logp(value),
-            logaddexp(tt.log1p(-psi), tt.log(psi) - theta))
+            logaddexp(tt.log1p(-psi), tt.log(psi) - theta),
+        )
 
-        return bound(
-            logp_val,
-            0 <= value,
-            0 <= psi, psi <= 1,
-            0 <= theta)
+        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta)
 
 
 class ZeroInflatedBinomial(Discrete):
@@ -1224,9 +1225,7 @@ def random(self, point=None, size=None):
         array
         """
         n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size)
-        g = generate_samples(stats.binom.rvs, n, p,
-                             dist_shape=self.shape,
-                             size=size)
+        g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size)
         g, psi = broadcast_distribution_samples([g, psi], size=size)
         return g * (np.random.random(g.shape) < psi)
 
@@ -1251,13 +1250,10 @@ def logp(self, value):
         logp_val = tt.switch(
             tt.gt(value, 0),
             tt.log(psi) + self.bin.logp(value),
-            logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)))
+            logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)),
+        )
 
-        return bound(
-            logp_val,
-            0 <= value, value <= n,
-            0 <= psi, psi <= 1,
-            0 <= p, p <= 1)
+        return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1)
 
 
 class ZeroInflatedNegativeBinomial(Discrete):
@@ -1353,21 +1349,14 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, alpha, psi = draw_values(
-            [self.mu, self.alpha, self.psi], point=point, size=size)
-        g = generate_samples(
-            self._random,
-            mu=mu,
-            alpha=alpha,
-            dist_shape=self.shape,
-            size=size
-        )
+        mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size)
+        g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
         g, psi = broadcast_distribution_samples([g, psi], size=size)
         return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi)
 
     def _random(self, mu, alpha, size):
-        r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's
+        r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's
         parametrization to scipy.gamma. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -1398,19 +1387,12 @@ def logp(self, value):
 
         logp_other = tt.log(psi) + self.nb.logp(value)
         logp_0 = logaddexp(
-            tt.log1p(-psi),
-            tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)))
+            tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))
+        )
 
-        logp_val = tt.switch(
-            tt.gt(value, 0),
-            logp_other,
-            logp_0)
+        logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0)
 
-        return bound(
-            logp_val,
-            0 <= value,
-            0 <= psi, psi <= 1,
-            mu > 0, alpha > 0)
+        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0)
 
 
 class OrderedLogistic(Categorical):
@@ -1484,11 +1466,14 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         self.cutpoints = tt.as_tensor_variable(cutpoints)
 
         pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta))
-        p_cum = tt.concatenate([
-            tt.zeros_like(tt.shape_padright(pa[..., 0])),
-            pa,
-            tt.ones_like(tt.shape_padright(pa[..., 0]))
-        ], axis=-1)
+        p_cum = tt.concatenate(
+            [
+                tt.zeros_like(tt.shape_padright(pa[..., 0])),
+                pa,
+                tt.ones_like(tt.shape_padright(pa[..., 0])),
+            ],
+            axis=-1,
+        )
         p = p_cum[..., 1:] - p_cum[..., :-1]
 
         super().__init__(p=p, *args, **kwargs)
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index b22ffd7ce4..01783d35e5 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -30,8 +30,7 @@
 
 from pymc3.theanof import floatX
 from . import transforms
-from .distribution import (Continuous, Discrete, draw_values, generate_samples,
-                           _DrawValuesContext)
+from .distribution import Continuous, Discrete, draw_values, generate_samples, _DrawValuesContext
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
@@ -40,15 +39,22 @@
 from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker
 
 
-__all__ = ['MvNormal', 'MvStudentT', 'Dirichlet',
-           'Multinomial', 'Wishart', 'WishartBartlett',
-           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal',
-           'KroneckerNormal']
+__all__ = [
+    "MvNormal",
+    "MvStudentT",
+    "Dirichlet",
+    "Multinomial",
+    "Wishart",
+    "WishartBartlett",
+    "LKJCorr",
+    "LKJCholeskyCov",
+    "MatrixNormal",
+    "KroneckerNormal",
+]
 
 
 class _QuadFormBase(Continuous):
-    def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
-                 *args, **kwargs):
+    def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if len(self.shape) > 2:
             raise ValueError("Only 1 or 2 dimensions are allowed.")
@@ -56,40 +62,40 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
         if chol is not None and not lower:
             chol = chol.T
         if len([i for i in [tau, cov, chol] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of tau, cov, '
-                             'or chol.')
+            raise ValueError(
+                "Incompatible parameterization. " "Specify exactly one of tau, cov, " "or chol."
+            )
         self.mu = mu = tt.as_tensor_variable(mu)
         self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
         # Step methods and advi do not catch LinAlgErrors at the
         # moment. We work around that by using a cholesky op
         # that returns a nan as first entry instead of raising
         # an error.
-        cholesky = Cholesky(lower=True, on_error='nan')
+        cholesky = Cholesky(lower=True, on_error="nan")
 
         if cov is not None:
             self.k = cov.shape[0]
-            self._cov_type = 'cov'
+            self._cov_type = "cov"
             cov = tt.as_tensor_variable(cov)
             if cov.ndim != 2:
-                raise ValueError('cov must be two dimensional.')
+                raise ValueError("cov must be two dimensional.")
             self.chol_cov = cholesky(cov)
             self.cov = cov
             self._n = self.cov.shape[-1]
         elif tau is not None:
             self.k = tau.shape[0]
-            self._cov_type = 'tau'
+            self._cov_type = "tau"
             tau = tt.as_tensor_variable(tau)
             if tau.ndim != 2:
-                raise ValueError('tau must be two dimensional.')
+                raise ValueError("tau must be two dimensional.")
             self.chol_tau = cholesky(tau)
             self.tau = tau
             self._n = self.tau.shape[-1]
         else:
             self.k = chol.shape[0]
-            self._cov_type = 'chol'
+            self._cov_type = "chol"
             if chol.ndim != 2:
-                raise ValueError('chol must be two dimensional.')
+                raise ValueError("chol must be two dimensional.")
             self.chol_cov = tt.as_tensor_variable(chol)
             self._n = self.chol_cov.shape[-1]
 
@@ -97,7 +103,7 @@ def _quaddist(self, value):
         """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma."""
         mu = self.mu
         if value.ndim > 2 or value.ndim == 0:
-            raise ValueError('Invalid dimension for value: %s' % value.ndim)
+            raise ValueError("Invalid dimension for value: %s" % value.ndim)
         if value.ndim == 1:
             onedim = True
             value = value[None, :]
@@ -106,11 +112,11 @@ def _quaddist(self, value):
 
         delta = value - mu
 
-        if self._cov_type == 'cov':
+        if self._cov_type == "cov":
             # Use this when Theano#5908 is released.
             # return MvNormalLogp()(self.cov, delta)
             dist, logdet, ok = self._quaddist_cov(delta)
-        elif self._cov_type == 'tau':
+        elif self._cov_type == "tau":
             dist, logdet, ok = self._quaddist_tau(delta)
         else:
             dist, logdet, ok = self._quaddist_chol(delta)
@@ -222,8 +228,7 @@ class MvNormal(_QuadFormBase):
         vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T)
     """
 
-    def __init__(self, mu, cov=None, tau=None, chol=None, lower=True,
-                 *args, **kwargs):
+    def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs):
         super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs)
         self.mean = self.median = self.mode = self.mu = self.mu
 
@@ -253,40 +258,35 @@ def random(self, point=None, size=None):
                 except TypeError:
                     size = (size,)
 
-        if self._cov_type == 'cov':
+        if self._cov_type == "cov":
             mu, cov = draw_values([self.mu, self.cov], point=point, size=size)
             if mu.shape[-1] != cov.shape[-1]:
                 raise ValueError("Shapes for mu and cov don't match")
 
             try:
-                dist = stats.multivariate_normal(
-                    mean=mu, cov=cov, allow_singular=True)
+                dist = stats.multivariate_normal(mean=mu, cov=cov, allow_singular=True)
             except ValueError:
                 size += (mu.shape[-1],)
                 return np.nan * np.zeros(size)
             return dist.rvs(size)
-        elif self._cov_type == 'chol':
-            mu, chol = draw_values([self.mu, self.chol_cov],
-                                   point=point, size=size)
+        elif self._cov_type == "chol":
+            mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size)
             if size and mu.ndim == len(size) and mu.shape == size:
                 mu = mu[..., np.newaxis]
             if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1:
                 raise ValueError("Shapes for mu and chol don't match")
-            broadcast_shape = (
-                np.broadcast(np.empty(mu.shape[:-1]),
-                             np.empty(chol.shape[:-2])).shape
-            )
+            broadcast_shape = np.broadcast(np.empty(mu.shape[:-1]), np.empty(chol.shape[:-2])).shape
 
             mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],))
             chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:])
             # If mu and chol were fixed by the point, only the standard normal
             # should change
-            if mu.shape[:len(size)] != size:
+            if mu.shape[: len(size)] != size:
                 std_norm_shape = size + mu.shape
             else:
                 std_norm_shape = mu.shape
             standard_normal = np.random.standard_normal(std_norm_shape)
-            return mu + np.einsum('...ij,...j->...i', chol, standard_normal)
+            return mu + np.einsum("...ij,...j->...i", chol, standard_normal)
         else:
             mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
             if mu.shape[-1] != tau[0].shape[-1]:
@@ -299,8 +299,7 @@ def random(self, point=None, size=None):
                 return np.nan * np.zeros(size)
 
             standard_normal = np.random.standard_normal(size)
-            transformed = linalg.solve_triangular(
-                chol, standard_normal.T, lower=True)
+            transformed = linalg.solve_triangular(chol, standard_normal.T, lower=True)
             return mu + transformed.T
 
     def logp(self, value):
@@ -319,7 +318,7 @@ def logp(self, value):
         """
         quaddist, logdet, ok = self._quaddist(value)
         k = floatX(value.shape[-1])
-        norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi))
+        norm = -0.5 * k * pm.floatX(np.log(2 * np.pi))
         return bound(norm - 0.5 * quaddist - logdet, ok)
 
     def _distr_parameters_for_repr(self):
@@ -367,11 +366,12 @@ class MvStudentT(_QuadFormBase):
         Whether the cholesky fatcor is given as a lower triangular matrix.
     """
 
-    def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None,
-                 lower=True, *args, **kwargs):
+    def __init__(
+        self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, lower=True, *args, **kwargs
+    ):
         if Sigma is not None:
             if cov is not None:
-                raise ValueError('Specify only one of cov and Sigma')
+                raise ValueError("Specify only one of cov and Sigma")
             cov = Sigma
         super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs)
         self.nu = nu = tt.as_tensor_variable(nu)
@@ -396,14 +396,14 @@ def random(self, point=None, size=None):
         """
         with _DrawValuesContext():
             nu, mu = draw_values([self.nu, self.mu], point=point, size=size)
-            if self._cov_type == 'cov':
-                cov, = draw_values([self.cov], point=point, size=size)
+            if self._cov_type == "cov":
+                (cov,) = draw_values([self.cov], point=point, size=size)
                 dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov)
-            elif self._cov_type == 'tau':
-                tau, = draw_values([self.tau], point=point, size=size)
+            elif self._cov_type == "tau":
+                (tau,) = draw_values([self.tau], point=point, size=size)
                 dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau)
             else:
-                chol, = draw_values([self.chol_cov], point=point, size=size)
+                (chol,) = draw_values([self.chol_cov], point=point, size=size)
                 dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol)
 
             samples = dist.random(point, size)
@@ -428,10 +428,12 @@ def logp(self, value):
         quaddist, logdet, ok = self._quaddist(value)
         k = floatX(value.shape[-1])
 
-        norm = (gammaln((self.nu + k) / 2.)
-                - gammaln(self.nu / 2.)
-                - 0.5 * k * floatX(np.log(self.nu * np.pi)))
-        inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu)
+        norm = (
+            gammaln((self.nu + k) / 2.0)
+            - gammaln(self.nu / 2.0)
+            - 0.5 * k * floatX(np.log(self.nu * np.pi))
+        )
+        inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu)
         return bound(norm + inner - logdet, ok)
 
     def _distr_parameters_for_repr(self):
@@ -462,20 +464,19 @@ class Dirichlet(Continuous):
         Concentration parameters (a > 0).
     """
 
-    def __init__(self, a, transform=transforms.stick_breaking,
-                 *args, **kwargs):
+    def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs):
 
-        if kwargs.get('shape') is None:
+        if kwargs.get("shape") is None:
             warnings.warn(
                 (
                     "Shape not explicitly set. "
                     "Please, set the value using the `shape` keyword argument. "
                     "Using the test value to infer the shape."
                 ),
-                DeprecationWarning
+                DeprecationWarning,
             )
             try:
-                kwargs['shape'] = np.shape(get_test_value(a))
+                kwargs["shape"] = np.shape(get_test_value(a))
             except AttributeError:
                 pass
 
@@ -485,15 +486,13 @@ def __init__(self, a, transform=transforms.stick_breaking,
         self.a = a = tt.as_tensor_variable(a)
         self.mean = a / tt.sum(a)
 
-        self.mode = tt.switch(tt.all(a > 1),
-                              (a - 1) / tt.sum(a - 1),
-                              np.nan)
+        self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan)
 
     def _random(self, a, size=None):
         gen = stats.dirichlet.rvs
         shape = tuple(np.atleast_1d(self.shape))
-        if size[-len(shape):] == shape:
-            real_size = size[:-len(shape)]
+        if size[-len(shape) :] == shape:
+            real_size = size[: -len(shape)]
         else:
             real_size = size
         if self.size_prefix:
@@ -528,10 +527,7 @@ def random(self, point=None, size=None):
         array
         """
         a = draw_values([self.a], point=point, size=size)[0]
-        samples = generate_samples(self._random,
-                                   a=a,
-                                   dist_shape=self.shape,
-                                   size=size)
+        samples = generate_samples(self._random, a=a, dist_shape=self.shape, size=size)
         return samples
 
     def logp(self, value):
@@ -551,11 +547,14 @@ def logp(self, value):
         a = self.a
 
         # only defined for sum(value) == 1
-        return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
-                     + gammaln(tt.sum(a, axis=-1)),
-                     tt.all(value >= 0), tt.all(value <= 1),
-                     np.logical_not(a.broadcastable), tt.all(a > 0),
-                     broadcast_conditions=False)
+        return bound(
+            tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)),
+            tt.all(value >= 0),
+            tt.all(value <= 1),
+            np.logical_not(a.broadcastable),
+            tt.all(a > 0),
+            broadcast_conditions=False,
+        )
 
     def _distr_parameters_for_repr(self):
         return ["a"]
@@ -598,7 +597,7 @@ def __init__(self, n, p, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         p = p / tt.sum(p, axis=-1, keepdims=True)
-        n = np.squeeze(n) # works also if n is a tensor
+        n = np.squeeze(n)  # works also if n is a tensor
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
@@ -612,17 +611,16 @@ def __init__(self, n, p, *args, **kwargs):
             self.p = tt.as_tensor_variable(p)
 
         self.mean = self.n * self.p
-        mode = tt.cast(tt.round(self.mean), 'int32')
+        mode = tt.cast(tt.round(self.mean), "int32")
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
-        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
-                                diff[inc_bool_arr.nonzero()])
+        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
         self.mode = mode
 
     def _random(self, n, p, size=None, raw_size=None):
         original_dtype = p.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
-        p = p.astype('float64')
+        p = p.astype("float64")
         # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
         p /= np.sum(p, axis=-1, keepdims=True)
 
@@ -635,23 +633,19 @@ def _random(self, n, p, size=None, raw_size=None):
         # np.random.multinomial needs `n` to be a scalar int and `p` a
         # sequence so we semi flatten them and iterate over them
         size_ = to_tuple(raw_size)
-        if p.ndim > len(size_) and p.shape[:len(size_)] == size_:
+        if p.ndim > len(size_) and p.shape[: len(size_)] == size_:
             # p and n have the size_ prepend so we don't need it in np.random
             n_ = n.reshape([-1])
             p_ = p.reshape([-1, p.shape[-1]])
-            samples = np.array([
-                np.random.multinomial(nn, pp)
-                for nn, pp in zip(n_, p_)
-            ])
+            samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
             samples = samples.reshape(p.shape)
         else:
             # p and n don't have the size prepend
             n_ = n.reshape([-1])
             p_ = p.reshape([-1, p.shape[-1]])
-            samples = np.array([
-                np.random.multinomial(nn, pp, size=size_)
-                for nn, pp in zip(n_, p_)
-            ])
+            samples = np.array(
+                [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)]
+            )
             samples = np.moveaxis(samples, 0, -1)
             samples = samples.reshape(size + p.shape)
         # We cast back to the original dtype
@@ -675,10 +669,14 @@ def random(self, point=None, size=None):
         array
         """
         n, p = draw_values([self.n, self.p], point=point, size=size)
-        samples = generate_samples(self._random, n, p,
-                                   dist_shape=self.shape,
-                                   not_broadcast_kwargs={'raw_size': size},
-                                   size=size)
+        samples = generate_samples(
+            self._random,
+            n,
+            p,
+            dist_shape=self.shape,
+            not_broadcast_kwargs={"raw_size": size},
+            size=size,
+        )
         return samples
 
     def logp(self, x):
@@ -705,7 +703,7 @@ def logp(self, x):
             tt.all(p <= 1),
             tt.all(tt.eq(tt.sum(p, axis=-1), 1)),
             tt.all(tt.ge(n, 0)),
-            broadcast_conditions=False
+            broadcast_conditions=False,
         )
 
 
@@ -731,7 +729,7 @@ class PosDefMatrix(theano.Op):
     def make_node(self, x):
         x = tt.as_tensor_variable(x)
         assert x.ndim == 2
-        o = tt.TensorType(dtype='int8', broadcastable=[])()
+        o = tt.TensorType(dtype="int8", broadcastable=[])()
         return theano.Apply(self, [x], [o])
 
     # Python implementation:
@@ -740,21 +738,22 @@ def perform(self, node, inputs, outputs):
         (x,) = inputs
         (z,) = outputs
         try:
-            z[0] = np.array(posdef(x), dtype='int8')
+            z[0] = np.array(posdef(x), dtype="int8")
         except Exception:
-            pm._log.exception('Failed to check if %s positive definite', x)
+            pm._log.exception("Failed to check if %s positive definite", x)
             raise
 
     def infer_shape(self, node, shapes):
         return [[]]
 
     def grad(self, inp, grads):
-        x, = inp
+        (x,) = inp
         return [x.zeros_like(theano.config.floatX)]
 
     def __str__(self):
         return "MatrixIsPositiveDefinite"
 
+
 matrix_pos_def = PosDefMatrix()
 
 
@@ -797,20 +796,20 @@ class Wishart(Continuous):
 
     def __init__(self, nu, V, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        warnings.warn('The Wishart distribution can currently not be used '
-                      'for MCMC sampling. The probability of sampling a '
-                      'symmetric matrix is basically zero. Instead, please '
-                      'use LKJCholeskyCov or LKJCorr. For more information '
-                      'on the issues surrounding the Wishart see here: '
-                      'https://github.com/pymc-devs/pymc3/issues/538.',
-                      UserWarning)
+        warnings.warn(
+            "The Wishart distribution can currently not be used "
+            "for MCMC sampling. The probability of sampling a "
+            "symmetric matrix is basically zero. Instead, please "
+            "use LKJCholeskyCov or LKJCorr. For more information "
+            "on the issues surrounding the Wishart see here: "
+            "https://github.com/pymc-devs/pymc3/issues/538.",
+            UserWarning,
+        )
         self.nu = nu = tt.as_tensor_variable(nu)
         self.p = p = tt.as_tensor_variable(V.shape[0])
         self.V = V = tt.as_tensor_variable(V)
         self.mean = nu * V
-        self.mode = tt.switch(tt.ge(nu, p + 1),
-                              (nu - p - 1) * V,
-                              np.nan)
+        self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan)
 
     def random(self, point=None, size=None):
         """
@@ -830,9 +829,8 @@ def random(self, point=None, size=None):
         array
         """
         nu, V = draw_values([self.nu, self.V], point=point, size=size)
-        size= 1 if size is None else size
-        return generate_samples(stats.wishart.rvs, np.asscalar(nu), V,
-                                    broadcast_shape=(size,))
+        size = 1 if size is None else size
+        return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, broadcast_shape=(size,))
 
     def logp(self, X):
         """
@@ -855,14 +853,19 @@ def logp(self, X):
         IVI = det(V)
         IXI = det(X)
 
-        return bound(((nu - p - 1) * tt.log(IXI)
-                      - trace(matrix_inverse(V).dot(X))
-                      - nu * p * tt.log(2) - nu * tt.log(IVI)
-                      - 2 * multigammaln(nu / 2., p)) / 2,
-                     matrix_pos_def(X),
-                     tt.eq(X, X.T),
-                     nu > (p - 1),
-                     broadcast_conditions=False
+        return bound(
+            (
+                (nu - p - 1) * tt.log(IXI)
+                - trace(matrix_inverse(V).dot(X))
+                - nu * p * tt.log(2)
+                - nu * tt.log(IVI)
+                - 2 * multigammaln(nu / 2.0, p)
+            )
+            / 2,
+            matrix_pos_def(X),
+            tt.eq(X, X.T),
+            nu > (p - 1),
+            broadcast_conditions=False,
         )
 
 
@@ -924,17 +927,18 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
         # Inverse transform
         testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T))
         testval = linalg.cholesky(testval, lower=True)
-        diag_testval = testval[diag_idx]**2
+        diag_testval = testval[diag_idx] ** 2
         tril_testval = testval[tril_idx]
     else:
         diag_testval = None
         tril_testval = None
 
-    c = tt.sqrt(ChiSquared('%s_c' % name, nu - np.arange(2, 2 + n_diag), shape=n_diag,
-                           testval=diag_testval))
-    pm._log.info('Added new variable %s_c to model diagonal of Wishart.' % name)
-    z = Normal('%s_z' % name, 0., 1., shape=n_tril, testval=tril_testval)
-    pm._log.info('Added new variable %s_z to model off-diagonals of Wishart.' % name)
+    c = tt.sqrt(
+        ChiSquared("%s_c" % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval)
+    )
+    pm._log.info("Added new variable %s_c to model diagonal of Wishart." % name)
+    z = Normal("%s_z" % name, 0.0, 1.0, shape=n_tril, testval=tril_testval)
+    pm._log.info("Added new variable %s_z to model off-diagonals of Wishart." % name)
     # Construct A matrix
     A = tt.zeros(S.shape, dtype=np.float32)
     A = tt.set_subtensor(A[diag_idx], c)
@@ -949,20 +953,24 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
 
 def _lkj_normalizing_constant(eta, n):
     if eta == 1:
-        result = gammaln(2. * tt.arange(1, int((n - 1) / 2) + 1)).sum()
+        result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum()
         if n % 2 == 1:
-            result += (0.25 * (n ** 2 - 1) * tt.log(np.pi)
-                       - 0.25 * (n - 1) ** 2 * tt.log(2.)
-                       - (n - 1) * gammaln(int((n + 1) / 2)))
+            result += (
+                0.25 * (n ** 2 - 1) * tt.log(np.pi)
+                - 0.25 * (n - 1) ** 2 * tt.log(2.0)
+                - (n - 1) * gammaln(int((n + 1) / 2))
+            )
         else:
-            result += (0.25 * n * (n - 2) * tt.log(np.pi)
-                       + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.)
-                       + n * gammaln(n / 2) - (n - 1) * gammaln(n))
+            result += (
+                0.25 * n * (n - 2) * tt.log(np.pi)
+                + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0)
+                + n * gammaln(n / 2)
+                - (n - 1) * gammaln(n)
+            )
     else:
         result = -(n - 1) * gammaln(eta + 0.5 * (n - 1))
         k = tt.arange(1, n)
-        result += (0.5 * k * tt.log(np.pi)
-                   + gammaln(eta + 0.5 * (n - 1 - k))).sum()
+        result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum()
     return result
 
 
@@ -970,24 +978,25 @@ class _LKJCholeskyCov(Continuous):
     R"""Underlying class for covariance matrix with LKJ distributed correlations.
     See docs for LKJCholeskyCov function for more details on how to use it in models.
     """
+
     def __init__(self, eta, n, sd_dist, *args, **kwargs):
         self.n = n
         self.eta = eta
 
-        if 'transform' in kwargs and kwargs['transform'] is not None:
-            raise ValueError('Invalid parameter: transform.')
-        if 'shape' in kwargs:
-            raise ValueError('Invalid parameter: shape.')
+        if "transform" in kwargs and kwargs["transform"] is not None:
+            raise ValueError("Invalid parameter: transform.")
+        if "shape" in kwargs:
+            raise ValueError("Invalid parameter: shape.")
 
         shape = n * (n + 1) // 2
 
         if sd_dist.shape.ndim not in [0, 1]:
-            raise ValueError('Invalid shape for sd_dist.')
+            raise ValueError("Invalid shape for sd_dist.")
 
         transform = transforms.CholeskyCovPacked(n)
 
-        kwargs['shape'] = shape
-        kwargs['transform'] = transform
+        kwargs["shape"] = shape
+        kwargs["transform"] = transform
         super().__init__(*args, **kwargs)
 
         self.sd_dist = sd_dist
@@ -1017,9 +1026,7 @@ def logp(self, x):
         cumsum = tt.cumsum(x ** 2)
         variance = tt.zeros(n)
         variance = tt.inc_subtensor(variance[0], x[0] ** 2)
-        variance = tt.inc_subtensor(
-            variance[1:],
-            cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
+        variance = tt.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
         sd_vals = tt.sqrt(variance)
 
         logp_sd = self.sd_dist.logp(sd_vals).sum()
@@ -1043,40 +1050,40 @@ def _random(self, n, eta, size=1):
         P = np.eye(n) * np.ones(eta_sample_shape + (n, n))
         # original implementation in R see:
         # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
-        beta = eta - 1. + n/2.
-        r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.
+        beta = eta - 1.0 + n / 2.0
+        r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.0
         P[..., 0, 1] = r12
-        P[..., 1, 1] = np.sqrt(1. - r12**2)
+        P[..., 1, 1] = np.sqrt(1.0 - r12 ** 2)
         for mp1 in range(2, n):
             beta -= 0.5
-            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape)
+            y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=eta_sample_shape)
             z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,))
-            z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
+            z = z / np.sqrt(np.einsum("ij,ij->j", z, z))
             P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z
-            P[..., mp1, mp1] = np.sqrt(1. - y)
-        C = np.einsum('...ji,...jk->...ik', P, P)
+            P[..., mp1, mp1] = np.sqrt(1.0 - y)
+        C = np.einsum("...ji,...jk->...ik", P, P)
         D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2]))
         if D.shape in [tuple(), (1,)]:
             D = self.sd_dist.random(size=P.shape[:-1])
         elif D.ndim < C.ndim - 1:
-            D = [D] + [self.sd_dist.random(size=P.shape[:-2])
-                       for _ in range(n - 1)]
+            D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)]
             D = np.moveaxis(np.array(D), 0, C.ndim - 2)
         elif D.ndim == C.ndim - 1:
             if D.shape[-1] == 1:
-                D = [D] + [self.sd_dist.random(size=P.shape[:-2])
-                           for _ in range(n - 1)]
+                D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)]
                 D = np.concatenate(D, axis=-1)
             elif D.shape[-1] != n:
-                raise ValueError('The size of the samples drawn from the '
-                                 'supplied sd_dist.random have the wrong '
-                                 'size. Expected {} but got {} instead.'.
-                                 format(n, D.shape[-1]))
+                raise ValueError(
+                    "The size of the samples drawn from the "
+                    "supplied sd_dist.random have the wrong "
+                    "size. Expected {} but got {} instead.".format(n, D.shape[-1])
+                )
         else:
-            raise ValueError('Supplied sd_dist.random generates samples with '
-                             'too many dimensions. It must yield samples '
-                             'with 0 or 1 dimensions. Got {} instead'.
-                             format(D.ndim - C.ndim - 2))
+            raise ValueError(
+                "Supplied sd_dist.random generates samples with "
+                "too many dimensions. It must yield samples "
+                "with 0 or 1 dimensions. Got {} instead".format(D.ndim - C.ndim - 2)
+            )
         C *= D[..., :, np.newaxis] * D[..., np.newaxis, :]
         tril_idx = np.tril_indices(n, k=0)
         return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]]
@@ -1105,7 +1112,7 @@ def random(self, point=None, size=None):
         # We can only handle cov matrices with a constant n per random call
         n = np.unique(n)
         if len(n) > 1:
-            raise RuntimeError('Varying n is not supported for LKJCholeskyCov')
+            raise RuntimeError("Varying n is not supported for LKJCholeskyCov")
         n = int(n[0])
         dist_shape = ((n * (n + 1)) // 2,)
         # We make sure that eta and the drawn n get their shapes broadcasted
@@ -1122,10 +1129,10 @@ def random(self, point=None, size=None):
                 size = None
             elif size == broadcast_shape:
                 size = None
-            elif size[-len(sample_shape):] == sample_shape:
-                size = size[:len(size) - len(sample_shape)]
-            elif size[-len(broadcast_shape):] == broadcast_shape:
-                size = size[:len(size) - len(broadcast_shape)]
+            elif size[-len(sample_shape) :] == sample_shape:
+                size = size[: len(size) - len(sample_shape)]
+            elif size[-len(broadcast_shape) :] == broadcast_shape:
+                size = size[: len(size) - len(broadcast_shape)]
         # We will always provide _random with an integer size and then reshape
         # the output to get the correct size
         if size is not None:
@@ -1140,9 +1147,7 @@ def random(self, point=None, size=None):
         return samples
 
 
-def LKJCholeskyCov(
-        name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs
-):
+def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs):
     R"""Wrapper function for covariance matrix with LKJ distributed correlations.
 
     This defines a distribution over Cholesky decomposed covariance
@@ -1337,12 +1342,14 @@ class LKJCorr(Continuous):
         100(9), pp.1989-2001.
     """
 
-    def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs):
+    def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwargs):
         if (p is not None) and (n is not None) and (eta is None):
-            warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta '
-                          'dimension parameter p -> n. Please update your code. '
-                          'Automatically re-assigning parameters for backwards compatibility.',
-                          DeprecationWarning)
+            warnings.warn(
+                "Parameters to LKJCorr have changed: shape parameter n -> eta "
+                "dimension parameter p -> n. Please update your code. "
+                "Automatically re-assigning parameters for backwards compatibility.",
+                DeprecationWarning,
+            )
             self.n = p
             self.eta = n
             eta = self.eta
@@ -1351,20 +1358,24 @@ def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwar
             self.n = n
             self.eta = eta
         else:
-            raise ValueError('Invalid parameter: please use eta as the shape parameter and '
-                             'n as the dimension parameter.')
+            raise ValueError(
+                "Invalid parameter: please use eta as the shape parameter and "
+                "n as the dimension parameter."
+            )
 
         shape = n * (n - 1) // 2
         self.mean = floatX(np.zeros(shape))
 
-        if transform == 'interval':
+        if transform == "interval":
             transform = transforms.interval(-1, 1)
 
         super().__init__(shape=shape, transform=transform, *args, **kwargs)
-        warnings.warn('Parameters in LKJCorr have been rename: shape parameter n -> eta '
-                      'dimension parameter p -> n. Please double check your initialization.',
-                      DeprecationWarning)
-        self.tri_index = np.zeros([n, n], dtype='int32')
+        warnings.warn(
+            "Parameters in LKJCorr have been rename: shape parameter n -> eta "
+            "dimension parameter p -> n. Please double check your initialization.",
+            DeprecationWarning,
+        )
+        self.tri_index = np.zeros([n, n], dtype="int32")
         self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape)
         self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape)
 
@@ -1372,19 +1383,19 @@ def _random(self, n, eta, size=None):
         size = size if isinstance(size, tuple) else (size,)
         # original implementation in R see:
         # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
-        beta = eta - 1. + n/2.
-        r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=size) - 1.
+        beta = eta - 1.0 + n / 2.0
+        r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=size) - 1.0
         P = np.eye(n)[:, :, np.newaxis] * np.ones(size)
         P[0, 1] = r12
-        P[1, 1] = np.sqrt(1. - r12**2)
+        P[1, 1] = np.sqrt(1.0 - r12 ** 2)
         for mp1 in range(2, n):
             beta -= 0.5
-            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=size)
-            z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + size)
-            z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
+            y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=size)
+            z = stats.norm.rvs(loc=0, scale=1, size=(mp1,) + size)
+            z = z / np.sqrt(np.einsum("ij,ij->j", z, z))
             P[0:mp1, mp1] = np.sqrt(y) * z
-            P[mp1, mp1] = np.sqrt(1. - y)
-        C = np.einsum('ji...,jk...->...ik', P, P)
+            P[mp1, mp1] = np.sqrt(1.0 - y)
+        C = np.einsum("ji...,jk...->...ik", P, P)
         triu_idx = np.triu_indices(n, k=1)
         return C[..., triu_idx[0], triu_idx[1]]
 
@@ -1406,9 +1417,8 @@ def random(self, point=None, size=None):
         array
         """
         n, eta = draw_values([self.n, self.eta], point=point, size=size)
-        size= 1 if size is None else size
-        samples = generate_samples(self._random, n, eta,
-                                   broadcast_shape=(size,))
+        size = 1 if size is None else size
+        samples = generate_samples(self._random, n, eta, broadcast_shape=(size,))
         return samples
 
     def logp(self, x):
@@ -1432,12 +1442,14 @@ def logp(self, x):
         X = tt.fill_diagonal(X, 1)
 
         result = _lkj_normalizing_constant(eta, n)
-        result += (eta - 1.) * tt.log(det(X))
-        return bound(result,
-                     tt.all(X <= 1), tt.all(X >= -1),
-                     matrix_pos_def(X),
-                     eta > 0,
-                     broadcast_conditions=False
+        result += (eta - 1.0) * tt.log(det(X))
+        return bound(
+            result,
+            tt.all(X <= 1),
+            tt.all(X >= -1),
+            matrix_pos_def(X),
+            eta > 0,
+            broadcast_conditions=False,
         )
 
 
@@ -1530,12 +1542,22 @@ class MatrixNormal(Continuous):
                                    observed=data, shape=(m, n))
     """
 
-    def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
-                 colcov=None, colchol=None, coltau=None, shape=None, *args,
-                 **kwargs):
+    def __init__(
+        self,
+        mu=0,
+        rowcov=None,
+        rowchol=None,
+        rowtau=None,
+        colcov=None,
+        colchol=None,
+        coltau=None,
+        shape=None,
+        *args,
+        **kwargs,
+    ):
         self._setup_matrices(colcov, colchol, coltau, rowcov, rowchol, rowtau)
         if shape is None:
-            raise TypeError('shape is a required argument')
+            raise TypeError("shape is a required argument")
         assert len(shape) == 2, "shape must have length 2: mxn"
         self.shape = shape
         super().__init__(shape=shape, *args, **kwargs)
@@ -1545,64 +1567,68 @@ def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
         self.solve_upper = tt.slinalg.solve_upper_triangular
 
     def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
-        cholesky = Cholesky(lower=True, on_error='raise')
+        cholesky = Cholesky(lower=True, on_error="raise")
 
         # Among-row matrices
         if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of rowtau, rowcov, '
-                             'or rowchol.')
+            raise ValueError(
+                "Incompatible parameterization. "
+                "Specify exactly one of rowtau, rowcov, "
+                "or rowchol."
+            )
         if rowcov is not None:
             self.m = rowcov.shape[0]
-            self._rowcov_type = 'cov'
+            self._rowcov_type = "cov"
             rowcov = tt.as_tensor_variable(rowcov)
             if rowcov.ndim != 2:
-                raise ValueError('rowcov must be two dimensional.')
+                raise ValueError("rowcov must be two dimensional.")
             self.rowchol_cov = cholesky(rowcov)
             self.rowcov = rowcov
         elif rowtau is not None:
-            raise ValueError('rowtau not supported at this time')
+            raise ValueError("rowtau not supported at this time")
             self.m = rowtau.shape[0]
-            self._rowcov_type = 'tau'
+            self._rowcov_type = "tau"
             rowtau = tt.as_tensor_variable(rowtau)
             if rowtau.ndim != 2:
-                raise ValueError('rowtau must be two dimensional.')
+                raise ValueError("rowtau must be two dimensional.")
             self.rowchol_tau = cholesky(rowtau)
             self.rowtau = rowtau
         else:
             self.m = rowchol.shape[0]
-            self._rowcov_type = 'chol'
+            self._rowcov_type = "chol"
             if rowchol.ndim != 2:
-                raise ValueError('rowchol must be two dimensional.')
+                raise ValueError("rowchol must be two dimensional.")
             self.rowchol_cov = tt.as_tensor_variable(rowchol)
 
         # Among-column matrices
         if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of coltau, colcov, '
-                             'or colchol.')
+            raise ValueError(
+                "Incompatible parameterization. "
+                "Specify exactly one of coltau, colcov, "
+                "or colchol."
+            )
         if colcov is not None:
             self.n = colcov.shape[0]
-            self._colcov_type = 'cov'
+            self._colcov_type = "cov"
             colcov = tt.as_tensor_variable(colcov)
             if colcov.ndim != 2:
-                raise ValueError('colcov must be two dimensional.')
+                raise ValueError("colcov must be two dimensional.")
             self.colchol_cov = cholesky(colcov)
             self.colcov = colcov
         elif coltau is not None:
-            raise ValueError('coltau not supported at this time')
+            raise ValueError("coltau not supported at this time")
             self.n = coltau.shape[0]
-            self._colcov_type = 'tau'
+            self._colcov_type = "tau"
             coltau = tt.as_tensor_variable(coltau)
             if coltau.ndim != 2:
-                raise ValueError('coltau must be two dimensional.')
+                raise ValueError("coltau must be two dimensional.")
             self.colchol_tau = cholesky(coltau)
             self.coltau = coltau
         else:
             self.n = colchol.shape[0]
-            self._colcov_type = 'chol'
+            self._colcov_type = "chol"
             if colchol.ndim != 2:
-                raise ValueError('colchol must be two dimensional.')
+                raise ValueError("colchol must be two dimensional.")
             self.colchol_cov = tt.as_tensor_variable(colchol)
 
     def random(self, point=None, size=None):
@@ -1623,9 +1649,8 @@ def random(self, point=None, size=None):
         array
         """
         mu, colchol, rowchol = draw_values(
-                                [self.mu, self.colchol_cov, self.rowchol_cov],
-                                point=point,
-                                size=size)
+            [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size
+        )
         if size is None:
             size = ()
         if size in (None, ()):
@@ -1640,14 +1665,15 @@ def random(self, point=None, size=None):
                     samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)))
             else:
                 for j in range(np.prod(size)):
-                    standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1]))
-                    samples.append(mu[j] +
-                                np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)))
+                    standard_normal = np.random.standard_normal(
+                        (self.shape[0], colchol[j].shape[-1])
+                    )
+                    samples.append(
+                        mu[j] + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T))
+                    )
             samples = np.array(samples).reshape(size + tuple(self.shape))
         return samples
 
-
-
     def _trquaddist(self, value):
         """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and
         the logdet of colcov and rowcov."""
@@ -1686,8 +1712,8 @@ def logp(self, value):
         trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value)
         m = self.m
         n = self.n
-        norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi))
-        return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet
+        norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi))
+        return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet
 
 
 class KroneckerNormal(Continuous):
@@ -1779,24 +1805,23 @@ class KroneckerNormal(Continuous):
     .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models"
     """
 
-    def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None,
-                 *args, **kwargs):
+    def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs):
         self._setup(covs, chols, evds, sigma)
         super().__init__(*args, **kwargs)
         self.mu = tt.as_tensor_variable(mu)
         self.mean = self.median = self.mode = self.mu
 
     def _setup(self, covs, chols, evds, sigma):
-        self.cholesky = Cholesky(lower=True, on_error='raise')
+        self.cholesky = Cholesky(lower=True, on_error="raise")
         if len([i for i in [covs, chols, evds] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of covs, chols, '
-                             'or evds.')
+            raise ValueError(
+                "Incompatible parameterization. " "Specify exactly one of covs, chols, " "or evds."
+            )
         self._isEVD = False
         self.sigma = sigma
         self.is_noisy = self.sigma is not None and self.sigma != 0
         if covs is not None:
-            self._cov_type = 'cov'
+            self._cov_type = "cov"
             self.covs = covs
             if self.is_noisy:
                 # Noise requires eigendecomposition
@@ -1806,11 +1831,10 @@ def _setup(self, covs, chols, evds, sigma):
                 # Otherwise use cholesky as usual
                 self.chols = list(map(self.cholesky, self.covs))
                 self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
-                self.sizes = tt.as_tensor_variable(
-                                [chol.shape[0] for chol in self.chols])
+                self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols])
                 self.N = tt.prod(self.sizes)
         elif chols is not None:
-            self._cov_type = 'chol'
+            self._cov_type = "chol"
             if self.is_noisy:  # A strange case...
                 # Noise requires eigendecomposition
                 covs = [tt.dot(chol, chol.T) for chol in chols]
@@ -1819,11 +1843,10 @@ def _setup(self, covs, chols, evds, sigma):
             else:
                 self.chols = chols
                 self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
-                self.sizes = tt.as_tensor_variable(
-                                [chol.shape[0] for chol in self.chols])
+                self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols])
                 self.N = tt.prod(self.sizes)
         else:
-            self._cov_type = 'evd'
+            self._cov_type = "evd"
             self._setup_evd(evds)
 
     def _setup_evd(self, eigh_iterable):
@@ -1835,18 +1858,18 @@ def _setup_evd(self, eigh_iterable):
         self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep))
         self.eigs = kron_diag(*self.eigs_sep)  # Combine separate eigs
         if self.is_noisy:
-            self.eigs += self.sigma**2
+            self.eigs += self.sigma ** 2
         self.N = self.eigs.shape[0]
 
     def _setup_random(self):
-        if not hasattr(self, 'mv_params'):
-            self.mv_params = {'mu': self.mu}
-            if self._cov_type == 'cov':
+        if not hasattr(self, "mv_params"):
+            self.mv_params = {"mu": self.mu}
+            if self._cov_type == "cov":
                 cov = kronecker(*self.covs)
                 if self.is_noisy:
-                    cov = cov + self.sigma**2 * tt.identity_like(cov)
-                self.mv_params['cov'] = cov
-            elif self._cov_type == 'chol':
+                    cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+                self.mv_params["cov"] = cov
+            elif self._cov_type == "chol":
                 if self.is_noisy:
                     covs = []
                     for eig, Q in zip(self.eigs_sep, self.Qs):
@@ -1854,19 +1877,19 @@ def _setup_random(self):
                         covs.append(cov_i)
                     cov = kronecker(*covs)
                     if self.is_noisy:
-                        cov = cov + self.sigma**2 * tt.identity_like(cov)
-                    self.mv_params['chol'] = self.cholesky(cov)
+                        cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+                    self.mv_params["chol"] = self.cholesky(cov)
                 else:
-                    self.mv_params['chol'] = kronecker(*self.chols)
-            elif self._cov_type == 'evd':
+                    self.mv_params["chol"] = kronecker(*self.chols)
+            elif self._cov_type == "evd":
                 covs = []
                 for eig, Q in zip(self.eigs_sep, self.Qs):
                     cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T))
                     covs.append(cov_i)
                 cov = kronecker(*covs)
                 if self.is_noisy:
-                    cov = cov + self.sigma**2 * tt.identity_like(cov)
-                self.mv_params['cov'] = cov
+                    cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+                self.mv_params["cov"] = cov
 
     def random(self, point=None, size=None):
         """
@@ -1894,7 +1917,7 @@ def random(self, point=None, size=None):
     def _quaddist(self, value):
         """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))"""
         if value.ndim > 2 or value.ndim == 0:
-            raise ValueError('Invalid dimension for value: %s' % value.ndim)
+            raise ValueError("Invalid dimension for value: %s" % value.ndim)
         if value.ndim == 1:
             onedim = True
             value = value[None, :]
@@ -1904,14 +1927,14 @@ def _quaddist(self, value):
         delta = value - self.mu
         if self._isEVD:
             sqrt_quad = kron_dot(self.QTs, delta.T)
-            sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None])
+            sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None])
             logdet = tt.sum(tt.log(self.eigs))
         else:
             sqrt_quad = kron_solve_lower(self.chols, delta.T)
             logdet = 0
             for chol_size, chol_diag in zip(self.sizes, self.chol_diags):
-                logchol = tt.log(chol_diag) * self.N/chol_size
-                logdet += tt.sum(2*logchol)
+                logchol = tt.log(chol_diag) * self.N / chol_size
+                logdet += tt.sum(2 * logchol)
         # Square each sample
         quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T)
         if onedim:
@@ -1933,4 +1956,4 @@ def logp(self, value):
         TensorVariable
         """
         quad, logdet = self._quaddist(value)
-        return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0
+        return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0
diff --git a/pymc3/distributions/shape_utils.py b/pymc3/distributions/shape_utils.py
index b0ff463a05..c41507620f 100644
--- a/pymc3/distributions/shape_utils.py
+++ b/pymc3/distributions/shape_utils.py
@@ -64,9 +64,7 @@ def _check_shape_type(shape):
                 raise TypeError(f"Value {s} is not a valid integer")
             out.append(o)
     except Exception:
-        raise TypeError(
-            f"Supplied value {shape} does not represent a valid shape"
-        )
+        raise TypeError(f"Supplied value {shape} does not represent a valid shape")
     return tuple(out)
 
 
@@ -172,10 +170,7 @@ def broadcast_dist_samples_shape(shapes, size=None):
     shapes = [_check_shape_type(s) for s in shapes]
     _size = to_tuple(size)
     # samples shapes without the size prepend
-    sp_shapes = [
-        s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s
-        for s in shapes
-    ]
+    sp_shapes = [s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s for s in shapes]
     try:
         broadcast_shape = shapes_broadcasting(*sp_shapes, raise_exception=True)
     except ValueError:
@@ -277,8 +272,7 @@ def get_broadcastable_dist_samples(
     out_shape = broadcast_dist_samples_shape(p_shapes, size=size)
     # samples shapes without the size prepend
     sp_shapes = [
-        s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s
-        for s in p_shapes
+        s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s for s in p_shapes
     ]
     broadcast_shape = shapes_broadcasting(*sp_shapes, raise_exception=True)
     broadcastable_samples = []
diff --git a/pymc3/distributions/simulator.py b/pymc3/distributions/simulator.py
index 026fc2f9bb..218611a577 100644
--- a/pymc3/distributions/simulator.py
+++ b/pymc3/distributions/simulator.py
@@ -130,7 +130,6 @@ def _str_repr(self, name=None, dist=None, formatting="plain"):
             return f"{name} ~ Simulator({function}({params}), {distance}, {sum_stat})"
 
 
-
 def identity(x):
     """Identity function, used as a summary statistics."""
     return x
diff --git a/pymc3/distributions/special.py b/pymc3/distributions/special.py
index 2521e190d8..1401596486 100644
--- a/pymc3/distributions/special.py
+++ b/pymc3/distributions/special.py
@@ -17,10 +17,10 @@
 from theano.scalar.basic_scipy import GammaLn, Psi
 from theano import scalar
 
-__all__ = ['gammaln', 'multigammaln', 'psi', 'log_i0']
+__all__ = ["gammaln", "multigammaln", "psi", "log_i0"]
 
-scalar_gammaln = GammaLn(scalar.upgrade_to_float, name='scalar_gammaln')
-gammaln = tt.Elemwise(scalar_gammaln, name='gammaln')
+scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln")
+gammaln = tt.Elemwise(scalar_gammaln, name="gammaln")
 
 
 def multigammaln(a, p):
@@ -33,21 +33,33 @@ def multigammaln(a, p):
        degrees of freedom. p > 0
     """
     i = tt.arange(1, p + 1)
-    return (p * (p - 1) * tt.log(np.pi) / 4.
-            + tt.sum(gammaln(a + (1. - i) / 2.), axis=0))
+    return p * (p - 1) * tt.log(np.pi) / 4.0 + tt.sum(gammaln(a + (1.0 - i) / 2.0), axis=0)
 
 
 def log_i0(x):
     """
     Calculates the logarithm of the 0 order modified Bessel function of the first kind""
     """
-    return tt.switch(tt.lt(x, 5), tt.log1p(x**2. / 4. + x**4. / 64. + x**6. / 2304.
-                                           + x**8. / 147456. + x**10. / 14745600.
-                                           + x**12. / 2123366400.),
-                                  x - 0.5 * tt.log(2. * np.pi * x) + tt.log1p(1. / (8. * x)
-                                  + 9. / (128. * x**2.) + 225. / (3072. * x**3.)
-                                  + 11025. / (98304. * x**4.)))
+    return tt.switch(
+        tt.lt(x, 5),
+        tt.log1p(
+            x ** 2.0 / 4.0
+            + x ** 4.0 / 64.0
+            + x ** 6.0 / 2304.0
+            + x ** 8.0 / 147456.0
+            + x ** 10.0 / 14745600.0
+            + x ** 12.0 / 2123366400.0
+        ),
+        x
+        - 0.5 * tt.log(2.0 * np.pi * x)
+        + tt.log1p(
+            1.0 / (8.0 * x)
+            + 9.0 / (128.0 * x ** 2.0)
+            + 225.0 / (3072.0 * x ** 3.0)
+            + 11025.0 / (98304.0 * x ** 4.0)
+        ),
+    )
 
 
-scalar_psi = Psi(scalar.upgrade_to_float, name='scalar_psi')
-psi = tt.Elemwise(scalar_psi, name='psi')
+scalar_psi = Psi(scalar.upgrade_to_float, name="scalar_psi")
+psi = tt.Elemwise(scalar_psi, name="psi")
diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py
index 3956550d82..ab637cd547 100644
--- a/pymc3/distributions/timeseries.py
+++ b/pymc3/distributions/timeseries.py
@@ -112,15 +112,7 @@ class AR(distribution.Continuous):
     """
 
     def __init__(
-        self,
-        rho,
-        sigma=None,
-        tau=None,
-        constant=False,
-        init=Flat.dist(),
-        sd=None,
-        *args,
-        **kwargs
+        self, rho, sigma=None, tau=None, constant=False, init=Flat.dist(), sd=None, *args, **kwargs
     ):
         super().__init__(*args, **kwargs)
         if sd is not None:
@@ -170,10 +162,7 @@ def logp(self, value):
         """
         if self.constant:
             x = tt.add(
-                *[
-                    self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)]
-                    for i in range(self.p)
-                ]
+                *[self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)]
             )
             eps = value[self.p :] - self.rho[0] - x
         else:
@@ -181,10 +170,7 @@ def logp(self, value):
                 x = self.rho * value[:-1]
             else:
                 x = tt.add(
-                    *[
-                        self.rho[i] * value[self.p - (i + 1) : -(i + 1)]
-                        for i in range(self.p)
-                    ]
+                    *[self.rho[i] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)]
                 )
             eps = value[self.p :] - x
 
@@ -219,15 +205,11 @@ class GaussianRandomWalk(distribution.Continuous):
         distribution for initial value (Defaults to Flat())
     """
 
-    def __init__(
-        self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *args, **kwargs
-    ):
+    def __init__(self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *args, **kwargs):
         kwargs.setdefault("shape", 1)
         super().__init__(*args, **kwargs)
         if sum(self.shape) == 0:
-            raise TypeError(
-                "GaussianRandomWalk must be supplied a non-zero shape argument!"
-            )
+            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
         if sd is not None:
             sigma = sd
             warnings.warn("sd is deprecated, use sigma instead", DeprecationWarning)
@@ -284,9 +266,7 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        sigma, mu = distribution.draw_values(
-            [self.sigma, self.mu], point=point, size=size
-        )
+        sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
         return distribution.generate_samples(
             self._random,
             sigma=sigma,
@@ -309,7 +289,7 @@ def _random(self, sigma, mu, size, sample_shape):
         rv = stats.norm(mu, sigma)
         data = rv.rvs(size).cumsum(axis=axis)
         data = np.array(data)
-        if len(data.shape)>1:
+        if len(data.shape) > 1:
             for i in range(data.shape[0]):
                 data[i] = data[i] - data[i][0]
         else:
@@ -454,15 +434,7 @@ class MvGaussianRandomWalk(distribution.Continuous):
     """
 
     def __init__(
-        self,
-        mu=0.0,
-        cov=None,
-        tau=None,
-        chol=None,
-        lower=True,
-        init=Flat.dist(),
-        *args,
-        **kwargs
+        self, mu=0.0, cov=None, tau=None, chol=None, lower=True, init=Flat.dist(), *args, **kwargs
     ):
         super().__init__(*args, **kwargs)
 
diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py
index d9381a48a8..81a2c79ad6 100644
--- a/pymc3/distributions/transforms.py
+++ b/pymc3/distributions/transforms.py
@@ -235,8 +235,8 @@ def backward(self, x):
     def forward(self, x):
         """Inverse operation of softplus.
 
-            y = Log(Exp(x) - 1)
-              = Log(1 - Exp(-x)) + x
+        y = Log(Exp(x) - 1)
+          = Log(1 - Exp(-x)) + x
         """
         return tt.log(1.0 - tt.exp(-x)) + x
 
@@ -500,8 +500,7 @@ def t_stick_breaking(eps: float) -> StickBreaking:
 
 
 class Circular(ElemwiseTransform):
-    """Transforms a linear space into a circular one.
-    """
+    """Transforms a linear space into a circular one."""
 
     name = "circular"
 
diff --git a/pymc3/plots/__init__.py b/pymc3/plots/__init__.py
index 56a3442678..219b9e75cf 100644
--- a/pymc3/plots/__init__.py
+++ b/pymc3/plots/__init__.py
@@ -24,19 +24,23 @@
 
 import arviz as az
 
+
 def map_args(func):
-    swaps = [
-        ('varnames', 'var_names')
-    ]
+    swaps = [("varnames", "var_names")]
+
     @functools.wraps(func)
     def wrapped(*args, **kwargs):
         for (old, new) in swaps:
             if old in kwargs and new not in kwargs:
-                warnings.warn(f'Keyword argument `{old}` renamed to `{new}`, and will be removed in pymc3 3.8')
+                warnings.warn(
+                    f"Keyword argument `{old}` renamed to `{new}`, and will be removed in pymc3 3.8"
+                )
                 kwargs[new] = kwargs.pop(old)
             return func(*args, **kwargs)
+
     return wrapped
 
+
 # pymc3 custom plots: override these names for custom behavior
 autocorrplot = map_args(az.plot_autocorr)
 forestplot = map_args(az.plot_forest)
@@ -51,42 +55,52 @@ def wrapped(*args, **kwargs):
 @functools.wraps(az.plot_trace)
 def traceplot(*args, **kwargs):
     try:
-        kwargs.setdefault('compact', True)
+        kwargs.setdefault("compact", True)
         return az.plot_trace(*args, **kwargs)
     except TypeError:
-        kwargs.pop('compact')
+        kwargs.pop("compact")
         return az.plot_trace(*args, **kwargs)
 
+
 # addition arg mapping for compare plot
 @functools.wraps(az.plot_compare)
 def compareplot(*args, **kwargs):
-    if 'comp_df' in kwargs:
-        comp_df = kwargs['comp_df'].copy()
+    if "comp_df" in kwargs:
+        comp_df = kwargs["comp_df"].copy()
     else:
         args = list(args)
         comp_df = args[0].copy()
-    if 'WAIC' in comp_df.columns:
-        comp_df = comp_df.rename(index=str,
-                                 columns={'WAIC': 'waic',
-                                          'pWAIC': 'p_waic',
-                                          'dWAIC': 'd_waic',
-                                          'SE': 'se',
-                                          'dSE': 'dse',
-                                          'var_warn': 'warning'})
-    elif 'LOO' in comp_df.columns:
-        comp_df = comp_df.rename(index=str,
-                                 columns={'LOO': 'loo',
-                                          'pLOO': 'p_loo',
-                                          'dLOO': 'd_loo',
-                                          'SE': 'se',
-                                          'dSE': 'dse',
-                                          'shape_warn': 'warning'})
-    if 'comp_df' in kwargs:
-        kwargs['comp_df'] = comp_df
+    if "WAIC" in comp_df.columns:
+        comp_df = comp_df.rename(
+            index=str,
+            columns={
+                "WAIC": "waic",
+                "pWAIC": "p_waic",
+                "dWAIC": "d_waic",
+                "SE": "se",
+                "dSE": "dse",
+                "var_warn": "warning",
+            },
+        )
+    elif "LOO" in comp_df.columns:
+        comp_df = comp_df.rename(
+            index=str,
+            columns={
+                "LOO": "loo",
+                "pLOO": "p_loo",
+                "dLOO": "d_loo",
+                "SE": "se",
+                "dSE": "dse",
+                "shape_warn": "warning",
+            },
+        )
+    if "comp_df" in kwargs:
+        kwargs["comp_df"] = comp_df
     else:
         args[0] = comp_df
     return az.plot_compare(*args, **kwargs)
 
+
 from .posteriorplot import plot_posterior_predictive_glm
 
 
@@ -95,14 +109,14 @@ def compareplot(*args, **kwargs):
     setattr(sys.modules[__name__], plot, map_args(getattr(az.plots, plot)))
 
 __all__ = tuple(az.plots.__all__) + (
-    'autocorrplot',
-    'compareplot',
-    'forestplot',
-    'kdeplot',
-    'plot_posterior',
-    'traceplot',
-    'energyplot',
-    'densityplot',
-    'pairplot',
-    'plot_posterior_predictive_glm',
+    "autocorrplot",
+    "compareplot",
+    "forestplot",
+    "kdeplot",
+    "plot_posterior",
+    "traceplot",
+    "energyplot",
+    "densityplot",
+    "pairplot",
+    "plot_posterior_predictive_glm",
 )
diff --git a/pymc3/stats/__init__.py b/pymc3/stats/__init__.py
index 9b059b4dcb..d4670d67ce 100644
--- a/pymc3/stats/__init__.py
+++ b/pymc3/stats/__init__.py
@@ -23,6 +23,7 @@
 
 import arviz as az
 
+
 def map_args(func):
     swaps = [("varnames", "var_names")]
 
diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py
index a53ab44ba9..9dbe93a94a 100644
--- a/pymc3/variational/__init__.py
+++ b/pymc3/variational/__init__.py
@@ -27,35 +27,16 @@
     adam,
     adamax,
     norm_constraint,
-    total_norm_constraint
+    total_norm_constraint,
 )
 
 from . import inference
-from .inference import (
-    ADVI,
-    FullRankADVI,
-    SVGD,
-    ASVGD,
-    NFVI,
-    Inference,
-    KLqp,
-    ImplicitGradient,
-    fit
-)
+from .inference import ADVI, FullRankADVI, SVGD, ASVGD, NFVI, Inference, KLqp, ImplicitGradient, fit
 
 from . import approximations
-from .approximations import (
-    MeanField,
-    FullRank,
-    Empirical,
-    NormalizingFlow,
-    sample_approx
-)
+from .approximations import MeanField, FullRank, Empirical, NormalizingFlow, sample_approx
 from . import opvi
-from .opvi import (
-    Group,
-    Approximation
-)
+from .opvi import Group, Approximation
 
 # special
 from .stein import Stein

From 154ea6384908fec6ff642923f7e26fba3c5ce1d2 Mon Sep 17 00:00:00 2001
From: Ali Akbar Septiandri <mail.septiandri@gmail.com>
Date: Sun, 27 Sep 2020 12:17:42 +0100
Subject: [PATCH 2/7] Run black on step_methods/

---
 pymc3/step_methods/arraystep.py         |  47 ++--
 pymc3/step_methods/compound.py          |  13 +-
 pymc3/step_methods/elliptical_slice.py  |   7 +-
 pymc3/step_methods/gibbs.py             |  27 ++-
 pymc3/step_methods/hmc/base_hmc.py      |  24 +-
 pymc3/step_methods/hmc/hmc.py           |  71 +++---
 pymc3/step_methods/hmc/integration.py   |  13 +-
 pymc3/step_methods/hmc/nuts.py          |  38 +--
 pymc3/step_methods/hmc/quadpotential.py | 113 +++++----
 pymc3/step_methods/metropolis.py        | 309 ++++++++++++++----------
 pymc3/step_methods/sgmcmc.py            |  63 +++--
 pymc3/step_methods/slicer.py            |  27 ++-
 pymc3/step_methods/step_sizes.py        |  24 +-
 13 files changed, 410 insertions(+), 366 deletions(-)

diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py
index 863c7a8075..d3fc891270 100644
--- a/pymc3/step_methods/arraystep.py
+++ b/pymc3/step_methods/arraystep.py
@@ -21,8 +21,7 @@
 from numpy.random import uniform
 from enum import IntEnum, unique
 
-__all__ = [
-    'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence']
+__all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"]
 
 
 @unique
@@ -34,6 +33,7 @@ class Competence(IntEnum):
     2: PREFERRED
     3: IDEAL
     """
+
     INCOMPATIBLE = 0
     COMPATIBLE = 1
     PREFERRED = 2
@@ -45,21 +45,21 @@ class BlockedStep:
     generates_stats = False
 
     def __new__(cls, *args, **kwargs):
-        blocked = kwargs.get('blocked')
+        blocked = kwargs.get("blocked")
         if blocked is None:
             # Try to look up default value from class
-            blocked = getattr(cls, 'default_blocked', True)
-            kwargs['blocked'] = blocked
+            blocked = getattr(cls, "default_blocked", True)
+            kwargs["blocked"] = blocked
 
-        model = modelcontext(kwargs.get('model'))
-        kwargs.update({'model':model})
+        model = modelcontext(kwargs.get("model"))
+        kwargs.update({"model": model})
 
         # vars can either be first arg or a kwarg
-        if 'vars' not in kwargs and len(args) >= 1:
+        if "vars" not in kwargs and len(args) >= 1:
             vars = args[0]
             args = args[1:]
-        elif 'vars' in kwargs:
-            vars = kwargs.pop('vars')
+        elif "vars" in kwargs:
+            vars = kwargs.pop("vars")
         else:  # Assume all model variables
             vars = model.vars
 
@@ -67,7 +67,7 @@ def __new__(cls, *args, **kwargs):
         vars = inputvars(vars)
 
         if len(vars) == 0:
-            raise ValueError('No free random variables to sample.')
+            raise ValueError("No free random variables to sample.")
 
         if not blocked and len(vars) > 1:
             # In this case we create a separate sampler for each var
@@ -79,14 +79,14 @@ def __new__(cls, *args, **kwargs):
                 # call __init__
                 step.__init__([var], *args, **kwargs)
                 # Hack for creating the class correctly when unpickling.
-                step.__newargs = ([var], ) + args, kwargs
+                step.__newargs = ([var],) + args, kwargs
                 steps.append(step)
 
             return CompoundStep(steps)
         else:
             step = super().__new__(cls)
             # Hack for creating the class correctly when unpickling.
-            step.__newargs = (vars, ) + args, kwargs
+            step.__newargs = (vars,) + args, kwargs
             return step
 
     # Hack for creating the class correctly when unpickling.
@@ -119,7 +119,7 @@ def vars_shape_dtype(self):
         return shape_dtypes
 
     def stop_tuning(self):
-        if hasattr(self, 'tune'):
+        if hasattr(self, "tune"):
             self.tune = False
 
 
@@ -228,22 +228,24 @@ def link_population(self, population, chain_index):
         self.other_chains = [c for c in range(len(population)) if c != chain_index]
         if not len(self.other_chains) > 1:
             raise ValueError(
-                'Population is just {} + {}. ' \
-                'This is too small and the error should have been raised earlier.'.format(self.this_chain, self.other_chains)
+                "Population is just {} + {}. "
+                "This is too small and the error should have been raised earlier.".format(
+                    self.this_chain, self.other_chains
+                )
             )
         return
 
 
 class GradientSharedStep(BlockedStep):
-    def __init__(self, vars, model=None, blocked=True,
-                 dtype=None, logp_dlogp_func=None, **theano_kwargs):
+    def __init__(
+        self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **theano_kwargs
+    ):
         model = modelcontext(model)
         self.vars = vars
         self.blocked = blocked
 
         if logp_dlogp_func is None:
-            func = model.logp_dlogp_function(
-                vars, dtype=dtype, **theano_kwargs)
+            func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
         else:
             func = logp_dlogp_func
 
@@ -255,9 +257,8 @@ def __init__(self, vars, model=None, blocked=True,
         except ValueError:
             if logp_dlogp_func is not None:
                 raise
-            theano_kwargs.update(mode='FAST_COMPILE')
-            func = model.logp_dlogp_function(
-                vars, dtype=dtype, **theano_kwargs)
+            theano_kwargs.update(mode="FAST_COMPILE")
+            func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
 
         self._logp_dlogp_func = func
 
diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py
index f3d7337ac2..15809ccda4 100644
--- a/pymc3/step_methods/compound.py
+++ b/pymc3/step_methods/compound.py
@@ -12,11 +12,11 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 
-'''
+"""
 Created on Mar 7, 2011
 
 @author: johnsalvatier
-'''
+"""
 from collections import namedtuple
 import numpy as np
 
@@ -27,8 +27,7 @@ class CompoundStep:
 
     def __init__(self, methods):
         self.methods = list(methods)
-        self.generates_stats = any(
-            method.generates_stats for method in self.methods)
+        self.generates_stats = any(method.generates_stats for method in self.methods)
         self.stats_dtypes = []
         for method in self.methods:
             if method.generates_stats:
@@ -47,7 +46,7 @@ def step(self, point):
             # one. Pop all others (if dict), or set to np.nan (if namedtuple).
             for state in states[:-1]:
                 if isinstance(state, dict):
-                    state.pop('model_logp', None)
+                    state.pop("model_logp", None)
                 elif isinstance(state, namedtuple):
                     state = state._replace(logp=np.nan)
             return point, states
@@ -59,7 +58,7 @@ def step(self, point):
     def warnings(self):
         warns = []
         for method in self.methods:
-            if hasattr(method, 'warnings'):
+            if hasattr(method, "warnings"):
                 warns.extend(method.warnings())
         return warns
 
@@ -69,7 +68,7 @@ def stop_tuning(self):
 
     def reset_tuning(self):
         for method in self.methods:
-            if hasattr(method, 'reset_tuning'):
+            if hasattr(method, "reset_tuning"):
                 method.reset_tuning()
 
     @property
diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py
index 31904e8105..65944cdf87 100644
--- a/pymc3/step_methods/elliptical_slice.py
+++ b/pymc3/step_methods/elliptical_slice.py
@@ -21,7 +21,7 @@
 from ..theanof import inputvars
 from ..distributions import draw_values
 
-__all__ = ['EllipticalSlice']
+__all__ = ["EllipticalSlice"]
 
 
 def get_chol(cov, chol):
@@ -41,7 +41,7 @@ def get_chol(cov, chol):
     """
 
     if len([i for i in [cov, chol] if i is not None]) != 1:
-        raise ValueError('Must pass exactly one of cov or chol')
+        raise ValueError("Must pass exactly one of cov or chol")
 
     if cov is not None:
         chol = tt.slinalg.cholesky(cov)
@@ -83,8 +83,7 @@ class EllipticalSlice(ArrayStep):
 
     default_blocked = True
 
-    def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None,
-                 **kwargs):
+    def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs):
         self.model = modelcontext(model)
         chol = get_chol(prior_cov, prior_chol)
         self.prior_chol = tt.as_tensor_variable(chol)
diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py
index 9a72172907..4f1dc09f50 100644
--- a/pymc3/step_methods/gibbs.py
+++ b/pymc3/step_methods/gibbs.py
@@ -12,11 +12,11 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 
-'''
+"""
 Created on May 12, 2012
 
 @author: john
-'''
+"""
 from .arraystep import ArrayStep, Competence
 from ..distributions.discrete import Categorical
 from numpy import array, max, exp, cumsum, nested_iters, empty, searchsorted, ones, arange
@@ -26,7 +26,8 @@
 from theano.gof.graph import inputs
 from theano.tensor import add
 from ..model import modelcontext
-__all__ = ['ElemwiseCategorical']
+
+__all__ = ["ElemwiseCategorical"]
 
 
 class ElemwiseCategorical(ArrayStep):
@@ -35,13 +36,17 @@ class ElemwiseCategorical(ArrayStep):
     the variable can't be indexed into or transposed or anything otherwise that will mess things up
 
     """
+
     # TODO: It would be great to come up with a way to make
     # ElemwiseCategorical  more general (handling more complex elementwise
     # variables)
 
     def __init__(self, vars, values=None, model=None):
-        warn('ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.',
-             DeprecationWarning, stacklevel = 2)
+        warn(
+            "ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         model = modelcontext(model)
         self.var = vars[0]
         self.sh = ones(self.var.dshape, self.var.dtype)
@@ -64,8 +69,7 @@ def competence(var, has_grad):
 
 
 def elemwise_logp(model, var):
-    terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([
-                                                                        v.logpt])]
+    terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([v.logpt])]
     return model.fn(add(*terms))
 
 
@@ -73,9 +77,12 @@ def categorical(prob, shape):
     out = empty([1] + list(shape))
 
     n = len(shape)
-    it0, it1 = nested_iters([prob, out], [list(range(1, n + 1)), [0]],
-                            op_flags=[['readonly'], ['readwrite']],
-                            flags=['reduce_ok'])
+    it0, it1 = nested_iters(
+        [prob, out],
+        [list(range(1, n + 1)), [0]],
+        op_flags=[["readonly"], ["readwrite"]],
+        flags=["reduce_ok"],
+    )
 
     for _ in it0:
         p, o = it1.itviews
diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py
index 8551e45962..734f51e055 100644
--- a/pymc3/step_methods/hmc/base_hmc.py
+++ b/pymc3/step_methods/hmc/base_hmc.py
@@ -29,15 +29,9 @@
 
 logger = logging.getLogger("pymc3")
 
-HMCStepData = namedtuple(
-    "HMCStepData",
-    "end, accept_stat, divergence_info, stats"
-)
+HMCStepData = namedtuple("HMCStepData", "end, accept_stat, divergence_info, stats")
 
-DivergenceInfo = namedtuple(
-    "DivergenceInfo",
-    "message, exec_info, state, state_div"
-)
+DivergenceInfo = namedtuple("DivergenceInfo", "message, exec_info, state, state_div")
 
 
 class BaseHMC(arraystep.GradientSharedStep):
@@ -121,9 +115,7 @@ def __init__(
         else:
             self.potential = quad_potential(scaling, is_cov)
 
-        self.integrator = integration.CpuLeapfrogIntegrator(
-            self.potential, self._logp_dlogp_func
-        )
+        self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)
 
         self._step_rand = step_rand
         self._warnings = []
@@ -154,8 +146,7 @@ def astep(self, q0):
             self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
             message_energy = (
                 "Bad initial energy, check any log probabilities that "
-                "are inf or -inf, nan or very small:\n{}"
-                .format(error_logp.to_string())
+                "are inf or -inf, nan or very small:\n{}".format(error_logp.to_string())
             )
             warning = SamplerWarning(
                 WarningType.BAD_ENERGY,
@@ -194,9 +185,7 @@ def astep(self, q0):
                 if self._num_divs_sample < 100 and info.state is not None:
                     point = self._logp_dlogp_func.array_to_dict(info.state.q)
                 if self._num_divs_sample < 100 and info.state_div is not None:
-                    point_dest = self._logp_dlogp_func.array_to_dict(
-                        info.state_div.q
-                    )
+                    point_dest = self._logp_dlogp_func.array_to_dict(info.state_div.q)
                 if self._num_divs_sample < 100:
                     info_store = info
             warning = SamplerWarning(
@@ -246,8 +235,7 @@ def warnings(self):
         n_divs = self._num_divs_sample
         if n_divs and self._samples_after_tune == n_divs:
             message = (
-                "The chain contains only diverging samples. The model "
-                "is probably misspecified."
+                "The chain contains only diverging samples. The model " "is probably misspecified."
             )
         elif n_divs == 1:
             message = (
diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py
index 6b68662fc2..621e7454ce 100644
--- a/pymc3/step_methods/hmc/hmc.py
+++ b/pymc3/step_methods/hmc/hmc.py
@@ -20,10 +20,10 @@
 from pymc3.step_methods.hmc.base_hmc import BaseHMC, HMCStepData, DivergenceInfo
 
 
-__all__ = ['HamiltonianMC']
+__all__ = ["HamiltonianMC"]
 
 
-def unif(step_size, elow=.85, ehigh=1.15):
+def unif(step_size, elow=0.85, ehigh=1.15):
     return np.random.uniform(elow, ehigh) * step_size
 
 
@@ -33,27 +33,29 @@ class HamiltonianMC(BaseHMC):
     See NUTS sampler for automatically tuned stopping time and step size scaling.
     """
 
-    name = 'hmc'
+    name = "hmc"
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [{
-        'step_size': np.float64,
-        'n_steps': np.int64,
-        'tune': np.bool,
-        'step_size_bar': np.float64,
-        'accept': np.float64,
-        'diverging': np.bool,
-        'energy_error': np.float64,
-        'energy': np.float64,
-        'path_length': np.float64,
-        'accepted': np.bool,
-        'model_logp': np.float64,
-        'process_time_diff': np.float64,
-        'perf_counter_diff': np.float64,
-        'perf_counter_start': np.float64,
-    }]
-
-    def __init__(self, vars=None, path_length=2., max_steps=1024, **kwargs):
+    stats_dtypes = [
+        {
+            "step_size": np.float64,
+            "n_steps": np.int64,
+            "tune": np.bool,
+            "step_size_bar": np.float64,
+            "accept": np.float64,
+            "diverging": np.bool,
+            "energy_error": np.float64,
+            "energy": np.float64,
+            "path_length": np.float64,
+            "accepted": np.bool,
+            "model_logp": np.float64,
+            "process_time_diff": np.float64,
+            "perf_counter_diff": np.float64,
+            "perf_counter_start": np.float64,
+        }
+    ]
+
+    def __init__(self, vars=None, path_length=2.0, max_steps=1024, **kwargs):
         """Set up the Hamiltonian Monte Carlo sampler.
 
         Parameters
@@ -104,8 +106,8 @@ def __init__(self, vars=None, path_length=2., max_steps=1024, **kwargs):
             The model
         **kwargs: passed to BaseHMC
         """
-        kwargs.setdefault('step_rand', unif)
-        kwargs.setdefault('target_accept', 0.65)
+        kwargs.setdefault("step_rand", unif)
+        kwargs.setdefault("target_accept", 0.65)
         super().__init__(vars, **kwargs)
         self.path_length = path_length
         self.max_steps = max_steps
@@ -123,18 +125,17 @@ def _hamiltonian_step(self, start, p0, step_size):
                 last = state
                 state = self.integrator.step(step_size, state)
         except IntegrationError as e:
-            div_info = DivergenceInfo('Integration failed.', e, last, None)
+            div_info = DivergenceInfo("Integration failed.", e, last, None)
         else:
             if not np.isfinite(state.energy):
-                div_info = DivergenceInfo(
-                    'Divergence encountered, bad energy.', None, last, state)
+                div_info = DivergenceInfo("Divergence encountered, bad energy.", None, last, state)
             energy_change = start.energy - state.energy
             if np.isnan(energy_change):
                 energy_change = -np.inf
             if np.abs(energy_change) > self.Emax:
                 div_info = DivergenceInfo(
-                    'Divergence encountered, large integration error.',
-                    None, last, state)
+                    "Divergence encountered, large integration error.", None, last, state
+                )
 
         accept_stat = min(1, np.exp(energy_change))
 
@@ -146,13 +147,13 @@ def _hamiltonian_step(self, start, p0, step_size):
             accepted = True
 
         stats = {
-            'path_length': self.path_length,
-            'n_steps': n_steps,
-            'accept': accept_stat,
-            'energy_error': energy_change,
-            'energy': state.energy,
-            'accepted': accepted,
-            'model_logp': state.model_logp,
+            "path_length": self.path_length,
+            "n_steps": n_steps,
+            "accept": accept_stat,
+            "energy_error": energy_change,
+            "energy": state.energy,
+            "accepted": accepted,
+            "model_logp": state.model_logp,
         }
         return HMCStepData(end, accept_stat, div_info, stats)
 
diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py
index 2e52e094be..9cf37978a4 100644
--- a/pymc3/step_methods/hmc/integration.py
+++ b/pymc3/step_methods/hmc/integration.py
@@ -18,7 +18,7 @@
 from scipy import linalg
 
 
-State = namedtuple("State", 'q, p, v, q_grad, energy, model_logp')
+State = namedtuple("State", "q, p, v, q_grad, energy, model_logp")
 
 
 class IntegrationError(RuntimeError):
@@ -32,14 +32,15 @@ def __init__(self, potential, logp_dlogp_func):
         self._logp_dlogp_func = logp_dlogp_func
         self._dtype = self._logp_dlogp_func.dtype
         if self._potential.dtype != self._dtype:
-            raise ValueError("dtypes of potential (%s) and logp function (%s)"
-                             "don't match."
-                             % (self._potential.dtype, self._dtype))
+            raise ValueError(
+                "dtypes of potential (%s) and logp function (%s)"
+                "don't match." % (self._potential.dtype, self._dtype)
+            )
 
     def compute_state(self, q, p):
         """Compute Hamiltonian functions using a position and momentum."""
         if q.dtype != self._dtype or p.dtype != self._dtype:
-            raise ValueError('Invalid dtype. Must be %s' % self._dtype)
+            raise ValueError("Invalid dtype. Must be %s" % self._dtype)
         logp, dlogp = self._logp_dlogp_func(q)
         v = self._potential.velocity(p)
         kinetic = self._potential.energy(p, velocity=v)
@@ -79,7 +80,7 @@ def step(self, epsilon, state):
                 raise
 
     def _step(self, epsilon, state):
-        axpy = linalg.blas.get_blas_funcs('axpy', dtype=self._dtype)
+        axpy = linalg.blas.get_blas_funcs("axpy", dtype=self._dtype)
         pot = self._potential
 
         q_new = state.q.copy()
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index 65079d582d..782a350654 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -210,7 +210,7 @@ def warnings(self):
                 "The chain reached the maximum tree depth. Increase "
                 "max_treedepth, increase target_accept or reparameterize."
             )
-            warn = SamplerWarning(WarningType.TREEDEPTH, msg, 'warn')
+            warn = SamplerWarning(WarningType.TREEDEPTH, msg, "warn")
             warnings.append(warn)
         return warnings
 
@@ -249,9 +249,7 @@ def __init__(self, ndim, integrator, start, step_size, Emax):
         self.start_energy = np.array(start.energy)
 
         self.left = self.right = start
-        self.proposal = Proposal(
-            start.q, start.q_grad, start.energy, 1.0, start.model_logp
-        )
+        self.proposal = Proposal(start.q, start.q_grad, start.energy, 1.0, start.model_logp)
         self.depth = 0
         self.log_size = 0
         self.log_weighted_accept_sum = -np.inf
@@ -313,13 +311,9 @@ def extend(self, direction):
             p_sum = self.p_sum
             turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0)
             p_sum1 = leftmost_p_sum + rightmost_begin.p
-            turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (
-                p_sum1.dot(rightmost_begin.v) <= 0
-            )
+            turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (p_sum1.dot(rightmost_begin.v) <= 0)
             p_sum2 = leftmost_end.p + rightmost_p_sum
-            turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (
-                p_sum2.dot(rightmost_end.v) <= 0
-            )
+            turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0)
             turning = turning | turning1 | turning2
 
         return diverging, turning
@@ -354,14 +348,10 @@ def _single_step(self, left, epsilon):
                     log_p_accept_weighted,
                     right.model_logp,
                 )
-                tree = Subtree(
-                    right, right, right.p, proposal, log_size, log_p_accept_weighted, 1
-                )
+                tree = Subtree(right, right, right.p, proposal, log_size, log_p_accept_weighted, 1)
                 return tree, None, False
             else:
-                error_msg = (
-                    "Energy change in leapfrog step is too large: %s." % energy_change
-                )
+                error_msg = "Energy change in leapfrog step is too large: %s." % energy_change
                 error = None
         tree = Subtree(None, None, None, None, -np.inf, -np.inf, 1)
         divergance_info = DivergenceInfo(error_msg, error, left, right)
@@ -385,13 +375,9 @@ def _build_subtree(self, left, depth, epsilon):
             # Additional U turn check only when depth > 1 to avoid redundant work.
             if depth - 1 > 0:
                 p_sum1 = tree1.p_sum + tree2.left.p
-                turning1 = (p_sum1.dot(tree1.left.v) <= 0) or (
-                    p_sum1.dot(tree2.left.v) <= 0
-                )
+                turning1 = (p_sum1.dot(tree1.left.v) <= 0) or (p_sum1.dot(tree2.left.v) <= 0)
                 p_sum2 = tree1.right.p + tree2.p_sum
-                turning2 = (p_sum2.dot(tree1.right.v) <= 0) or (
-                    p_sum2.dot(tree2.right.v) <= 0
-                )
+                turning2 = (p_sum2.dot(tree1.right.v) <= 0) or (p_sum2.dot(tree2.right.v) <= 0)
                 turning = turning | turning1 | turning2
 
             log_size = np.logaddexp(tree1.log_size, tree2.log_size)
@@ -410,9 +396,7 @@ def _build_subtree(self, left, depth, epsilon):
 
         n_proposals = tree1.n_proposals + tree2.n_proposals
 
-        tree = Subtree(
-            left, right, p_sum, proposal, log_size, log_weighted_accept_sum, n_proposals
-        )
+        tree = Subtree(left, right, p_sum, proposal, log_size, log_weighted_accept_sum, n_proposals)
         return tree, diverging, turning
 
     def stats(self):
@@ -421,9 +405,7 @@ def stats(self):
             # Remove contribution from initial state which is always a perfect
             # accept
             log_sum_weight = logdiffexp_numpy(self.log_size, 0.0)
-            self.mean_tree_accept = np.exp(
-                self.log_weighted_accept_sum - log_sum_weight
-            )
+            self.mean_tree_accept = np.exp(self.log_weighted_accept_sum - log_sum_weight)
         return {
             "depth": self.depth,
             "mean_tree_accept": self.mean_tree_accept,
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index c55f12a365..fd962f48cb 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -22,9 +22,15 @@
 from pymc3.theanof import floatX
 
 
-__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull',
-           'QuadPotentialFullInv', 'QuadPotentialDiagAdapt',
-           'QuadPotentialFullAdapt', 'isquadpotential']
+__all__ = [
+    "quad_potential",
+    "QuadPotentialDiag",
+    "QuadPotentialFull",
+    "QuadPotentialFullInv",
+    "QuadPotentialDiagAdapt",
+    "QuadPotentialFullAdapt",
+    "isquadpotential",
+]
 
 
 def quad_potential(C, is_cov):
@@ -56,7 +62,7 @@ def quad_potential(C, is_cov):
         if is_cov:
             return QuadPotentialDiag(C)
         else:
-            return QuadPotentialDiag(1. / C)
+            return QuadPotentialDiag(1.0 / C)
     else:
         if is_cov:
             return QuadPotentialFull(C)
@@ -70,11 +76,10 @@ def partial_check_positive_definite(C):
         d = C
     else:
         d = np.diag(C)
-    i, = np.nonzero(np.logical_or(np.isnan(d), d <= 0))
+    (i,) = np.nonzero(np.logical_or(np.isnan(d), d <= 0))
 
     if len(i):
-        raise PositiveDefiniteError(
-            "Simple check failed. Diagonal contains negatives", i)
+        raise PositiveDefiniteError("Simple check failed. Diagonal contains negatives", i)
 
 
 class PositiveDefiniteError(ValueError):
@@ -84,23 +89,22 @@ def __init__(self, msg, idx):
         self.msg = msg
 
     def __str__(self):
-        return ("Scaling is not positive definite: %s. Check indexes %s."
-                % (self.msg, self.idx))
+        return "Scaling is not positive definite: %s. Check indexes %s." % (self.msg, self.idx)
 
 
 class QuadPotential:
     def velocity(self, x, out=None):
         """Compute the current velocity at a position in parameter space."""
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def energy(self, x, velocity=None):
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def random(self, x):
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def velocity_energy(self, x, v_out):
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def update(self, sample, grad, tune):
         """Inform the potential about a new sample during tuning.
@@ -152,15 +156,17 @@ def __init__(
     ):
         """Set up a diagonal mass matrix."""
         if initial_diag is not None and initial_diag.ndim != 1:
-            raise ValueError('Initial diagonal must be one-dimensional.')
+            raise ValueError("Initial diagonal must be one-dimensional.")
         if initial_mean.ndim != 1:
-            raise ValueError('Initial mean must be one-dimensional.')
+            raise ValueError("Initial mean must be one-dimensional.")
         if initial_diag is not None and len(initial_diag) != n:
-            raise ValueError('Wrong shape for initial_diag: expected %s got %s'
-                             % (n, len(initial_diag)))
+            raise ValueError(
+                "Wrong shape for initial_diag: expected %s got %s" % (n, len(initial_diag))
+            )
         if len(initial_mean) != n:
-            raise ValueError('Wrong shape for initial_mean: expected %s got %s'
-                             % (n, len(initial_mean)))
+            raise ValueError(
+                "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean))
+            )
 
         if dtype is None:
             dtype = theano.config.floatX
@@ -184,9 +190,10 @@ def reset(self):
         self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True)
         self._var_theano = theano.shared(self._var)
         self._stds = np.sqrt(self._initial_diag)
-        self._inv_stds = floatX(1.) / self._stds
+        self._inv_stds = floatX(1.0) / self._stds
         self._foreground_var = _WeightedVariance(
-            self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype)
+            self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype
+        )
         self._background_var = _WeightedVariance(self._n, dtype=self.dtype)
         self._n_samples = 0
 
@@ -256,11 +263,12 @@ def raise_ok(self, vmap):
                 for i in range(slclen):
                     name_slc.append((vmap_.var, i))
             index = np.where(self._stds == 0)[0]
-            errmsg = ['Mass matrix contains zeros on the diagonal. ']
+            errmsg = ["Mass matrix contains zeros on the diagonal. "]
             for ii in index:
-                errmsg.append('The derivative of RV `{}`.ravel()[{}]'
-                              ' is zero.'.format(*name_slc[ii]))
-            raise ValueError('\n'.join(errmsg))
+                errmsg.append(
+                    "The derivative of RV `{}`.ravel()[{}]" " is zero.".format(*name_slc[ii])
+                )
+            raise ValueError("\n".join(errmsg))
 
         if np.any(~np.isfinite(self._stds)):
             name_slc = []
@@ -270,11 +278,12 @@ def raise_ok(self, vmap):
                 for i in range(slclen):
                     name_slc.append((vmap_.var, i))
             index = np.where(~np.isfinite(self._stds))[0]
-            errmsg = ['Mass matrix contains non-finite values on the diagonal. ']
+            errmsg = ["Mass matrix contains non-finite values on the diagonal. "]
             for ii in index:
-                errmsg.append('The derivative of RV `{}`.ravel()[{}]'
-                              ' is non-finite.'.format(*name_slc[ii]))
-            raise ValueError('\n'.join(errmsg))
+                errmsg.append(
+                    "The derivative of RV `{}`.ravel()[{}]" " is non-finite.".format(*name_slc[ii])
+                )
+            raise ValueError("\n".join(errmsg))
 
 
 class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt):
@@ -321,25 +330,26 @@ def update(self, sample, grad, tune):
 class _WeightedVariance:
     """Online algorithm for computing mean of variance."""
 
-    def __init__(self, nelem, initial_mean=None, initial_variance=None,
-                 initial_weight=0, dtype='d'):
+    def __init__(
+        self, nelem, initial_mean=None, initial_variance=None, initial_weight=0, dtype="d"
+    ):
         self._dtype = dtype
         self.n_samples = float(initial_weight)
         if initial_mean is None:
-            self.mean = np.zeros(nelem, dtype='d')
+            self.mean = np.zeros(nelem, dtype="d")
         else:
-            self.mean = np.array(initial_mean, dtype='d', copy=True)
+            self.mean = np.array(initial_mean, dtype="d", copy=True)
         if initial_variance is None:
-            self.raw_var = np.zeros(nelem, dtype='d')
+            self.raw_var = np.zeros(nelem, dtype="d")
         else:
-            self.raw_var = np.array(initial_variance, dtype='d', copy=True)
+            self.raw_var = np.array(initial_variance, dtype="d", copy=True)
 
         self.raw_var[:] *= self.n_samples
 
         if self.raw_var.shape != (nelem,):
-            raise ValueError('Invalid shape for initial variance.')
+            raise ValueError("Invalid shape for initial variance.")
         if self.mean.shape != (nelem,):
-            raise ValueError('Invalid shape for initial mean.')
+            raise ValueError("Invalid shape for initial mean.")
 
     def add_sample(self, x, weight):
         x = np.asarray(x)
@@ -347,11 +357,11 @@ def add_sample(self, x, weight):
         old_diff = x - self.mean
         self.mean[:] += old_diff / self.n_samples
         new_diff = x - self.mean
-        self.raw_var[:] +=  weight * old_diff * new_diff
+        self.raw_var[:] += weight * old_diff * new_diff
 
     def current_variance(self, out=None):
         if self.n_samples == 0:
-            raise ValueError('Can not compute variance without samples.')
+            raise ValueError("Can not compute variance without samples.")
         if out is not None:
             return np.divide(self.raw_var, self.n_samples, out=out)
         else:
@@ -376,10 +386,10 @@ def __init__(self, v, dtype=None):
             dtype = theano.config.floatX
         self.dtype = dtype
         v = v.astype(self.dtype)
-        s = v ** .5
+        s = v ** 0.5
 
         self.s = s
-        self.inv_s = 1. / s
+        self.inv_s = 1.0 / s
         self.v = v
 
     def velocity(self, x, out=None):
@@ -397,7 +407,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is not None:
             return 0.5 * np.dot(x, velocity)
-        return .5 * x.dot(self.v * x)
+        return 0.5 * x.dot(self.v * x)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -437,7 +447,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is None:
             velocity = self.velocity(x)
-        return .5 * x.dot(velocity)
+        return 0.5 * x.dot(velocity)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -470,8 +480,7 @@ def velocity(self, x, out=None):
     def random(self):
         """Draw random value from QuadPotential."""
         vals = np.random.normal(size=self._n).astype(self.dtype)
-        return scipy.linalg.solve_triangular(self._chol.T, vals,
-                                             overwrite_b=True)
+        return scipy.linalg.solve_triangular(self._chol.T, vals, overwrite_b=True)
 
     def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
@@ -489,6 +498,7 @@ def velocity_energy(self, x, v_out):
 
 class QuadPotentialFullAdapt(QuadPotentialFull):
     """Adapt a dense mass matrix using the sample covariances."""
+
     def __init__(
         self,
         n,
@@ -508,13 +518,11 @@ def __init__(
             raise ValueError("Initial mean must be one-dimensional.")
         if initial_cov is not None and initial_cov.shape != (n, n):
             raise ValueError(
-                "Wrong shape for initial_cov: expected %s got %s"
-                % (n, initial_cov.shape)
+                "Wrong shape for initial_cov: expected %s got %s" % (n, initial_cov.shape)
             )
         if len(initial_mean) != n:
             raise ValueError(
-                "Wrong shape for initial_mean: expected %s got %s"
-                % (n, len(initial_mean))
+                "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean))
             )
 
         if dtype is None:
@@ -573,9 +581,7 @@ def update(self, sample, grad, tune):
         # window.
         if delta >= self.adaptation_window:
             self._foreground_cov = self._background_cov
-            self._background_cov = _WeightedCovariance(
-                self._n, dtype=self.dtype
-            )
+            self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype)
 
             self._previous_update = self._n_samples
             self.adaptation_window = int(self.adaptation_window * self.adaptation_window_multiplier)
@@ -645,12 +651,13 @@ def current_mean(self):
 
 try:
     import sksparse.cholmod as cholmod
+
     chol_available = True
 except ImportError:
     chol_available = False
 
 if chol_available:
-    __all__ += ['QuadPotentialSparse']
+    __all__ += ["QuadPotentialSparse"]
 
     import theano.sparse
 
diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py
index 383536725d..89b0ff63af 100644
--- a/pymc3/step_methods/metropolis.py
+++ b/pymc3/step_methods/metropolis.py
@@ -18,13 +18,29 @@
 import scipy.linalg
 
 from ..distributions import draw_values
-from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
+from .arraystep import (
+    ArrayStepShared,
+    PopulationArrayStepShared,
+    ArrayStep,
+    metrop_select,
+    Competence,
+)
 import pymc3 as pm
 from pymc3.theanof import floatX
 
-__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
-           'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
-           'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']
+__all__ = [
+    "Metropolis",
+    "DEMetropolis",
+    "DEMetropolisZ",
+    "BinaryMetropolis",
+    "BinaryGibbsMetropolis",
+    "CategoricalGibbsMetropolis",
+    "NormalProposal",
+    "CauchyProposal",
+    "LaplaceProposal",
+    "PoissonProposal",
+    "MultivariateNormalProposal",
+]
 
 # Available proposal distributions for Metropolis
 
@@ -101,19 +117,32 @@ class Metropolis(ArrayStepShared):
     mode:  string or `Mode` instance.
         compilation mode passed to Theano functions
     """
-    name = 'metropolis'
+
+    name = "metropolis"
 
     default_blocked = False
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'accepted': np.bool,
-        'tune': np.bool,
-        'scaling': np.float64,
-    }]
-
-    def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
-                 tune=True, tune_interval=100, model=None, mode=None, **kwargs):
+    stats_dtypes = [
+        {
+            "accept": np.float64,
+            "accepted": np.bool,
+            "tune": np.bool,
+            "scaling": np.float64,
+        }
+    ]
+
+    def __init__(
+        self,
+        vars=None,
+        S=None,
+        proposal_dist=None,
+        scaling=1.0,
+        tune=True,
+        tune_interval=100,
+        model=None,
+        mode=None,
+        **kwargs
+    ):
 
         model = pm.modelcontext(model)
 
@@ -133,7 +162,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
         else:
             raise ValueError("Invalid rank for variance: %s" % S.ndim)
 
-        self.scaling = np.atleast_1d(scaling).astype('d')
+        self.scaling = np.atleast_1d(scaling).astype("d")
         self.tune = tune
         self.tune_interval = tune_interval
         self.steps_until_tune = tune_interval
@@ -141,15 +170,14 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
 
         # Determine type of variables
         self.discrete = np.concatenate(
-            [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars])
+            [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]
+        )
         self.any_discrete = self.discrete.any()
         self.all_discrete = self.discrete.all()
 
         # remember initial settings before tuning so they can be reset
         self._untuned_settings = dict(
-            scaling=self.scaling,
-            steps_until_tune=tune_interval,
-            accepted=self.accepted
+            scaling=self.scaling, steps_until_tune=tune_interval, accepted=self.accepted
         )
 
         self.mode = mode
@@ -167,8 +195,7 @@ def reset_tuning(self):
     def astep(self, q0):
         if not self.steps_until_tune and self.tune:
             # Tune scaling parameter
-            self.scaling = tune(
-                self.scaling, self.accepted / float(self.tune_interval))
+            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
             self.accepted = 0
@@ -177,13 +204,12 @@ def astep(self, q0):
 
         if self.any_discrete:
             if self.all_discrete:
-                delta = np.round(delta, 0).astype('int64')
-                q0 = q0.astype('int64')
-                q = (q0 + delta).astype('int64')
+                delta = np.round(delta, 0).astype("int64")
+                q0 = q0.astype("int64")
+                q = (q0 + delta).astype("int64")
             else:
-                delta[self.discrete] = np.round(
-                    delta[self.discrete], 0)
-                q = (q0 + delta)
+                delta[self.discrete] = np.round(delta[self.discrete], 0)
+                q = q0 + delta
         else:
             q = floatX(q0 + delta)
 
@@ -194,10 +220,10 @@ def astep(self, q0):
         self.steps_until_tune -= 1
 
         stats = {
-            'tune': self.tune,
-            'scaling': self.scaling,
-            'accept': np.exp(accept),
-            'accepted': accepted,
+            "tune": self.tune,
+            "scaling": self.scaling,
+            "accept": np.exp(accept),
+            "accepted": accepted,
         }
 
         return q_new, [stats]
@@ -261,16 +287,19 @@ class BinaryMetropolis(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-    name = 'binary_metropolis'
+
+    name = "binary_metropolis"
 
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'tune': np.bool,
-        'p_jump': np.float64,
-    }]
+    stats_dtypes = [
+        {
+            "accept": np.float64,
+            "tune": np.bool,
+            "p_jump": np.float64,
+        }
+    ]
 
-    def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None):
+    def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None):
 
         model = pm.modelcontext(model)
 
@@ -281,20 +310,19 @@ def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None):
         self.accepted = 0
 
         if not all([v.dtype in pm.discrete_types for v in vars]):
-            raise ValueError(
-                'All variables must be Bernoulli for BinaryMetropolis')
+            raise ValueError("All variables must be Bernoulli for BinaryMetropolis")
 
         super().__init__(vars, [model.fastlogp])
 
     def astep(self, q0, logp):
 
         # Convert adaptive_scale_factor to a jump probability
-        p_jump = 1. - .5 ** self.scaling
+        p_jump = 1.0 - 0.5 ** self.scaling
 
         rand_array = nr.random(q0.shape)
         q = np.copy(q0)
         # Locations where switches occur, according to p_jump
-        switch_locs = (rand_array < p_jump)
+        switch_locs = rand_array < p_jump
         q[switch_locs] = True - q[switch_locs]
 
         accept = logp(q) - logp(q0)
@@ -302,21 +330,20 @@ def astep(self, q0, logp):
         self.accepted += accepted
 
         stats = {
-            'tune': self.tune,
-            'accept': np.exp(accept),
-            'p_jump': p_jump,
+            "tune": self.tune,
+            "accept": np.exp(accept),
+            "p_jump": p_jump,
         }
 
         return q_new, [stats]
 
     @staticmethod
     def competence(var):
-        '''
+        """
         BinaryMetropolis is only suitable for binary (bool)
         and Categorical variables with k=1.
-        '''
-        distribution = getattr(
-            var.distribution, 'parent_dist', var.distribution)
+        """
+        distribution = getattr(var.distribution, "parent_dist", var.distribution)
         if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types):
             return Competence.COMPATIBLE
         elif isinstance(distribution, pm.Categorical) and (distribution.k == 2):
@@ -341,9 +368,10 @@ class BinaryGibbsMetropolis(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-    name = 'binary_gibbs_metropolis'
 
-    def __init__(self, vars, order='random', transit_p=.8, model=None):
+    name = "binary_gibbs_metropolis"
+
+    def __init__(self, vars, order="random", transit_p=0.8, model=None):
 
         model = pm.modelcontext(model)
 
@@ -352,18 +380,17 @@ def __init__(self, vars, order='random', transit_p=.8, model=None):
 
         self.dim = sum(v.dsize for v in vars)
 
-        if order == 'random':
+        if order == "random":
             self.shuffle_dims = True
             self.order = list(range(self.dim))
         else:
             if sorted(order) != list(range(self.dim)):
-                raise ValueError('Argument \'order\' has to be a permutation')
+                raise ValueError("Argument 'order' has to be a permutation")
             self.shuffle_dims = False
             self.order = order
 
         if not all([v.dtype in pm.discrete_types for v in vars]):
-            raise ValueError(
-                'All variables must be binary for BinaryGibbsMetropolis')
+            raise ValueError("All variables must be binary for BinaryGibbsMetropolis")
 
         super().__init__(vars, [model.fastlogp])
 
@@ -389,12 +416,11 @@ def astep(self, q0, logp):
 
     @staticmethod
     def competence(var):
-        '''
+        """
         BinaryMetropolis is only suitable for Bernoulli
         and Categorical variables with k=2.
-        '''
-        distribution = getattr(
-            var.distribution, 'parent_dist', var.distribution)
+        """
+        distribution = getattr(var.distribution, "parent_dist", var.distribution)
         if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types):
             return Competence.IDEAL
         elif isinstance(distribution, pm.Categorical) and (distribution.k == 2):
@@ -404,15 +430,16 @@ def competence(var):
 
 class CategoricalGibbsMetropolis(ArrayStep):
     """A Metropolis-within-Gibbs step method optimized for categorical variables.
-       This step method works for Bernoulli variables as well, but it is not
-       optimized for them, like BinaryGibbsMetropolis is. Step method supports
-       two types of proposals: A uniform proposal and a proportional proposal,
-       which was introduced by Liu in his 1996 technical report
-       "Metropolized Gibbs Sampler: An Improvement".
+    This step method works for Bernoulli variables as well, but it is not
+    optimized for them, like BinaryGibbsMetropolis is. Step method supports
+    two types of proposals: A uniform proposal and a proportional proposal,
+    which was introduced by Liu in his 1996 technical report
+    "Metropolized Gibbs Sampler: An Improvement".
     """
-    name = 'categorical_gibbs_metropolis'
 
-    def __init__(self, vars, proposal='uniform', order='random', model=None):
+    name = "categorical_gibbs_metropolis"
+
+    def __init__(self, vars, proposal="uniform", order="random", model=None):
 
         model = pm.modelcontext(model)
         vars = pm.inputvars(vars)
@@ -423,34 +450,36 @@ def __init__(self, vars, proposal='uniform', order='random', model=None):
         # variable with M categories and y being a 3-D variable with N
         # categories, we will have dimcats = [(0, M), (1, M), (2, N), (3, N), (4, N)].
         for v in vars:
-            distr = getattr(v.distribution, 'parent_dist', v.distribution)
+            distr = getattr(v.distribution, "parent_dist", v.distribution)
             if isinstance(distr, pm.Categorical):
                 k = draw_values([distr.k])[0]
             elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types):
                 k = 2
             else:
-                raise ValueError('All variables must be categorical or binary' +
-                                 'for CategoricalGibbsMetropolis')
+                raise ValueError(
+                    "All variables must be categorical or binary" + "for CategoricalGibbsMetropolis"
+                )
             start = len(dimcats)
             dimcats += [(dim, k) for dim in range(start, start + v.dsize)]
 
-        if order == 'random':
+        if order == "random":
             self.shuffle_dims = True
             self.dimcats = dimcats
         else:
             if sorted(order) != list(range(len(dimcats))):
-                raise ValueError('Argument \'order\' has to be a permutation')
+                raise ValueError("Argument 'order' has to be a permutation")
             self.shuffle_dims = False
             self.dimcats = [dimcats[j] for j in order]
 
-        if proposal == 'uniform':
+        if proposal == "uniform":
             self.astep = self.astep_unif
-        elif proposal == 'proportional':
+        elif proposal == "proportional":
             # Use the optimized "Metropolized Gibbs Sampler" described in Liu96.
             self.astep = self.astep_prop
         else:
-            raise ValueError('Argument \'proposal\' should either be ' +
-                    '\'uniform\' or \'proportional\'')
+            raise ValueError(
+                "Argument 'proposal' should either be " + "'uniform' or 'proportional'"
+            )
 
         super().__init__(vars, [model.fastlogp])
 
@@ -494,8 +523,8 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k):
                 log_probs[candidate_cat] = logp(q)
         probs = softmax(log_probs)
         prob_curr, probs[given_cat] = probs[given_cat], 0.0
-        probs /= (1.0 - prob_curr)
-        proposed_cat = nr.choice(candidates, p = probs)
+        probs /= 1.0 - prob_curr
+        proposed_cat = nr.choice(candidates, p=probs)
         accept_ratio = (1.0 - prob_curr) / (1.0 - probs[proposed_cat])
         if not np.isfinite(accept_ratio) or nr.uniform() >= accept_ratio:
             q[dim] = given_cat
@@ -505,12 +534,11 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k):
 
     @staticmethod
     def competence(var):
-        '''
+        """
         CategoricalGibbsMetropolis is only suitable for Bernoulli and
         Categorical variables.
-        '''
-        distribution = getattr(
-            var.distribution, 'parent_dist', var.distribution)
+        """
+        distribution = getattr(var.distribution, "parent_dist", var.distribution)
         if isinstance(distribution, pm.Categorical):
             if distribution.k > 2:
                 return Competence.IDEAL
@@ -554,20 +582,34 @@ class DEMetropolis(PopulationArrayStepShared):
         Statistics and Computing
         `link <https://doi.org/10.1007/s11222-006-8769-1>`__
     """
-    name = 'DEMetropolis'
+
+    name = "DEMetropolis"
 
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'accepted': np.bool,
-        'tune': np.bool,
-        'scaling': np.float64,
-        'lambda': np.float64,
-    }]
-
-    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
-                 tune=None, tune_interval=100, model=None, mode=None, **kwargs):
+    stats_dtypes = [
+        {
+            "accept": np.float64,
+            "accepted": np.bool,
+            "tune": np.bool,
+            "scaling": np.float64,
+            "lambda": np.float64,
+        }
+    ]
+
+    def __init__(
+        self,
+        vars=None,
+        S=None,
+        proposal_dist=None,
+        lamb=None,
+        scaling=0.001,
+        tune=None,
+        tune_interval=100,
+        model=None,
+        mode=None,
+        **kwargs
+    ):
 
         model = pm.modelcontext(model)
 
@@ -583,12 +625,12 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
         else:
             self.proposal_dist = UniformProposal(S)
 
-        self.scaling = np.atleast_1d(scaling).astype('d')
+        self.scaling = np.atleast_1d(scaling).astype("d")
         if lamb is None:
             # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if tune not in {None, 'scaling', 'lambda'}:
+        if tune not in {None, "scaling", "lambda"}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = tune
         self.tune_interval = tune_interval
@@ -603,9 +645,9 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
 
     def astep(self, q0):
         if not self.steps_until_tune and self.tune:
-            if self.tune == 'scaling':
+            if self.tune == "scaling":
                 self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
-            elif self.tune == 'lambda':
+            elif self.tune == "lambda":
                 self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
@@ -628,11 +670,11 @@ def astep(self, q0):
         self.steps_until_tune -= 1
 
         stats = {
-            'tune': self.tune,
-            'scaling': self.scaling,
-            'lambda': self.lamb,
-            'accept': np.exp(accept),
-            'accepted': accepted
+            "tune": self.tune,
+            "scaling": self.scaling,
+            "lambda": self.lamb,
+            "accept": np.exp(accept),
+            "accepted": accepted,
         }
 
         return q_new, [stats]
@@ -681,20 +723,35 @@ class DEMetropolisZ(ArrayStepShared):
         Statistics and Computing
         `link <https://doi.org/10.1007/s11222-008-9104-9>`__
     """
-    name = 'DEMetropolisZ'
+
+    name = "DEMetropolisZ"
 
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'accepted': np.bool,
-        'tune': np.bool,
-        'scaling': np.float64,
-        'lambda': np.float64,
-    }]
-
-    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
-                 tune='lambda', tune_interval=100, tune_drop_fraction:float=0.9, model=None, mode=None, **kwargs):
+    stats_dtypes = [
+        {
+            "accept": np.float64,
+            "accepted": np.bool,
+            "tune": np.bool,
+            "scaling": np.float64,
+            "lambda": np.float64,
+        }
+    ]
+
+    def __init__(
+        self,
+        vars=None,
+        S=None,
+        proposal_dist=None,
+        lamb=None,
+        scaling=0.001,
+        tune="lambda",
+        tune_interval=100,
+        tune_drop_fraction: float = 0.9,
+        model=None,
+        mode=None,
+        **kwargs
+    ):
         model = pm.modelcontext(model)
 
         if vars is None:
@@ -703,18 +760,18 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
 
         if S is None:
             S = np.ones(model.ndim)
-        
+
         if proposal_dist is not None:
             self.proposal_dist = proposal_dist(S)
         else:
             self.proposal_dist = UniformProposal(S)
 
-        self.scaling = np.atleast_1d(scaling).astype('d')
+        self.scaling = np.atleast_1d(scaling).astype("d")
         if lamb is None:
             # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if tune not in {None, 'scaling', 'lambda'}:
+        if tune not in {None, "scaling", "lambda"}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = True
         self.tune_target = tune
@@ -730,7 +787,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
             scaling=self.scaling,
             lamb=self.lamb,
             steps_until_tune=tune_interval,
-            accepted=self.accepted
+            accepted=self.accepted,
         )
 
         self.mode = mode
@@ -750,9 +807,9 @@ def reset_tuning(self):
     def astep(self, q0):
         # same tuning scheme as DEMetropolis
         if not self.steps_until_tune and self.tune:
-            if self.tune_target == 'scaling':
+            if self.tune_target == "scaling":
                 self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
-            elif self.tune_target == 'lambda':
+            elif self.tune_target == "lambda":
                 self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
@@ -769,7 +826,7 @@ def astep(self, q0):
             iz2 = np.random.randint(it)
             while iz2 == iz1:
                 iz2 = np.random.randint(it)
-            
+
             z1 = self._history[iz1]
             z2 = self._history[iz2]
             # propose a jump
@@ -786,11 +843,11 @@ def astep(self, q0):
         self.steps_until_tune -= 1
 
         stats = {
-            'tune': self.tune,
-            'scaling': self.scaling,
-            'lambda': self.lamb,
-            'accept': np.exp(accept),
-            'accepted': accepted
+            "tune": self.tune,
+            "scaling": self.scaling,
+            "lambda": self.lamb,
+            "accept": np.exp(accept),
+            "accepted": accepted,
         }
 
         return q_new, [stats]
@@ -820,14 +877,14 @@ def sample_except(limit, excluded):
 
 def softmax(x):
     e_x = np.exp(x - np.max(x))
-    return e_x / np.sum(e_x, axis = 0)
+    return e_x / np.sum(e_x, axis=0)
 
 
 def delta_logp(logp, vars, shared):
     [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared)
 
     tensor_type = inarray0.type
-    inarray1 = tensor_type('inarray1')
+    inarray1 = tensor_type("inarray1")
 
     logp1 = pm.CallableTensor(logp0)(inarray1)
 
diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py
index 61c9584fd4..dff75b6621 100644
--- a/pymc3/step_methods/sgmcmc.py
+++ b/pymc3/step_methods/sgmcmc.py
@@ -23,8 +23,10 @@
 
 __all__ = []
 
-EXPERIMENTAL_WARNING = "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet"\
+EXPERIMENTAL_WARNING = (
+    "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet"
     " recommended for use in PyMC3!"
+)
 
 
 def _value_error(cond, str):
@@ -34,18 +36,14 @@ def _value_error(cond, str):
 
 
 def _check_minibatches(minibatch_tensors, minibatches):
-    _value_error(
-        isinstance(minibatch_tensors, list),
-        'minibatch_tensors must be a list.')
+    _value_error(isinstance(minibatch_tensors, list), "minibatch_tensors must be a list.")
 
-    _value_error(
-        hasattr(minibatches, "__iter__"), 'minibatches must be an iterator.')
+    _value_error(hasattr(minibatches, "__iter__"), "minibatches must be an iterator.")
 
 
 def prior_dlogp(vars, model, flat_view):
     """Returns the gradient of the prior on the parameters as a vector of size D x 1"""
-    terms = tt.concatenate(
-        [theano.grad(var.logpt, var).flatten() for var in vars], axis=0)
+    terms = tt.concatenate([theano.grad(var.logpt, var).flatten() for var in vars], axis=0)
     dlogp = theano.clone(terms, flat_view.replacements, strict=False)
 
     return dlogp
@@ -59,16 +57,16 @@ def elemwise_dlogL(vars, model, flat_view):
     # select one observed random variable
     obs_var = model.observed_RVs[0]
     # tensor of shape (batch_size,)
-    logL = obs_var.logp_elemwiset.sum(
-        axis=tuple(range(1, obs_var.logp_elemwiset.ndim)))
+    logL = obs_var.logp_elemwiset.sum(axis=tuple(range(1, obs_var.logp_elemwiset.ndim)))
     # calculate fisher information
     terms = []
     for var in vars:
-        output, _ =  theano.scan(lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),\
-                           sequences=[tt.arange(logL.shape[0])])
+        output, _ = theano.scan(
+            lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),
+            sequences=[tt.arange(logL.shape[0])],
+        )
         terms.append(output)
-    dlogL = theano.clone(
-        tt.concatenate(terms, axis=1), flat_view.replacements, strict=False)
+    dlogL = theano.clone(tt.concatenate(terms, axis=1), flat_view.replacements, strict=False)
     return dlogL
 
 
@@ -111,16 +109,18 @@ class BaseStochasticGradient(ArrayStepShared):
             Returns None it creates class variables which are required for the training fn
     """
 
-    def __init__(self,
-                 vars=None,
-                 batch_size=None,
-                 total_size=None,
-                 step_size=1.0,
-                 model=None,
-                 random_seed=None,
-                 minibatches=None,
-                 minibatch_tensors=None,
-                 **kwargs):
+    def __init__(
+        self,
+        vars=None,
+        batch_size=None,
+        total_size=None,
+        step_size=1.0,
+        model=None,
+        random_seed=None,
+        minibatches=None,
+        minibatch_tensors=None,
+        **kwargs
+    ):
         warnings.warn(EXPERIMENTAL_WARNING)
 
         model = modelcontext(model)
@@ -136,7 +136,8 @@ def __init__(self,
         self.total_size = total_size
         _value_error(
             total_size != None or batch_size != None,
-            'total_size and batch_size of training data have to be specified')
+            "total_size and batch_size of training data have to be specified",
+        )
         self.expected_iter = int(total_size / batch_size)
 
         # set random stream
@@ -168,12 +169,10 @@ def __init__(self,
             def is_shared(t):
                 return isinstance(t, theano.compile.sharedvalue.SharedVariable)
 
-            tensors = [(t.type() if is_shared(t) else t)
-                       for t in minibatch_tensors]
-            updates = OrderedDict({
-                t: t_
-                for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)
-            })
+            tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
+            updates = OrderedDict(
+                {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)}
+            )
             self.minibatch_tensors = tensors
             self.inarray += self.minibatch_tensors
             self.updates.update(updates)
@@ -207,7 +206,7 @@ def astep(self, q0):
         -------
         q
         """
-        if hasattr(self, 'minibatch_tensors'):
+        if hasattr(self, "minibatch_tensors"):
             return q0 + self.training_fn(q0, *next(self.minibatches))
         else:
             return q0 + self.training_fn(q0)
diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py
index b0d93dc2f0..f6e86e1fbe 100644
--- a/pymc3/step_methods/slicer.py
+++ b/pymc3/step_methods/slicer.py
@@ -22,9 +22,9 @@
 from ..theanof import inputvars
 from ..vartypes import continuous_types
 
-__all__ = ['Slice']
+__all__ = ["Slice"]
 
-LOOP_ERR_MSG = 'max slicer iters %d exceeded'
+LOOP_ERR_MSG = "max slicer iters %d exceeded"
 
 
 class Slice(ArrayStep):
@@ -43,15 +43,15 @@ class Slice(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-    name = 'slice'
+
+    name = "slice"
     default_blocked = False
 
-    def __init__(self, vars=None, w=1., tune=True, model=None,
-                 iter_limit=np.inf, **kwargs):
+    def __init__(self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, **kwargs):
         self.model = modelcontext(model)
         self.w = w
         self.tune = tune
-        self.n_tunes = 0.
+        self.n_tunes = 0.0
         self.iter_limit = iter_limit
 
         if vars is None:
@@ -72,13 +72,13 @@ def astep(self, q0, logp):
             qr[i] = q[i] + self.w[i]
             # Stepping out procedure
             cnt = 0
-            while(y <= logp(ql)):  # changed lt to leq  for locally uniform posteriors
+            while y <= logp(ql):  # changed lt to leq  for locally uniform posteriors
                 ql[i] -= self.w[i]
                 cnt += 1
                 if cnt > self.iter_limit:
                     raise RuntimeError(LOOP_ERR_MSG % self.iter_limit)
             cnt = 0
-            while(y <= logp(qr)):
+            while y <= logp(qr):
                 qr[i] += self.w[i]
                 cnt += 1
                 if cnt > self.iter_limit:
@@ -97,11 +97,14 @@ def astep(self, q0, logp):
                 if cnt > self.iter_limit:
                     raise RuntimeError(LOOP_ERR_MSG % self.iter_limit)
 
-            if self.tune:  # I was under impression from MacKays lectures that slice width can be tuned without
+            if (
+                self.tune
+            ):  # I was under impression from MacKays lectures that slice width can be tuned without
                 # breaking markovianness. Can we do it regardless of self.tune?(@madanh)
-                self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) +\
-                    (qr[i] - ql[i]) / (self.n_tunes + 1)  # same as before
-            # unobvious and important: return qr and ql to the same point
+                self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) + (qr[i] - ql[i]) / (
+                    self.n_tunes + 1
+                )  # same as before
+                # unobvious and important: return qr and ql to the same point
                 qr[i] = q[i]
                 ql[i] = q[i]
         if self.tune:
diff --git a/pymc3/step_methods/step_sizes.py b/pymc3/step_methods/step_sizes.py
index bdf0683c62..3603e30fa5 100644
--- a/pymc3/step_methods/step_sizes.py
+++ b/pymc3/step_methods/step_sizes.py
@@ -30,7 +30,7 @@ def __init__(self, initial_step, target, gamma, k, t0):
     def reset(self):
         self._log_step = np.log(self._initial_step)
         self._log_bar = self._log_step
-        self._hbar = 0.
+        self._hbar = 0.0
         self._count = 1
         self._mu = np.log(10 * self._initial_step)
         self._tuned_stats = []
@@ -47,8 +47,8 @@ def update(self, accept_stat, tune):
             return
 
         count, k, t0 = self._count, self._k, self._t0
-        w = 1. / (count + t0)
-        self._hbar = ((1 - w) * self._hbar + w * (self._target - accept_stat))
+        w = 1.0 / (count + t0)
+        self._hbar = (1 - w) * self._hbar + w * (self._target - accept_stat)
 
         self._log_step = self._mu - self._hbar * np.sqrt(count) / self._gamma
         mk = count ** -k
@@ -57,8 +57,8 @@ def update(self, accept_stat, tune):
 
     def stats(self):
         return {
-            'step_size': np.exp(self._log_step),
-            'step_size_bar': np.exp(self._log_bar),
+            "step_size": np.exp(self._log_step),
+            "step_size_bar": np.exp(self._log_bar),
         }
 
     def warnings(self):
@@ -71,13 +71,13 @@ def warnings(self):
         n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound
         lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95)
         if target_accept < lower or target_accept > upper:
-            msg = ('The acceptance probability does not match the target. It '
-                   'is %s, but should be close to %s. Try to increase the '
-                   'number of tuning steps.'
-                   % (mean_accept, target_accept))
-            info = {'target': target_accept, 'actual': mean_accept}
-            warning = SamplerWarning(
-                WarningType.BAD_ACCEPTANCE, msg, 'warn', extra=info)
+            msg = (
+                "The acceptance probability does not match the target. It "
+                "is %s, but should be close to %s. Try to increase the "
+                "number of tuning steps." % (mean_accept, target_accept)
+            )
+            info = {"target": target_accept, "actual": mean_accept}
+            warning = SamplerWarning(WarningType.BAD_ACCEPTANCE, msg, "warn", extra=info)
             return [warning]
         else:
             return []

From c4c48392d4e6f833ca8269592aa05e9a5e6a7242 Mon Sep 17 00:00:00 2001
From: Ali Akbar Septiandri <mail.septiandri@gmail.com>
Date: Sun, 27 Sep 2020 12:34:35 +0100
Subject: [PATCH 3/7] Revert two files

---
 pymc3/distributions/continuous.py       |  12 ++-
 pymc3/step_methods/hmc/quadpotential.py | 120 ++++++++++++------------
 2 files changed, 67 insertions(+), 65 deletions(-)

diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 80fc6a0b3e..5ccdb7f5d2 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -134,7 +134,9 @@ def assert_negative_support(var, label, distname, value=-1e-6):
             support = False
 
     if np.any(support):
-        msg = "The variable specified for {} has negative support for {}, ".format(label, distname)
+        msg = "The variable specified for {} has negative support for {}, ".format(
+            label, distname
+        )
         msg += "likely making it unsuitable for this parameter."
         warnings.warn(msg)
 
@@ -710,7 +712,7 @@ def random(self, point=None, size=None):
         )
 
     def _random(self, mu, sigma, lower, upper, size):
-        """Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's
+        """ Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's
         parametrization to scipy.truncnorm. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -3445,7 +3447,7 @@ def random(self, point=None, size=None):
         )
 
     def _random(self, c, lower, upper, size):
-        """Wrapper around stats.triang.rvs that converts Triangular's
+        """ Wrapper around stats.triang.rvs that converts Triangular's
         parametrization to scipy.triang. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -3704,7 +3706,7 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs):
         self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma))
         self.b = b = tt.as_tensor_variable(floatX(b))
 
-        nu_sigma_ratio = -(nu ** 2) / (2 * sigma ** 2)
+        nu_sigma_ratio = -nu ** 2 / (2 * sigma ** 2)
         self.mean = (
             sigma
             * np.sqrt(np.pi / 2)
@@ -3760,7 +3762,7 @@ def random(self, point=None, size=None):
         return generate_samples(self._random, nu=nu, sigma=sigma, dist_shape=self.shape, size=size)
 
     def _random(self, nu, sigma, size):
-        """Wrapper around stats.rice.rvs that converts Rice's
+        """ Wrapper around stats.rice.rvs that converts Rice's
         parametrization to scipy.rice. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index fd962f48cb..8750fd6484 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -22,15 +22,9 @@
 from pymc3.theanof import floatX
 
 
-__all__ = [
-    "quad_potential",
-    "QuadPotentialDiag",
-    "QuadPotentialFull",
-    "QuadPotentialFullInv",
-    "QuadPotentialDiagAdapt",
-    "QuadPotentialFullAdapt",
-    "isquadpotential",
-]
+__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull',
+           'QuadPotentialFullInv', 'QuadPotentialDiagAdapt',
+           'QuadPotentialFullAdapt', 'isquadpotential']
 
 
 def quad_potential(C, is_cov):
@@ -62,7 +56,7 @@ def quad_potential(C, is_cov):
         if is_cov:
             return QuadPotentialDiag(C)
         else:
-            return QuadPotentialDiag(1.0 / C)
+            return QuadPotentialDiag(1. / C)
     else:
         if is_cov:
             return QuadPotentialFull(C)
@@ -76,10 +70,11 @@ def partial_check_positive_definite(C):
         d = C
     else:
         d = np.diag(C)
-    (i,) = np.nonzero(np.logical_or(np.isnan(d), d <= 0))
+    i, = np.nonzero(np.logical_or(np.isnan(d), d <= 0))
 
     if len(i):
-        raise PositiveDefiniteError("Simple check failed. Diagonal contains negatives", i)
+        raise PositiveDefiniteError(
+            "Simple check failed. Diagonal contains negatives", i)
 
 
 class PositiveDefiniteError(ValueError):
@@ -89,22 +84,23 @@ def __init__(self, msg, idx):
         self.msg = msg
 
     def __str__(self):
-        return "Scaling is not positive definite: %s. Check indexes %s." % (self.msg, self.idx)
+        return ("Scaling is not positive definite: %s. Check indexes %s."
+                % (self.msg, self.idx))
 
 
 class QuadPotential:
     def velocity(self, x, out=None):
         """Compute the current velocity at a position in parameter space."""
-        raise NotImplementedError("Abstract method")
+        raise NotImplementedError('Abstract method')
 
     def energy(self, x, velocity=None):
-        raise NotImplementedError("Abstract method")
+        raise NotImplementedError('Abstract method')
 
     def random(self, x):
-        raise NotImplementedError("Abstract method")
+        raise NotImplementedError('Abstract method')
 
     def velocity_energy(self, x, v_out):
-        raise NotImplementedError("Abstract method")
+        raise NotImplementedError('Abstract method')
 
     def update(self, sample, grad, tune):
         """Inform the potential about a new sample during tuning.
@@ -156,17 +152,15 @@ def __init__(
     ):
         """Set up a diagonal mass matrix."""
         if initial_diag is not None and initial_diag.ndim != 1:
-            raise ValueError("Initial diagonal must be one-dimensional.")
+            raise ValueError('Initial diagonal must be one-dimensional.')
         if initial_mean.ndim != 1:
-            raise ValueError("Initial mean must be one-dimensional.")
+            raise ValueError('Initial mean must be one-dimensional.')
         if initial_diag is not None and len(initial_diag) != n:
-            raise ValueError(
-                "Wrong shape for initial_diag: expected %s got %s" % (n, len(initial_diag))
-            )
+            raise ValueError('Wrong shape for initial_diag: expected %s got %s'
+                             % (n, len(initial_diag)))
         if len(initial_mean) != n:
-            raise ValueError(
-                "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean))
-            )
+            raise ValueError('Wrong shape for initial_mean: expected %s got %s'
+                             % (n, len(initial_mean)))
 
         if dtype is None:
             dtype = theano.config.floatX
@@ -190,10 +184,9 @@ def reset(self):
         self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True)
         self._var_theano = theano.shared(self._var)
         self._stds = np.sqrt(self._initial_diag)
-        self._inv_stds = floatX(1.0) / self._stds
+        self._inv_stds = floatX(1.) / self._stds
         self._foreground_var = _WeightedVariance(
-            self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype
-        )
+            self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype)
         self._background_var = _WeightedVariance(self._n, dtype=self.dtype)
         self._n_samples = 0
 
@@ -263,12 +256,11 @@ def raise_ok(self, vmap):
                 for i in range(slclen):
                     name_slc.append((vmap_.var, i))
             index = np.where(self._stds == 0)[0]
-            errmsg = ["Mass matrix contains zeros on the diagonal. "]
+            errmsg = ['Mass matrix contains zeros on the diagonal. ']
             for ii in index:
-                errmsg.append(
-                    "The derivative of RV `{}`.ravel()[{}]" " is zero.".format(*name_slc[ii])
-                )
-            raise ValueError("\n".join(errmsg))
+                errmsg.append('The derivative of RV `{}`.ravel()[{}]'
+                              ' is zero.'.format(*name_slc[ii]))
+            raise ValueError('\n'.join(errmsg))
 
         if np.any(~np.isfinite(self._stds)):
             name_slc = []
@@ -278,12 +270,11 @@ def raise_ok(self, vmap):
                 for i in range(slclen):
                     name_slc.append((vmap_.var, i))
             index = np.where(~np.isfinite(self._stds))[0]
-            errmsg = ["Mass matrix contains non-finite values on the diagonal. "]
+            errmsg = ['Mass matrix contains non-finite values on the diagonal. ']
             for ii in index:
-                errmsg.append(
-                    "The derivative of RV `{}`.ravel()[{}]" " is non-finite.".format(*name_slc[ii])
-                )
-            raise ValueError("\n".join(errmsg))
+                errmsg.append('The derivative of RV `{}`.ravel()[{}]'
+                              ' is non-finite.'.format(*name_slc[ii]))
+            raise ValueError('\n'.join(errmsg))
 
 
 class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt):
@@ -330,26 +321,25 @@ def update(self, sample, grad, tune):
 class _WeightedVariance:
     """Online algorithm for computing mean of variance."""
 
-    def __init__(
-        self, nelem, initial_mean=None, initial_variance=None, initial_weight=0, dtype="d"
-    ):
+    def __init__(self, nelem, initial_mean=None, initial_variance=None,
+                 initial_weight=0, dtype='d'):
         self._dtype = dtype
         self.n_samples = float(initial_weight)
         if initial_mean is None:
-            self.mean = np.zeros(nelem, dtype="d")
+            self.mean = np.zeros(nelem, dtype='d')
         else:
-            self.mean = np.array(initial_mean, dtype="d", copy=True)
+            self.mean = np.array(initial_mean, dtype='d', copy=True)
         if initial_variance is None:
-            self.raw_var = np.zeros(nelem, dtype="d")
+            self.raw_var = np.zeros(nelem, dtype='d')
         else:
-            self.raw_var = np.array(initial_variance, dtype="d", copy=True)
+            self.raw_var = np.array(initial_variance, dtype='d', copy=True)
 
         self.raw_var[:] *= self.n_samples
 
         if self.raw_var.shape != (nelem,):
-            raise ValueError("Invalid shape for initial variance.")
+            raise ValueError('Invalid shape for initial variance.')
         if self.mean.shape != (nelem,):
-            raise ValueError("Invalid shape for initial mean.")
+            raise ValueError('Invalid shape for initial mean.')
 
     def add_sample(self, x, weight):
         x = np.asarray(x)
@@ -357,11 +347,11 @@ def add_sample(self, x, weight):
         old_diff = x - self.mean
         self.mean[:] += old_diff / self.n_samples
         new_diff = x - self.mean
-        self.raw_var[:] += weight * old_diff * new_diff
+        self.raw_var[:] +=  weight * old_diff * new_diff
 
     def current_variance(self, out=None):
         if self.n_samples == 0:
-            raise ValueError("Can not compute variance without samples.")
+            raise ValueError('Can not compute variance without samples.')
         if out is not None:
             return np.divide(self.raw_var, self.n_samples, out=out)
         else:
@@ -386,10 +376,10 @@ def __init__(self, v, dtype=None):
             dtype = theano.config.floatX
         self.dtype = dtype
         v = v.astype(self.dtype)
-        s = v ** 0.5
+        s = v ** .5
 
         self.s = s
-        self.inv_s = 1.0 / s
+        self.inv_s = 1. / s
         self.v = v
 
     def velocity(self, x, out=None):
@@ -407,7 +397,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is not None:
             return 0.5 * np.dot(x, velocity)
-        return 0.5 * x.dot(self.v * x)
+        return .5 * x.dot(self.v * x)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -447,7 +437,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is None:
             velocity = self.velocity(x)
-        return 0.5 * x.dot(velocity)
+        return .5 * x.dot(velocity)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -480,7 +470,8 @@ def velocity(self, x, out=None):
     def random(self):
         """Draw random value from QuadPotential."""
         vals = np.random.normal(size=self._n).astype(self.dtype)
-        return scipy.linalg.solve_triangular(self._chol.T, vals, overwrite_b=True)
+        return scipy.linalg.solve_triangular(self._chol.T, vals,
+                                             overwrite_b=True)
 
     def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
@@ -498,7 +489,6 @@ def velocity_energy(self, x, v_out):
 
 class QuadPotentialFullAdapt(QuadPotentialFull):
     """Adapt a dense mass matrix using the sample covariances."""
-
     def __init__(
         self,
         n,
@@ -518,11 +508,13 @@ def __init__(
             raise ValueError("Initial mean must be one-dimensional.")
         if initial_cov is not None and initial_cov.shape != (n, n):
             raise ValueError(
-                "Wrong shape for initial_cov: expected %s got %s" % (n, initial_cov.shape)
+                "Wrong shape for initial_cov: expected %s got %s"
+                % (n, initial_cov.shape)
             )
         if len(initial_mean) != n:
             raise ValueError(
-                "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean))
+                "Wrong shape for initial_mean: expected %s got %s"
+                % (n, len(initial_mean))
             )
 
         if dtype is None:
@@ -581,7 +573,9 @@ def update(self, sample, grad, tune):
         # window.
         if delta >= self.adaptation_window:
             self._foreground_cov = self._background_cov
-            self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype)
+            self._background_cov = _WeightedCovariance(
+                self._n, dtype=self.dtype
+            )
 
             self._previous_update = self._n_samples
             self.adaptation_window = int(self.adaptation_window * self.adaptation_window_multiplier)
@@ -651,13 +645,12 @@ def current_mean(self):
 
 try:
     import sksparse.cholmod as cholmod
-
     chol_available = True
 except ImportError:
     chol_available = False
 
 if chol_available:
-    __all__ += ["QuadPotentialSparse"]
+    __all__ += ['QuadPotentialSparse']
 
     import theano.sparse
 
@@ -691,3 +684,10 @@ def random(self):
         def energy(self, x):
             """Compute kinetic energy at a position in parameter space."""
             return 0.5 * x.T.dot(self.velocity(x))
+            n = self.factor.solve_Lt(n)
+            n = self.factor.apply_Pt(n)
+            return n
+
+        def energy(self, x):
+            """Compute kinetic energy at a position in parameter space."""
+            return 0.5 * x.T.dot(self.velocity(x))

From 1f2b8568398bcf683b18ea6d7cf1915759e73f30 Mon Sep 17 00:00:00 2001
From: Ali Akbar S <mail.septiandri@gmail.com>
Date: Sun, 27 Sep 2020 13:17:27 +0100
Subject: [PATCH 4/7] Update pymc3/distributions/bound.py

Co-authored-by: Marco Gorelli <m.e.gorelli@gmail.com>
---
 pymc3/distributions/bound.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py
index b52dc905b1..11fc9ddafe 100644
--- a/pymc3/distributions/bound.py
+++ b/pymc3/distributions/bound.py
@@ -82,7 +82,7 @@ def _random(self, lower, upper, point=None, size=None):
         upper = np.asarray(upper)
         if lower.size > 1 or upper.size > 1:
             raise ValueError(
-                "Drawing samples from distributions with " "array-valued bounds is not supported."
+                "Drawing samples from distributions with array-valued bounds is not supported."
             )
         total_size = np.prod(size).astype(np.int)
         samples = []

From 53b42a11bcc919401ecc90f56ac8510c9a2318f1 Mon Sep 17 00:00:00 2001
From: Ali Akbar Septiandri <mail.septiandri@gmail.com>
Date: Sun, 27 Sep 2020 13:28:21 +0100
Subject: [PATCH 5/7] Revert blackened metropolis.py

---
 pymc3/step_methods/metropolis.py | 309 +++++++++++++------------------
 1 file changed, 126 insertions(+), 183 deletions(-)

diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py
index 89b0ff63af..383536725d 100644
--- a/pymc3/step_methods/metropolis.py
+++ b/pymc3/step_methods/metropolis.py
@@ -18,29 +18,13 @@
 import scipy.linalg
 
 from ..distributions import draw_values
-from .arraystep import (
-    ArrayStepShared,
-    PopulationArrayStepShared,
-    ArrayStep,
-    metrop_select,
-    Competence,
-)
+from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
 import pymc3 as pm
 from pymc3.theanof import floatX
 
-__all__ = [
-    "Metropolis",
-    "DEMetropolis",
-    "DEMetropolisZ",
-    "BinaryMetropolis",
-    "BinaryGibbsMetropolis",
-    "CategoricalGibbsMetropolis",
-    "NormalProposal",
-    "CauchyProposal",
-    "LaplaceProposal",
-    "PoissonProposal",
-    "MultivariateNormalProposal",
-]
+__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
+           'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
+           'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']
 
 # Available proposal distributions for Metropolis
 
@@ -117,32 +101,19 @@ class Metropolis(ArrayStepShared):
     mode:  string or `Mode` instance.
         compilation mode passed to Theano functions
     """
-
-    name = "metropolis"
+    name = 'metropolis'
 
     default_blocked = False
     generates_stats = True
-    stats_dtypes = [
-        {
-            "accept": np.float64,
-            "accepted": np.bool,
-            "tune": np.bool,
-            "scaling": np.float64,
-        }
-    ]
-
-    def __init__(
-        self,
-        vars=None,
-        S=None,
-        proposal_dist=None,
-        scaling=1.0,
-        tune=True,
-        tune_interval=100,
-        model=None,
-        mode=None,
-        **kwargs
-    ):
+    stats_dtypes = [{
+        'accept': np.float64,
+        'accepted': np.bool,
+        'tune': np.bool,
+        'scaling': np.float64,
+    }]
+
+    def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
+                 tune=True, tune_interval=100, model=None, mode=None, **kwargs):
 
         model = pm.modelcontext(model)
 
@@ -162,7 +133,7 @@ def __init__(
         else:
             raise ValueError("Invalid rank for variance: %s" % S.ndim)
 
-        self.scaling = np.atleast_1d(scaling).astype("d")
+        self.scaling = np.atleast_1d(scaling).astype('d')
         self.tune = tune
         self.tune_interval = tune_interval
         self.steps_until_tune = tune_interval
@@ -170,14 +141,15 @@ def __init__(
 
         # Determine type of variables
         self.discrete = np.concatenate(
-            [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]
-        )
+            [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars])
         self.any_discrete = self.discrete.any()
         self.all_discrete = self.discrete.all()
 
         # remember initial settings before tuning so they can be reset
         self._untuned_settings = dict(
-            scaling=self.scaling, steps_until_tune=tune_interval, accepted=self.accepted
+            scaling=self.scaling,
+            steps_until_tune=tune_interval,
+            accepted=self.accepted
         )
 
         self.mode = mode
@@ -195,7 +167,8 @@ def reset_tuning(self):
     def astep(self, q0):
         if not self.steps_until_tune and self.tune:
             # Tune scaling parameter
-            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
+            self.scaling = tune(
+                self.scaling, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
             self.accepted = 0
@@ -204,12 +177,13 @@ def astep(self, q0):
 
         if self.any_discrete:
             if self.all_discrete:
-                delta = np.round(delta, 0).astype("int64")
-                q0 = q0.astype("int64")
-                q = (q0 + delta).astype("int64")
+                delta = np.round(delta, 0).astype('int64')
+                q0 = q0.astype('int64')
+                q = (q0 + delta).astype('int64')
             else:
-                delta[self.discrete] = np.round(delta[self.discrete], 0)
-                q = q0 + delta
+                delta[self.discrete] = np.round(
+                    delta[self.discrete], 0)
+                q = (q0 + delta)
         else:
             q = floatX(q0 + delta)
 
@@ -220,10 +194,10 @@ def astep(self, q0):
         self.steps_until_tune -= 1
 
         stats = {
-            "tune": self.tune,
-            "scaling": self.scaling,
-            "accept": np.exp(accept),
-            "accepted": accepted,
+            'tune': self.tune,
+            'scaling': self.scaling,
+            'accept': np.exp(accept),
+            'accepted': accepted,
         }
 
         return q_new, [stats]
@@ -287,19 +261,16 @@ class BinaryMetropolis(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-
-    name = "binary_metropolis"
+    name = 'binary_metropolis'
 
     generates_stats = True
-    stats_dtypes = [
-        {
-            "accept": np.float64,
-            "tune": np.bool,
-            "p_jump": np.float64,
-        }
-    ]
+    stats_dtypes = [{
+        'accept': np.float64,
+        'tune': np.bool,
+        'p_jump': np.float64,
+    }]
 
-    def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None):
+    def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None):
 
         model = pm.modelcontext(model)
 
@@ -310,19 +281,20 @@ def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None):
         self.accepted = 0
 
         if not all([v.dtype in pm.discrete_types for v in vars]):
-            raise ValueError("All variables must be Bernoulli for BinaryMetropolis")
+            raise ValueError(
+                'All variables must be Bernoulli for BinaryMetropolis')
 
         super().__init__(vars, [model.fastlogp])
 
     def astep(self, q0, logp):
 
         # Convert adaptive_scale_factor to a jump probability
-        p_jump = 1.0 - 0.5 ** self.scaling
+        p_jump = 1. - .5 ** self.scaling
 
         rand_array = nr.random(q0.shape)
         q = np.copy(q0)
         # Locations where switches occur, according to p_jump
-        switch_locs = rand_array < p_jump
+        switch_locs = (rand_array < p_jump)
         q[switch_locs] = True - q[switch_locs]
 
         accept = logp(q) - logp(q0)
@@ -330,20 +302,21 @@ def astep(self, q0, logp):
         self.accepted += accepted
 
         stats = {
-            "tune": self.tune,
-            "accept": np.exp(accept),
-            "p_jump": p_jump,
+            'tune': self.tune,
+            'accept': np.exp(accept),
+            'p_jump': p_jump,
         }
 
         return q_new, [stats]
 
     @staticmethod
     def competence(var):
-        """
+        '''
         BinaryMetropolis is only suitable for binary (bool)
         and Categorical variables with k=1.
-        """
-        distribution = getattr(var.distribution, "parent_dist", var.distribution)
+        '''
+        distribution = getattr(
+            var.distribution, 'parent_dist', var.distribution)
         if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types):
             return Competence.COMPATIBLE
         elif isinstance(distribution, pm.Categorical) and (distribution.k == 2):
@@ -368,10 +341,9 @@ class BinaryGibbsMetropolis(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
+    name = 'binary_gibbs_metropolis'
 
-    name = "binary_gibbs_metropolis"
-
-    def __init__(self, vars, order="random", transit_p=0.8, model=None):
+    def __init__(self, vars, order='random', transit_p=.8, model=None):
 
         model = pm.modelcontext(model)
 
@@ -380,17 +352,18 @@ def __init__(self, vars, order="random", transit_p=0.8, model=None):
 
         self.dim = sum(v.dsize for v in vars)
 
-        if order == "random":
+        if order == 'random':
             self.shuffle_dims = True
             self.order = list(range(self.dim))
         else:
             if sorted(order) != list(range(self.dim)):
-                raise ValueError("Argument 'order' has to be a permutation")
+                raise ValueError('Argument \'order\' has to be a permutation')
             self.shuffle_dims = False
             self.order = order
 
         if not all([v.dtype in pm.discrete_types for v in vars]):
-            raise ValueError("All variables must be binary for BinaryGibbsMetropolis")
+            raise ValueError(
+                'All variables must be binary for BinaryGibbsMetropolis')
 
         super().__init__(vars, [model.fastlogp])
 
@@ -416,11 +389,12 @@ def astep(self, q0, logp):
 
     @staticmethod
     def competence(var):
-        """
+        '''
         BinaryMetropolis is only suitable for Bernoulli
         and Categorical variables with k=2.
-        """
-        distribution = getattr(var.distribution, "parent_dist", var.distribution)
+        '''
+        distribution = getattr(
+            var.distribution, 'parent_dist', var.distribution)
         if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types):
             return Competence.IDEAL
         elif isinstance(distribution, pm.Categorical) and (distribution.k == 2):
@@ -430,16 +404,15 @@ def competence(var):
 
 class CategoricalGibbsMetropolis(ArrayStep):
     """A Metropolis-within-Gibbs step method optimized for categorical variables.
-    This step method works for Bernoulli variables as well, but it is not
-    optimized for them, like BinaryGibbsMetropolis is. Step method supports
-    two types of proposals: A uniform proposal and a proportional proposal,
-    which was introduced by Liu in his 1996 technical report
-    "Metropolized Gibbs Sampler: An Improvement".
+       This step method works for Bernoulli variables as well, but it is not
+       optimized for them, like BinaryGibbsMetropolis is. Step method supports
+       two types of proposals: A uniform proposal and a proportional proposal,
+       which was introduced by Liu in his 1996 technical report
+       "Metropolized Gibbs Sampler: An Improvement".
     """
+    name = 'categorical_gibbs_metropolis'
 
-    name = "categorical_gibbs_metropolis"
-
-    def __init__(self, vars, proposal="uniform", order="random", model=None):
+    def __init__(self, vars, proposal='uniform', order='random', model=None):
 
         model = pm.modelcontext(model)
         vars = pm.inputvars(vars)
@@ -450,36 +423,34 @@ def __init__(self, vars, proposal="uniform", order="random", model=None):
         # variable with M categories and y being a 3-D variable with N
         # categories, we will have dimcats = [(0, M), (1, M), (2, N), (3, N), (4, N)].
         for v in vars:
-            distr = getattr(v.distribution, "parent_dist", v.distribution)
+            distr = getattr(v.distribution, 'parent_dist', v.distribution)
             if isinstance(distr, pm.Categorical):
                 k = draw_values([distr.k])[0]
             elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types):
                 k = 2
             else:
-                raise ValueError(
-                    "All variables must be categorical or binary" + "for CategoricalGibbsMetropolis"
-                )
+                raise ValueError('All variables must be categorical or binary' +
+                                 'for CategoricalGibbsMetropolis')
             start = len(dimcats)
             dimcats += [(dim, k) for dim in range(start, start + v.dsize)]
 
-        if order == "random":
+        if order == 'random':
             self.shuffle_dims = True
             self.dimcats = dimcats
         else:
             if sorted(order) != list(range(len(dimcats))):
-                raise ValueError("Argument 'order' has to be a permutation")
+                raise ValueError('Argument \'order\' has to be a permutation')
             self.shuffle_dims = False
             self.dimcats = [dimcats[j] for j in order]
 
-        if proposal == "uniform":
+        if proposal == 'uniform':
             self.astep = self.astep_unif
-        elif proposal == "proportional":
+        elif proposal == 'proportional':
             # Use the optimized "Metropolized Gibbs Sampler" described in Liu96.
             self.astep = self.astep_prop
         else:
-            raise ValueError(
-                "Argument 'proposal' should either be " + "'uniform' or 'proportional'"
-            )
+            raise ValueError('Argument \'proposal\' should either be ' +
+                    '\'uniform\' or \'proportional\'')
 
         super().__init__(vars, [model.fastlogp])
 
@@ -523,8 +494,8 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k):
                 log_probs[candidate_cat] = logp(q)
         probs = softmax(log_probs)
         prob_curr, probs[given_cat] = probs[given_cat], 0.0
-        probs /= 1.0 - prob_curr
-        proposed_cat = nr.choice(candidates, p=probs)
+        probs /= (1.0 - prob_curr)
+        proposed_cat = nr.choice(candidates, p = probs)
         accept_ratio = (1.0 - prob_curr) / (1.0 - probs[proposed_cat])
         if not np.isfinite(accept_ratio) or nr.uniform() >= accept_ratio:
             q[dim] = given_cat
@@ -534,11 +505,12 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k):
 
     @staticmethod
     def competence(var):
-        """
+        '''
         CategoricalGibbsMetropolis is only suitable for Bernoulli and
         Categorical variables.
-        """
-        distribution = getattr(var.distribution, "parent_dist", var.distribution)
+        '''
+        distribution = getattr(
+            var.distribution, 'parent_dist', var.distribution)
         if isinstance(distribution, pm.Categorical):
             if distribution.k > 2:
                 return Competence.IDEAL
@@ -582,34 +554,20 @@ class DEMetropolis(PopulationArrayStepShared):
         Statistics and Computing
         `link <https://doi.org/10.1007/s11222-006-8769-1>`__
     """
-
-    name = "DEMetropolis"
+    name = 'DEMetropolis'
 
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [
-        {
-            "accept": np.float64,
-            "accepted": np.bool,
-            "tune": np.bool,
-            "scaling": np.float64,
-            "lambda": np.float64,
-        }
-    ]
-
-    def __init__(
-        self,
-        vars=None,
-        S=None,
-        proposal_dist=None,
-        lamb=None,
-        scaling=0.001,
-        tune=None,
-        tune_interval=100,
-        model=None,
-        mode=None,
-        **kwargs
-    ):
+    stats_dtypes = [{
+        'accept': np.float64,
+        'accepted': np.bool,
+        'tune': np.bool,
+        'scaling': np.float64,
+        'lambda': np.float64,
+    }]
+
+    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
+                 tune=None, tune_interval=100, model=None, mode=None, **kwargs):
 
         model = pm.modelcontext(model)
 
@@ -625,12 +583,12 @@ def __init__(
         else:
             self.proposal_dist = UniformProposal(S)
 
-        self.scaling = np.atleast_1d(scaling).astype("d")
+        self.scaling = np.atleast_1d(scaling).astype('d')
         if lamb is None:
             # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if tune not in {None, "scaling", "lambda"}:
+        if tune not in {None, 'scaling', 'lambda'}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = tune
         self.tune_interval = tune_interval
@@ -645,9 +603,9 @@ def __init__(
 
     def astep(self, q0):
         if not self.steps_until_tune and self.tune:
-            if self.tune == "scaling":
+            if self.tune == 'scaling':
                 self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
-            elif self.tune == "lambda":
+            elif self.tune == 'lambda':
                 self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
@@ -670,11 +628,11 @@ def astep(self, q0):
         self.steps_until_tune -= 1
 
         stats = {
-            "tune": self.tune,
-            "scaling": self.scaling,
-            "lambda": self.lamb,
-            "accept": np.exp(accept),
-            "accepted": accepted,
+            'tune': self.tune,
+            'scaling': self.scaling,
+            'lambda': self.lamb,
+            'accept': np.exp(accept),
+            'accepted': accepted
         }
 
         return q_new, [stats]
@@ -723,35 +681,20 @@ class DEMetropolisZ(ArrayStepShared):
         Statistics and Computing
         `link <https://doi.org/10.1007/s11222-008-9104-9>`__
     """
-
-    name = "DEMetropolisZ"
+    name = 'DEMetropolisZ'
 
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [
-        {
-            "accept": np.float64,
-            "accepted": np.bool,
-            "tune": np.bool,
-            "scaling": np.float64,
-            "lambda": np.float64,
-        }
-    ]
-
-    def __init__(
-        self,
-        vars=None,
-        S=None,
-        proposal_dist=None,
-        lamb=None,
-        scaling=0.001,
-        tune="lambda",
-        tune_interval=100,
-        tune_drop_fraction: float = 0.9,
-        model=None,
-        mode=None,
-        **kwargs
-    ):
+    stats_dtypes = [{
+        'accept': np.float64,
+        'accepted': np.bool,
+        'tune': np.bool,
+        'scaling': np.float64,
+        'lambda': np.float64,
+    }]
+
+    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
+                 tune='lambda', tune_interval=100, tune_drop_fraction:float=0.9, model=None, mode=None, **kwargs):
         model = pm.modelcontext(model)
 
         if vars is None:
@@ -760,18 +703,18 @@ def __init__(
 
         if S is None:
             S = np.ones(model.ndim)
-
+        
         if proposal_dist is not None:
             self.proposal_dist = proposal_dist(S)
         else:
             self.proposal_dist = UniformProposal(S)
 
-        self.scaling = np.atleast_1d(scaling).astype("d")
+        self.scaling = np.atleast_1d(scaling).astype('d')
         if lamb is None:
             # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if tune not in {None, "scaling", "lambda"}:
+        if tune not in {None, 'scaling', 'lambda'}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = True
         self.tune_target = tune
@@ -787,7 +730,7 @@ def __init__(
             scaling=self.scaling,
             lamb=self.lamb,
             steps_until_tune=tune_interval,
-            accepted=self.accepted,
+            accepted=self.accepted
         )
 
         self.mode = mode
@@ -807,9 +750,9 @@ def reset_tuning(self):
     def astep(self, q0):
         # same tuning scheme as DEMetropolis
         if not self.steps_until_tune and self.tune:
-            if self.tune_target == "scaling":
+            if self.tune_target == 'scaling':
                 self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
-            elif self.tune_target == "lambda":
+            elif self.tune_target == 'lambda':
                 self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
@@ -826,7 +769,7 @@ def astep(self, q0):
             iz2 = np.random.randint(it)
             while iz2 == iz1:
                 iz2 = np.random.randint(it)
-
+            
             z1 = self._history[iz1]
             z2 = self._history[iz2]
             # propose a jump
@@ -843,11 +786,11 @@ def astep(self, q0):
         self.steps_until_tune -= 1
 
         stats = {
-            "tune": self.tune,
-            "scaling": self.scaling,
-            "lambda": self.lamb,
-            "accept": np.exp(accept),
-            "accepted": accepted,
+            'tune': self.tune,
+            'scaling': self.scaling,
+            'lambda': self.lamb,
+            'accept': np.exp(accept),
+            'accepted': accepted
         }
 
         return q_new, [stats]
@@ -877,14 +820,14 @@ def sample_except(limit, excluded):
 
 def softmax(x):
     e_x = np.exp(x - np.max(x))
-    return e_x / np.sum(e_x, axis=0)
+    return e_x / np.sum(e_x, axis = 0)
 
 
 def delta_logp(logp, vars, shared):
     [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared)
 
     tensor_type = inarray0.type
-    inarray1 = tensor_type("inarray1")
+    inarray1 = tensor_type('inarray1')
 
     logp1 = pm.CallableTensor(logp0)(inarray1)
 

From 1e2db79b4e08d0ed5de6cb449bd73a5a404c6d3f Mon Sep 17 00:00:00 2001
From: Ali Akbar Septiandri <mail.septiandri@gmail.com>
Date: Sun, 27 Sep 2020 13:32:41 +0100
Subject: [PATCH 6/7] Revert blackened files

---
 pymc3/distributions/discrete.py     | 251 +++++++------
 pymc3/distributions/multivariate.py | 543 +++++++++++++---------------
 2 files changed, 393 insertions(+), 401 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index cfe87ed3d4..090949c905 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -24,23 +24,10 @@
 from ..theanof import floatX, intX, take_along_axis
 
 
-__all__ = [
-    "Binomial",
-    "BetaBinomial",
-    "Bernoulli",
-    "DiscreteWeibull",
-    "Poisson",
-    "NegativeBinomial",
-    "ConstantDist",
-    "Constant",
-    "ZeroInflatedPoisson",
-    "ZeroInflatedBinomial",
-    "ZeroInflatedNegativeBinomial",
-    "DiscreteUniform",
-    "Geometric",
-    "Categorical",
-    "OrderedLogistic",
-]
+__all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'DiscreteWeibull',
+           'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
+           'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
+           'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic']
 
 
 class Binomial(Discrete):
@@ -109,7 +96,9 @@ def random(self, point=None, size=None):
         array
         """
         n, p = draw_values([self.n, self.p], point=point, size=size)
-        return generate_samples(stats.binom.rvs, n=n, p=p, dist_shape=self.shape, size=size)
+        return generate_samples(stats.binom.rvs, n=n, p=p,
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -130,11 +119,8 @@ def logp(self, value):
 
         return bound(
             binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value),
-            0 <= value,
-            value <= n,
-            0 <= p,
-            p <= 1,
-        )
+            0 <= value, value <= n,
+            0 <= p, p <= 1)
 
 
 class BetaBinomial(Discrete):
@@ -197,7 +183,7 @@ def __init__(self, alpha, beta, n, *args, **kwargs):
         self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
         self.beta = beta = tt.as_tensor_variable(floatX(beta))
         self.n = n = tt.as_tensor_variable(intX(n))
-        self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8")
+        self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8')
 
     def _random(self, alpha, beta, n, size=None):
         size = size or 1
@@ -211,11 +197,8 @@ def _random(self, alpha, beta, n, size=None):
 
         quotient, remainder = divmod(_p.shape[0], _n.shape[0])
         if remainder != 0:
-            raise TypeError(
-                "n has a bad size! Was cast to {}, must evenly divide {}".format(
-                    _n.shape[0], _p.shape[0]
-                )
-            )
+            raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format(
+                _n.shape[0], _p.shape[0]))
         if quotient != 1:
             _n = np.tile(_n, quotient)
         samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size)
@@ -238,10 +221,11 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size)
-        return generate_samples(
-            self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size
-        )
+        alpha, beta, n = \
+            draw_values([self.alpha, self.beta, self.n], point=point, size=size)
+        return generate_samples(self._random, alpha=alpha, beta=beta, n=n,
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -259,15 +243,11 @@ def logp(self, value):
         """
         alpha = self.alpha
         beta = self.beta
-        return bound(
-            binomln(self.n, value)
-            + betaln(value + alpha, self.n - value + beta)
-            - betaln(alpha, beta),
-            value >= 0,
-            value <= self.n,
-            alpha > 0,
-            beta > 0,
-        )
+        return bound(binomln(self.n, value)
+                     + betaln(value + alpha, self.n - value + beta)
+                     - betaln(alpha, beta),
+                     value >= 0, value <= self.n,
+                     alpha > 0, beta > 0)
 
 
 class Bernoulli(Discrete):
@@ -313,7 +293,7 @@ class Bernoulli(Discrete):
     def __init__(self, p=None, logit_p=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if sum(int(var is None) for var in [p, logit_p]) != 1:
-            raise ValueError("Specify one of p and logit_p")
+            raise ValueError('Specify one of p and logit_p')
         if p is not None:
             self._is_logit = False
             self.p = p = tt.as_tensor_variable(floatX(p))
@@ -323,7 +303,7 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs):
             self.p = tt.nnet.sigmoid(floatX(logit_p))
             self._logit_p = tt.as_tensor_variable(logit_p)
 
-        self.mode = tt.cast(tround(self.p), "int8")
+        self.mode = tt.cast(tround(self.p), 'int8')
 
     def random(self, point=None, size=None):
         r"""
@@ -343,7 +323,9 @@ def random(self, point=None, size=None):
         array
         """
         p = draw_values([self.p], point=point, size=size)[0]
-        return generate_samples(stats.bernoulli.rvs, p, dist_shape=self.shape, size=size)
+        return generate_samples(stats.bernoulli.rvs, p,
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -365,8 +347,9 @@ def logp(self, value):
         else:
             p = self.p
             return bound(
-                tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1
-            )
+                tt.switch(value, tt.log(p), tt.log(1 - p)),
+                value >= 0, value <= 1,
+                p >= 0, p <= 1)
 
     def _distr_parameters_for_repr(self):
         return ["p"]
@@ -410,9 +393,8 @@ def DiscreteWeibull(q, b, x):
     Variance  :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2`
     ========  ======================
     """
-
     def __init__(self, q, beta, *args, **kwargs):
-        super().__init__(*args, defaults=("median",), **kwargs)
+        super().__init__(*args, defaults=('median',), **kwargs)
 
         self.q = q = tt.as_tensor_variable(floatX(q))
         self.beta = beta = tt.as_tensor_variable(floatX(beta))
@@ -436,13 +418,10 @@ def logp(self, value):
         q = self.q
         beta = self.beta
 
-        return bound(
-            tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
-            0 <= value,
-            0 < q,
-            q < 1,
-            0 < beta,
-        )
+        return bound(tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
+                     0 <= value,
+                     0 < q, q < 1,
+                     0 < beta)
 
     def _ppf(self, p):
         r"""
@@ -452,12 +431,12 @@ def _ppf(self, p):
         q = self.q
         beta = self.beta
 
-        return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype("int64")
+        return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1. / beta)) - 1).astype('int64')
 
     def _random(self, q, beta, size=None):
         p = np.random.uniform(size=size)
 
-        return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1
+        return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1
 
     def random(self, point=None, size=None):
         r"""
@@ -478,7 +457,9 @@ def random(self, point=None, size=None):
         """
         q, beta = draw_values([self.q, self.beta], point=point, size=size)
 
-        return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size)
+        return generate_samples(self._random, q, beta,
+                                dist_shape=self.shape,
+                                size=size)
 
 
 class Poisson(Discrete):
@@ -548,7 +529,9 @@ def random(self, point=None, size=None):
         array
         """
         mu = draw_values([self.mu], point=point, size=size)[0]
-        return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size)
+        return generate_samples(stats.poisson.rvs, mu,
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -565,9 +548,12 @@ def logp(self, value):
         TensorVariable
         """
         mu = self.mu
-        log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0)
+        log_prob = bound(
+            logpow(mu, value) - factln(value) - mu,
+            mu >= 0, value >= 0)
         # Return zero when mu and value are both zero
-        return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob)
+        return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0),
+                         0, log_prob)
 
 
 class NegativeBinomial(Discrete):
@@ -644,12 +630,14 @@ def random(self, point=None, size=None):
         array
         """
         mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size)
-        g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size)
+        g = generate_samples(self._random, mu=mu, alpha=alpha,
+                             dist_shape=self.shape,
+                             size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
         return np.asarray(stats.poisson.rvs(g)).reshape(g.shape)
 
     def _random(self, mu, alpha, size):
-        r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's
+        r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's
         parametrization to scipy.gamma. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -676,17 +664,15 @@ def logp(self, value):
         """
         mu = self.mu
         alpha = self.alpha
-        negbinom = bound(
-            binomln(value + alpha - 1, value)
-            + logpow(mu / (mu + alpha), value)
-            + logpow(alpha / (mu + alpha), alpha),
-            value >= 0,
-            mu > 0,
-            alpha > 0,
-        )
+        negbinom = bound(binomln(value + alpha - 1, value)
+                         + logpow(mu / (mu + alpha), value)
+                         + logpow(alpha / (mu + alpha), alpha),
+                         value >= 0, mu > 0, alpha > 0)
 
         # Return Poisson when alpha gets very large.
-        return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom)
+        return tt.switch(tt.gt(alpha, 1e10),
+                         Poisson.dist(self.mu).logp(value),
+                         negbinom)
 
 
 class Geometric(Discrete):
@@ -749,7 +735,9 @@ def random(self, point=None, size=None):
         array
         """
         p = draw_values([self.p], point=point, size=size)[0]
-        return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size)
+        return generate_samples(np.random.geometric, p,
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -766,7 +754,8 @@ def logp(self, value):
         TensorVariable
         """
         p = self.p
-        return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
+        return bound(tt.log(p) + logpow(1 - p, value - 1),
+                     0 <= p, p <= 1, value >= 1)
 
 
 class DiscreteUniform(Discrete):
@@ -812,7 +801,8 @@ def __init__(self, lower, upper, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.lower = intX(tt.floor(lower))
         self.upper = intX(tt.floor(upper))
-        self.mode = tt.maximum(intX(tt.floor((upper + lower) / 2.0)), self.lower)
+        self.mode = tt.maximum(
+            intX(tt.floor((upper + lower) / 2.)), self.lower)
 
     def _random(self, lower, upper, size=None):
         # This way seems to be the only to deal with lower and upper
@@ -838,7 +828,10 @@ def random(self, point=None, size=None):
         array
         """
         lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
-        return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size)
+        return generate_samples(self._random,
+                                lower, upper,
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -856,7 +849,8 @@ def logp(self, value):
         """
         upper = self.upper
         lower = self.lower
-        return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper)
+        return bound(-tt.log(upper - lower + 1),
+                     lower <= value, value <= upper)
 
 
 class Categorical(Discrete):
@@ -929,13 +923,11 @@ def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
         p = p / np.sum(p, axis=-1, keepdims=True)
 
-        return generate_samples(
-            random_choice,
-            p=p,
-            broadcast_shape=p.shape[:-1] or (1,),
-            dist_shape=self.shape,
-            size=size,
-        )
+        return generate_samples(random_choice,
+                                p=p,
+                                broadcast_shape=p.shape[:-1] or (1,),
+                                dist_shape=self.shape,
+                                size=size)
 
     def logp(self, value):
         r"""
@@ -961,9 +953,13 @@ def logp(self, value):
 
         if p.ndim > 1:
             if p.ndim > value_clip.ndim:
-                value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
+                value_clip = tt.shape_padleft(
+                    value_clip, p_.ndim - value_clip.ndim
+                )
             elif p.ndim < value_clip.ndim:
-                p = tt.shape_padleft(p, value_clip.ndim - p_.ndim)
+                p = tt.shape_padleft(
+                    p, value_clip.ndim - p_.ndim
+                )
             pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
             a = tt.log(
                 take_along_axis(
@@ -974,9 +970,8 @@ def logp(self, value):
         else:
             a = tt.log(p[value_clip])
 
-        return bound(
-            a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1)
-        )
+        return bound(a, value >= 0, value <= (k - 1),
+                     tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
 
 
 class Constant(Discrete):
@@ -990,10 +985,8 @@ class Constant(Discrete):
     """
 
     def __init__(self, c, *args, **kwargs):
-        warnings.warn(
-            "Constant has been deprecated. We recommend using a Deterministic object instead.",
-            DeprecationWarning,
-        )
+        warnings.warn("Constant has been deprecated. We recommend using a Deterministic object instead.",
+                    DeprecationWarning)
         super().__init__(*args, **kwargs)
         self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c)
 
@@ -1020,7 +1013,8 @@ def random(self, point=None, size=None):
         def _random(c, dtype=dtype, size=None):
             return np.full(size, fill_value=c, dtype=dtype)
 
-        return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype)
+        return generate_samples(_random, c=c, dist_shape=self.shape,
+                                size=size).astype(dtype)
 
     def logp(self, value):
         r"""
@@ -1118,7 +1112,9 @@ def random(self, point=None, size=None):
         array
         """
         theta, psi = draw_values([self.theta, self.psi], point=point, size=size)
-        g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size)
+        g = generate_samples(stats.poisson.rvs, theta,
+                             dist_shape=self.shape,
+                             size=size)
         g, psi = broadcast_distribution_samples([g, psi], size=size)
         return g * (np.random.random(g.shape) < psi)
 
@@ -1142,10 +1138,13 @@ def logp(self, value):
         logp_val = tt.switch(
             tt.gt(value, 0),
             tt.log(psi) + self.pois.logp(value),
-            logaddexp(tt.log1p(-psi), tt.log(psi) - theta),
-        )
+            logaddexp(tt.log1p(-psi), tt.log(psi) - theta))
 
-        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta)
+        return bound(
+            logp_val,
+            0 <= value,
+            0 <= psi, psi <= 1,
+            0 <= theta)
 
 
 class ZeroInflatedBinomial(Discrete):
@@ -1225,7 +1224,9 @@ def random(self, point=None, size=None):
         array
         """
         n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size)
-        g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size)
+        g = generate_samples(stats.binom.rvs, n, p,
+                             dist_shape=self.shape,
+                             size=size)
         g, psi = broadcast_distribution_samples([g, psi], size=size)
         return g * (np.random.random(g.shape) < psi)
 
@@ -1250,10 +1251,13 @@ def logp(self, value):
         logp_val = tt.switch(
             tt.gt(value, 0),
             tt.log(psi) + self.bin.logp(value),
-            logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)),
-        )
+            logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)))
 
-        return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1)
+        return bound(
+            logp_val,
+            0 <= value, value <= n,
+            0 <= psi, psi <= 1,
+            0 <= p, p <= 1)
 
 
 class ZeroInflatedNegativeBinomial(Discrete):
@@ -1349,14 +1353,21 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size)
-        g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size)
+        mu, alpha, psi = draw_values(
+            [self.mu, self.alpha, self.psi], point=point, size=size)
+        g = generate_samples(
+            self._random,
+            mu=mu,
+            alpha=alpha,
+            dist_shape=self.shape,
+            size=size
+        )
         g[g == 0] = np.finfo(float).eps  # Just in case
         g, psi = broadcast_distribution_samples([g, psi], size=size)
         return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi)
 
     def _random(self, mu, alpha, size):
-        r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's
+        r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's
         parametrization to scipy.gamma. All parameter arrays should have
         been broadcasted properly by generate_samples at this point and size is
         the scipy.rvs representation.
@@ -1387,12 +1398,19 @@ def logp(self, value):
 
         logp_other = tt.log(psi) + self.nb.logp(value)
         logp_0 = logaddexp(
-            tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))
-        )
+            tt.log1p(-psi),
+            tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)))
 
-        logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0)
+        logp_val = tt.switch(
+            tt.gt(value, 0),
+            logp_other,
+            logp_0)
 
-        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0)
+        return bound(
+            logp_val,
+            0 <= value,
+            0 <= psi, psi <= 1,
+            mu > 0, alpha > 0)
 
 
 class OrderedLogistic(Categorical):
@@ -1466,14 +1484,11 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         self.cutpoints = tt.as_tensor_variable(cutpoints)
 
         pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta))
-        p_cum = tt.concatenate(
-            [
-                tt.zeros_like(tt.shape_padright(pa[..., 0])),
-                pa,
-                tt.ones_like(tt.shape_padright(pa[..., 0])),
-            ],
-            axis=-1,
-        )
+        p_cum = tt.concatenate([
+            tt.zeros_like(tt.shape_padright(pa[..., 0])),
+            pa,
+            tt.ones_like(tt.shape_padright(pa[..., 0]))
+        ], axis=-1)
         p = p_cum[..., 1:] - p_cum[..., :-1]
 
         super().__init__(p=p, *args, **kwargs)
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 01783d35e5..b22ffd7ce4 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -30,7 +30,8 @@
 
 from pymc3.theanof import floatX
 from . import transforms
-from .distribution import Continuous, Discrete, draw_values, generate_samples, _DrawValuesContext
+from .distribution import (Continuous, Discrete, draw_values, generate_samples,
+                           _DrawValuesContext)
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
@@ -39,22 +40,15 @@
 from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker
 
 
-__all__ = [
-    "MvNormal",
-    "MvStudentT",
-    "Dirichlet",
-    "Multinomial",
-    "Wishart",
-    "WishartBartlett",
-    "LKJCorr",
-    "LKJCholeskyCov",
-    "MatrixNormal",
-    "KroneckerNormal",
-]
+__all__ = ['MvNormal', 'MvStudentT', 'Dirichlet',
+           'Multinomial', 'Wishart', 'WishartBartlett',
+           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal',
+           'KroneckerNormal']
 
 
 class _QuadFormBase(Continuous):
-    def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **kwargs):
+    def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
+                 *args, **kwargs):
         super().__init__(*args, **kwargs)
         if len(self.shape) > 2:
             raise ValueError("Only 1 or 2 dimensions are allowed.")
@@ -62,40 +56,40 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **
         if chol is not None and not lower:
             chol = chol.T
         if len([i for i in [tau, cov, chol] if i is not None]) != 1:
-            raise ValueError(
-                "Incompatible parameterization. " "Specify exactly one of tau, cov, " "or chol."
-            )
+            raise ValueError('Incompatible parameterization. '
+                             'Specify exactly one of tau, cov, '
+                             'or chol.')
         self.mu = mu = tt.as_tensor_variable(mu)
         self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
         # Step methods and advi do not catch LinAlgErrors at the
         # moment. We work around that by using a cholesky op
         # that returns a nan as first entry instead of raising
         # an error.
-        cholesky = Cholesky(lower=True, on_error="nan")
+        cholesky = Cholesky(lower=True, on_error='nan')
 
         if cov is not None:
             self.k = cov.shape[0]
-            self._cov_type = "cov"
+            self._cov_type = 'cov'
             cov = tt.as_tensor_variable(cov)
             if cov.ndim != 2:
-                raise ValueError("cov must be two dimensional.")
+                raise ValueError('cov must be two dimensional.')
             self.chol_cov = cholesky(cov)
             self.cov = cov
             self._n = self.cov.shape[-1]
         elif tau is not None:
             self.k = tau.shape[0]
-            self._cov_type = "tau"
+            self._cov_type = 'tau'
             tau = tt.as_tensor_variable(tau)
             if tau.ndim != 2:
-                raise ValueError("tau must be two dimensional.")
+                raise ValueError('tau must be two dimensional.')
             self.chol_tau = cholesky(tau)
             self.tau = tau
             self._n = self.tau.shape[-1]
         else:
             self.k = chol.shape[0]
-            self._cov_type = "chol"
+            self._cov_type = 'chol'
             if chol.ndim != 2:
-                raise ValueError("chol must be two dimensional.")
+                raise ValueError('chol must be two dimensional.')
             self.chol_cov = tt.as_tensor_variable(chol)
             self._n = self.chol_cov.shape[-1]
 
@@ -103,7 +97,7 @@ def _quaddist(self, value):
         """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma."""
         mu = self.mu
         if value.ndim > 2 or value.ndim == 0:
-            raise ValueError("Invalid dimension for value: %s" % value.ndim)
+            raise ValueError('Invalid dimension for value: %s' % value.ndim)
         if value.ndim == 1:
             onedim = True
             value = value[None, :]
@@ -112,11 +106,11 @@ def _quaddist(self, value):
 
         delta = value - mu
 
-        if self._cov_type == "cov":
+        if self._cov_type == 'cov':
             # Use this when Theano#5908 is released.
             # return MvNormalLogp()(self.cov, delta)
             dist, logdet, ok = self._quaddist_cov(delta)
-        elif self._cov_type == "tau":
+        elif self._cov_type == 'tau':
             dist, logdet, ok = self._quaddist_tau(delta)
         else:
             dist, logdet, ok = self._quaddist_chol(delta)
@@ -228,7 +222,8 @@ class MvNormal(_QuadFormBase):
         vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T)
     """
 
-    def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs):
+    def __init__(self, mu, cov=None, tau=None, chol=None, lower=True,
+                 *args, **kwargs):
         super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs)
         self.mean = self.median = self.mode = self.mu = self.mu
 
@@ -258,35 +253,40 @@ def random(self, point=None, size=None):
                 except TypeError:
                     size = (size,)
 
-        if self._cov_type == "cov":
+        if self._cov_type == 'cov':
             mu, cov = draw_values([self.mu, self.cov], point=point, size=size)
             if mu.shape[-1] != cov.shape[-1]:
                 raise ValueError("Shapes for mu and cov don't match")
 
             try:
-                dist = stats.multivariate_normal(mean=mu, cov=cov, allow_singular=True)
+                dist = stats.multivariate_normal(
+                    mean=mu, cov=cov, allow_singular=True)
             except ValueError:
                 size += (mu.shape[-1],)
                 return np.nan * np.zeros(size)
             return dist.rvs(size)
-        elif self._cov_type == "chol":
-            mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size)
+        elif self._cov_type == 'chol':
+            mu, chol = draw_values([self.mu, self.chol_cov],
+                                   point=point, size=size)
             if size and mu.ndim == len(size) and mu.shape == size:
                 mu = mu[..., np.newaxis]
             if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1:
                 raise ValueError("Shapes for mu and chol don't match")
-            broadcast_shape = np.broadcast(np.empty(mu.shape[:-1]), np.empty(chol.shape[:-2])).shape
+            broadcast_shape = (
+                np.broadcast(np.empty(mu.shape[:-1]),
+                             np.empty(chol.shape[:-2])).shape
+            )
 
             mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],))
             chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:])
             # If mu and chol were fixed by the point, only the standard normal
             # should change
-            if mu.shape[: len(size)] != size:
+            if mu.shape[:len(size)] != size:
                 std_norm_shape = size + mu.shape
             else:
                 std_norm_shape = mu.shape
             standard_normal = np.random.standard_normal(std_norm_shape)
-            return mu + np.einsum("...ij,...j->...i", chol, standard_normal)
+            return mu + np.einsum('...ij,...j->...i', chol, standard_normal)
         else:
             mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
             if mu.shape[-1] != tau[0].shape[-1]:
@@ -299,7 +299,8 @@ def random(self, point=None, size=None):
                 return np.nan * np.zeros(size)
 
             standard_normal = np.random.standard_normal(size)
-            transformed = linalg.solve_triangular(chol, standard_normal.T, lower=True)
+            transformed = linalg.solve_triangular(
+                chol, standard_normal.T, lower=True)
             return mu + transformed.T
 
     def logp(self, value):
@@ -318,7 +319,7 @@ def logp(self, value):
         """
         quaddist, logdet, ok = self._quaddist(value)
         k = floatX(value.shape[-1])
-        norm = -0.5 * k * pm.floatX(np.log(2 * np.pi))
+        norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi))
         return bound(norm - 0.5 * quaddist - logdet, ok)
 
     def _distr_parameters_for_repr(self):
@@ -366,12 +367,11 @@ class MvStudentT(_QuadFormBase):
         Whether the cholesky fatcor is given as a lower triangular matrix.
     """
 
-    def __init__(
-        self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, lower=True, *args, **kwargs
-    ):
+    def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None,
+                 lower=True, *args, **kwargs):
         if Sigma is not None:
             if cov is not None:
-                raise ValueError("Specify only one of cov and Sigma")
+                raise ValueError('Specify only one of cov and Sigma')
             cov = Sigma
         super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs)
         self.nu = nu = tt.as_tensor_variable(nu)
@@ -396,14 +396,14 @@ def random(self, point=None, size=None):
         """
         with _DrawValuesContext():
             nu, mu = draw_values([self.nu, self.mu], point=point, size=size)
-            if self._cov_type == "cov":
-                (cov,) = draw_values([self.cov], point=point, size=size)
+            if self._cov_type == 'cov':
+                cov, = draw_values([self.cov], point=point, size=size)
                 dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov)
-            elif self._cov_type == "tau":
-                (tau,) = draw_values([self.tau], point=point, size=size)
+            elif self._cov_type == 'tau':
+                tau, = draw_values([self.tau], point=point, size=size)
                 dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau)
             else:
-                (chol,) = draw_values([self.chol_cov], point=point, size=size)
+                chol, = draw_values([self.chol_cov], point=point, size=size)
                 dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol)
 
             samples = dist.random(point, size)
@@ -428,12 +428,10 @@ def logp(self, value):
         quaddist, logdet, ok = self._quaddist(value)
         k = floatX(value.shape[-1])
 
-        norm = (
-            gammaln((self.nu + k) / 2.0)
-            - gammaln(self.nu / 2.0)
-            - 0.5 * k * floatX(np.log(self.nu * np.pi))
-        )
-        inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu)
+        norm = (gammaln((self.nu + k) / 2.)
+                - gammaln(self.nu / 2.)
+                - 0.5 * k * floatX(np.log(self.nu * np.pi)))
+        inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu)
         return bound(norm + inner - logdet, ok)
 
     def _distr_parameters_for_repr(self):
@@ -464,19 +462,20 @@ class Dirichlet(Continuous):
         Concentration parameters (a > 0).
     """
 
-    def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs):
+    def __init__(self, a, transform=transforms.stick_breaking,
+                 *args, **kwargs):
 
-        if kwargs.get("shape") is None:
+        if kwargs.get('shape') is None:
             warnings.warn(
                 (
                     "Shape not explicitly set. "
                     "Please, set the value using the `shape` keyword argument. "
                     "Using the test value to infer the shape."
                 ),
-                DeprecationWarning,
+                DeprecationWarning
             )
             try:
-                kwargs["shape"] = np.shape(get_test_value(a))
+                kwargs['shape'] = np.shape(get_test_value(a))
             except AttributeError:
                 pass
 
@@ -486,13 +485,15 @@ def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs):
         self.a = a = tt.as_tensor_variable(a)
         self.mean = a / tt.sum(a)
 
-        self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan)
+        self.mode = tt.switch(tt.all(a > 1),
+                              (a - 1) / tt.sum(a - 1),
+                              np.nan)
 
     def _random(self, a, size=None):
         gen = stats.dirichlet.rvs
         shape = tuple(np.atleast_1d(self.shape))
-        if size[-len(shape) :] == shape:
-            real_size = size[: -len(shape)]
+        if size[-len(shape):] == shape:
+            real_size = size[:-len(shape)]
         else:
             real_size = size
         if self.size_prefix:
@@ -527,7 +528,10 @@ def random(self, point=None, size=None):
         array
         """
         a = draw_values([self.a], point=point, size=size)[0]
-        samples = generate_samples(self._random, a=a, dist_shape=self.shape, size=size)
+        samples = generate_samples(self._random,
+                                   a=a,
+                                   dist_shape=self.shape,
+                                   size=size)
         return samples
 
     def logp(self, value):
@@ -547,14 +551,11 @@ def logp(self, value):
         a = self.a
 
         # only defined for sum(value) == 1
-        return bound(
-            tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)),
-            tt.all(value >= 0),
-            tt.all(value <= 1),
-            np.logical_not(a.broadcastable),
-            tt.all(a > 0),
-            broadcast_conditions=False,
-        )
+        return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
+                     + gammaln(tt.sum(a, axis=-1)),
+                     tt.all(value >= 0), tt.all(value <= 1),
+                     np.logical_not(a.broadcastable), tt.all(a > 0),
+                     broadcast_conditions=False)
 
     def _distr_parameters_for_repr(self):
         return ["a"]
@@ -597,7 +598,7 @@ def __init__(self, n, p, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         p = p / tt.sum(p, axis=-1, keepdims=True)
-        n = np.squeeze(n)  # works also if n is a tensor
+        n = np.squeeze(n) # works also if n is a tensor
 
         if len(self.shape) > 1:
             self.n = tt.shape_padright(n)
@@ -611,16 +612,17 @@ def __init__(self, n, p, *args, **kwargs):
             self.p = tt.as_tensor_variable(p)
 
         self.mean = self.n * self.p
-        mode = tt.cast(tt.round(self.mean), "int32")
+        mode = tt.cast(tt.round(self.mean), 'int32')
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
-        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
+        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
+                                diff[inc_bool_arr.nonzero()])
         self.mode = mode
 
     def _random(self, n, p, size=None, raw_size=None):
         original_dtype = p.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
-        p = p.astype("float64")
+        p = p.astype('float64')
         # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
         p /= np.sum(p, axis=-1, keepdims=True)
 
@@ -633,19 +635,23 @@ def _random(self, n, p, size=None, raw_size=None):
         # np.random.multinomial needs `n` to be a scalar int and `p` a
         # sequence so we semi flatten them and iterate over them
         size_ = to_tuple(raw_size)
-        if p.ndim > len(size_) and p.shape[: len(size_)] == size_:
+        if p.ndim > len(size_) and p.shape[:len(size_)] == size_:
             # p and n have the size_ prepend so we don't need it in np.random
             n_ = n.reshape([-1])
             p_ = p.reshape([-1, p.shape[-1]])
-            samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
+            samples = np.array([
+                np.random.multinomial(nn, pp)
+                for nn, pp in zip(n_, p_)
+            ])
             samples = samples.reshape(p.shape)
         else:
             # p and n don't have the size prepend
             n_ = n.reshape([-1])
             p_ = p.reshape([-1, p.shape[-1]])
-            samples = np.array(
-                [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)]
-            )
+            samples = np.array([
+                np.random.multinomial(nn, pp, size=size_)
+                for nn, pp in zip(n_, p_)
+            ])
             samples = np.moveaxis(samples, 0, -1)
             samples = samples.reshape(size + p.shape)
         # We cast back to the original dtype
@@ -669,14 +675,10 @@ def random(self, point=None, size=None):
         array
         """
         n, p = draw_values([self.n, self.p], point=point, size=size)
-        samples = generate_samples(
-            self._random,
-            n,
-            p,
-            dist_shape=self.shape,
-            not_broadcast_kwargs={"raw_size": size},
-            size=size,
-        )
+        samples = generate_samples(self._random, n, p,
+                                   dist_shape=self.shape,
+                                   not_broadcast_kwargs={'raw_size': size},
+                                   size=size)
         return samples
 
     def logp(self, x):
@@ -703,7 +705,7 @@ def logp(self, x):
             tt.all(p <= 1),
             tt.all(tt.eq(tt.sum(p, axis=-1), 1)),
             tt.all(tt.ge(n, 0)),
-            broadcast_conditions=False,
+            broadcast_conditions=False
         )
 
 
@@ -729,7 +731,7 @@ class PosDefMatrix(theano.Op):
     def make_node(self, x):
         x = tt.as_tensor_variable(x)
         assert x.ndim == 2
-        o = tt.TensorType(dtype="int8", broadcastable=[])()
+        o = tt.TensorType(dtype='int8', broadcastable=[])()
         return theano.Apply(self, [x], [o])
 
     # Python implementation:
@@ -738,22 +740,21 @@ def perform(self, node, inputs, outputs):
         (x,) = inputs
         (z,) = outputs
         try:
-            z[0] = np.array(posdef(x), dtype="int8")
+            z[0] = np.array(posdef(x), dtype='int8')
         except Exception:
-            pm._log.exception("Failed to check if %s positive definite", x)
+            pm._log.exception('Failed to check if %s positive definite', x)
             raise
 
     def infer_shape(self, node, shapes):
         return [[]]
 
     def grad(self, inp, grads):
-        (x,) = inp
+        x, = inp
         return [x.zeros_like(theano.config.floatX)]
 
     def __str__(self):
         return "MatrixIsPositiveDefinite"
 
-
 matrix_pos_def = PosDefMatrix()
 
 
@@ -796,20 +797,20 @@ class Wishart(Continuous):
 
     def __init__(self, nu, V, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        warnings.warn(
-            "The Wishart distribution can currently not be used "
-            "for MCMC sampling. The probability of sampling a "
-            "symmetric matrix is basically zero. Instead, please "
-            "use LKJCholeskyCov or LKJCorr. For more information "
-            "on the issues surrounding the Wishart see here: "
-            "https://github.com/pymc-devs/pymc3/issues/538.",
-            UserWarning,
-        )
+        warnings.warn('The Wishart distribution can currently not be used '
+                      'for MCMC sampling. The probability of sampling a '
+                      'symmetric matrix is basically zero. Instead, please '
+                      'use LKJCholeskyCov or LKJCorr. For more information '
+                      'on the issues surrounding the Wishart see here: '
+                      'https://github.com/pymc-devs/pymc3/issues/538.',
+                      UserWarning)
         self.nu = nu = tt.as_tensor_variable(nu)
         self.p = p = tt.as_tensor_variable(V.shape[0])
         self.V = V = tt.as_tensor_variable(V)
         self.mean = nu * V
-        self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan)
+        self.mode = tt.switch(tt.ge(nu, p + 1),
+                              (nu - p - 1) * V,
+                              np.nan)
 
     def random(self, point=None, size=None):
         """
@@ -829,8 +830,9 @@ def random(self, point=None, size=None):
         array
         """
         nu, V = draw_values([self.nu, self.V], point=point, size=size)
-        size = 1 if size is None else size
-        return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, broadcast_shape=(size,))
+        size= 1 if size is None else size
+        return generate_samples(stats.wishart.rvs, np.asscalar(nu), V,
+                                    broadcast_shape=(size,))
 
     def logp(self, X):
         """
@@ -853,19 +855,14 @@ def logp(self, X):
         IVI = det(V)
         IXI = det(X)
 
-        return bound(
-            (
-                (nu - p - 1) * tt.log(IXI)
-                - trace(matrix_inverse(V).dot(X))
-                - nu * p * tt.log(2)
-                - nu * tt.log(IVI)
-                - 2 * multigammaln(nu / 2.0, p)
-            )
-            / 2,
-            matrix_pos_def(X),
-            tt.eq(X, X.T),
-            nu > (p - 1),
-            broadcast_conditions=False,
+        return bound(((nu - p - 1) * tt.log(IXI)
+                      - trace(matrix_inverse(V).dot(X))
+                      - nu * p * tt.log(2) - nu * tt.log(IVI)
+                      - 2 * multigammaln(nu / 2., p)) / 2,
+                     matrix_pos_def(X),
+                     tt.eq(X, X.T),
+                     nu > (p - 1),
+                     broadcast_conditions=False
         )
 
 
@@ -927,18 +924,17 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
         # Inverse transform
         testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T))
         testval = linalg.cholesky(testval, lower=True)
-        diag_testval = testval[diag_idx] ** 2
+        diag_testval = testval[diag_idx]**2
         tril_testval = testval[tril_idx]
     else:
         diag_testval = None
         tril_testval = None
 
-    c = tt.sqrt(
-        ChiSquared("%s_c" % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval)
-    )
-    pm._log.info("Added new variable %s_c to model diagonal of Wishart." % name)
-    z = Normal("%s_z" % name, 0.0, 1.0, shape=n_tril, testval=tril_testval)
-    pm._log.info("Added new variable %s_z to model off-diagonals of Wishart." % name)
+    c = tt.sqrt(ChiSquared('%s_c' % name, nu - np.arange(2, 2 + n_diag), shape=n_diag,
+                           testval=diag_testval))
+    pm._log.info('Added new variable %s_c to model diagonal of Wishart.' % name)
+    z = Normal('%s_z' % name, 0., 1., shape=n_tril, testval=tril_testval)
+    pm._log.info('Added new variable %s_z to model off-diagonals of Wishart.' % name)
     # Construct A matrix
     A = tt.zeros(S.shape, dtype=np.float32)
     A = tt.set_subtensor(A[diag_idx], c)
@@ -953,24 +949,20 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
 
 def _lkj_normalizing_constant(eta, n):
     if eta == 1:
-        result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum()
+        result = gammaln(2. * tt.arange(1, int((n - 1) / 2) + 1)).sum()
         if n % 2 == 1:
-            result += (
-                0.25 * (n ** 2 - 1) * tt.log(np.pi)
-                - 0.25 * (n - 1) ** 2 * tt.log(2.0)
-                - (n - 1) * gammaln(int((n + 1) / 2))
-            )
+            result += (0.25 * (n ** 2 - 1) * tt.log(np.pi)
+                       - 0.25 * (n - 1) ** 2 * tt.log(2.)
+                       - (n - 1) * gammaln(int((n + 1) / 2)))
         else:
-            result += (
-                0.25 * n * (n - 2) * tt.log(np.pi)
-                + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0)
-                + n * gammaln(n / 2)
-                - (n - 1) * gammaln(n)
-            )
+            result += (0.25 * n * (n - 2) * tt.log(np.pi)
+                       + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.)
+                       + n * gammaln(n / 2) - (n - 1) * gammaln(n))
     else:
         result = -(n - 1) * gammaln(eta + 0.5 * (n - 1))
         k = tt.arange(1, n)
-        result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum()
+        result += (0.5 * k * tt.log(np.pi)
+                   + gammaln(eta + 0.5 * (n - 1 - k))).sum()
     return result
 
 
@@ -978,25 +970,24 @@ class _LKJCholeskyCov(Continuous):
     R"""Underlying class for covariance matrix with LKJ distributed correlations.
     See docs for LKJCholeskyCov function for more details on how to use it in models.
     """
-
     def __init__(self, eta, n, sd_dist, *args, **kwargs):
         self.n = n
         self.eta = eta
 
-        if "transform" in kwargs and kwargs["transform"] is not None:
-            raise ValueError("Invalid parameter: transform.")
-        if "shape" in kwargs:
-            raise ValueError("Invalid parameter: shape.")
+        if 'transform' in kwargs and kwargs['transform'] is not None:
+            raise ValueError('Invalid parameter: transform.')
+        if 'shape' in kwargs:
+            raise ValueError('Invalid parameter: shape.')
 
         shape = n * (n + 1) // 2
 
         if sd_dist.shape.ndim not in [0, 1]:
-            raise ValueError("Invalid shape for sd_dist.")
+            raise ValueError('Invalid shape for sd_dist.')
 
         transform = transforms.CholeskyCovPacked(n)
 
-        kwargs["shape"] = shape
-        kwargs["transform"] = transform
+        kwargs['shape'] = shape
+        kwargs['transform'] = transform
         super().__init__(*args, **kwargs)
 
         self.sd_dist = sd_dist
@@ -1026,7 +1017,9 @@ def logp(self, x):
         cumsum = tt.cumsum(x ** 2)
         variance = tt.zeros(n)
         variance = tt.inc_subtensor(variance[0], x[0] ** 2)
-        variance = tt.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
+        variance = tt.inc_subtensor(
+            variance[1:],
+            cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
         sd_vals = tt.sqrt(variance)
 
         logp_sd = self.sd_dist.logp(sd_vals).sum()
@@ -1050,40 +1043,40 @@ def _random(self, n, eta, size=1):
         P = np.eye(n) * np.ones(eta_sample_shape + (n, n))
         # original implementation in R see:
         # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
-        beta = eta - 1.0 + n / 2.0
-        r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.0
+        beta = eta - 1. + n/2.
+        r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.
         P[..., 0, 1] = r12
-        P[..., 1, 1] = np.sqrt(1.0 - r12 ** 2)
+        P[..., 1, 1] = np.sqrt(1. - r12**2)
         for mp1 in range(2, n):
             beta -= 0.5
-            y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=eta_sample_shape)
+            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape)
             z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,))
-            z = z / np.sqrt(np.einsum("ij,ij->j", z, z))
+            z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
             P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z
-            P[..., mp1, mp1] = np.sqrt(1.0 - y)
-        C = np.einsum("...ji,...jk->...ik", P, P)
+            P[..., mp1, mp1] = np.sqrt(1. - y)
+        C = np.einsum('...ji,...jk->...ik', P, P)
         D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2]))
         if D.shape in [tuple(), (1,)]:
             D = self.sd_dist.random(size=P.shape[:-1])
         elif D.ndim < C.ndim - 1:
-            D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)]
+            D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                       for _ in range(n - 1)]
             D = np.moveaxis(np.array(D), 0, C.ndim - 2)
         elif D.ndim == C.ndim - 1:
             if D.shape[-1] == 1:
-                D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)]
+                D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                           for _ in range(n - 1)]
                 D = np.concatenate(D, axis=-1)
             elif D.shape[-1] != n:
-                raise ValueError(
-                    "The size of the samples drawn from the "
-                    "supplied sd_dist.random have the wrong "
-                    "size. Expected {} but got {} instead.".format(n, D.shape[-1])
-                )
+                raise ValueError('The size of the samples drawn from the '
+                                 'supplied sd_dist.random have the wrong '
+                                 'size. Expected {} but got {} instead.'.
+                                 format(n, D.shape[-1]))
         else:
-            raise ValueError(
-                "Supplied sd_dist.random generates samples with "
-                "too many dimensions. It must yield samples "
-                "with 0 or 1 dimensions. Got {} instead".format(D.ndim - C.ndim - 2)
-            )
+            raise ValueError('Supplied sd_dist.random generates samples with '
+                             'too many dimensions. It must yield samples '
+                             'with 0 or 1 dimensions. Got {} instead'.
+                             format(D.ndim - C.ndim - 2))
         C *= D[..., :, np.newaxis] * D[..., np.newaxis, :]
         tril_idx = np.tril_indices(n, k=0)
         return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]]
@@ -1112,7 +1105,7 @@ def random(self, point=None, size=None):
         # We can only handle cov matrices with a constant n per random call
         n = np.unique(n)
         if len(n) > 1:
-            raise RuntimeError("Varying n is not supported for LKJCholeskyCov")
+            raise RuntimeError('Varying n is not supported for LKJCholeskyCov')
         n = int(n[0])
         dist_shape = ((n * (n + 1)) // 2,)
         # We make sure that eta and the drawn n get their shapes broadcasted
@@ -1129,10 +1122,10 @@ def random(self, point=None, size=None):
                 size = None
             elif size == broadcast_shape:
                 size = None
-            elif size[-len(sample_shape) :] == sample_shape:
-                size = size[: len(size) - len(sample_shape)]
-            elif size[-len(broadcast_shape) :] == broadcast_shape:
-                size = size[: len(size) - len(broadcast_shape)]
+            elif size[-len(sample_shape):] == sample_shape:
+                size = size[:len(size) - len(sample_shape)]
+            elif size[-len(broadcast_shape):] == broadcast_shape:
+                size = size[:len(size) - len(broadcast_shape)]
         # We will always provide _random with an integer size and then reshape
         # the output to get the correct size
         if size is not None:
@@ -1147,7 +1140,9 @@ def random(self, point=None, size=None):
         return samples
 
 
-def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs):
+def LKJCholeskyCov(
+        name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs
+):
     R"""Wrapper function for covariance matrix with LKJ distributed correlations.
 
     This defines a distribution over Cholesky decomposed covariance
@@ -1342,14 +1337,12 @@ class LKJCorr(Continuous):
         100(9), pp.1989-2001.
     """
 
-    def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwargs):
+    def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs):
         if (p is not None) and (n is not None) and (eta is None):
-            warnings.warn(
-                "Parameters to LKJCorr have changed: shape parameter n -> eta "
-                "dimension parameter p -> n. Please update your code. "
-                "Automatically re-assigning parameters for backwards compatibility.",
-                DeprecationWarning,
-            )
+            warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta '
+                          'dimension parameter p -> n. Please update your code. '
+                          'Automatically re-assigning parameters for backwards compatibility.',
+                          DeprecationWarning)
             self.n = p
             self.eta = n
             eta = self.eta
@@ -1358,24 +1351,20 @@ def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwar
             self.n = n
             self.eta = eta
         else:
-            raise ValueError(
-                "Invalid parameter: please use eta as the shape parameter and "
-                "n as the dimension parameter."
-            )
+            raise ValueError('Invalid parameter: please use eta as the shape parameter and '
+                             'n as the dimension parameter.')
 
         shape = n * (n - 1) // 2
         self.mean = floatX(np.zeros(shape))
 
-        if transform == "interval":
+        if transform == 'interval':
             transform = transforms.interval(-1, 1)
 
         super().__init__(shape=shape, transform=transform, *args, **kwargs)
-        warnings.warn(
-            "Parameters in LKJCorr have been rename: shape parameter n -> eta "
-            "dimension parameter p -> n. Please double check your initialization.",
-            DeprecationWarning,
-        )
-        self.tri_index = np.zeros([n, n], dtype="int32")
+        warnings.warn('Parameters in LKJCorr have been rename: shape parameter n -> eta '
+                      'dimension parameter p -> n. Please double check your initialization.',
+                      DeprecationWarning)
+        self.tri_index = np.zeros([n, n], dtype='int32')
         self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape)
         self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape)
 
@@ -1383,19 +1372,19 @@ def _random(self, n, eta, size=None):
         size = size if isinstance(size, tuple) else (size,)
         # original implementation in R see:
         # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
-        beta = eta - 1.0 + n / 2.0
-        r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=size) - 1.0
+        beta = eta - 1. + n/2.
+        r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=size) - 1.
         P = np.eye(n)[:, :, np.newaxis] * np.ones(size)
         P[0, 1] = r12
-        P[1, 1] = np.sqrt(1.0 - r12 ** 2)
+        P[1, 1] = np.sqrt(1. - r12**2)
         for mp1 in range(2, n):
             beta -= 0.5
-            y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=size)
-            z = stats.norm.rvs(loc=0, scale=1, size=(mp1,) + size)
-            z = z / np.sqrt(np.einsum("ij,ij->j", z, z))
+            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=size)
+            z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + size)
+            z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
             P[0:mp1, mp1] = np.sqrt(y) * z
-            P[mp1, mp1] = np.sqrt(1.0 - y)
-        C = np.einsum("ji...,jk...->...ik", P, P)
+            P[mp1, mp1] = np.sqrt(1. - y)
+        C = np.einsum('ji...,jk...->...ik', P, P)
         triu_idx = np.triu_indices(n, k=1)
         return C[..., triu_idx[0], triu_idx[1]]
 
@@ -1417,8 +1406,9 @@ def random(self, point=None, size=None):
         array
         """
         n, eta = draw_values([self.n, self.eta], point=point, size=size)
-        size = 1 if size is None else size
-        samples = generate_samples(self._random, n, eta, broadcast_shape=(size,))
+        size= 1 if size is None else size
+        samples = generate_samples(self._random, n, eta,
+                                   broadcast_shape=(size,))
         return samples
 
     def logp(self, x):
@@ -1442,14 +1432,12 @@ def logp(self, x):
         X = tt.fill_diagonal(X, 1)
 
         result = _lkj_normalizing_constant(eta, n)
-        result += (eta - 1.0) * tt.log(det(X))
-        return bound(
-            result,
-            tt.all(X <= 1),
-            tt.all(X >= -1),
-            matrix_pos_def(X),
-            eta > 0,
-            broadcast_conditions=False,
+        result += (eta - 1.) * tt.log(det(X))
+        return bound(result,
+                     tt.all(X <= 1), tt.all(X >= -1),
+                     matrix_pos_def(X),
+                     eta > 0,
+                     broadcast_conditions=False
         )
 
 
@@ -1542,22 +1530,12 @@ class MatrixNormal(Continuous):
                                    observed=data, shape=(m, n))
     """
 
-    def __init__(
-        self,
-        mu=0,
-        rowcov=None,
-        rowchol=None,
-        rowtau=None,
-        colcov=None,
-        colchol=None,
-        coltau=None,
-        shape=None,
-        *args,
-        **kwargs,
-    ):
+    def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
+                 colcov=None, colchol=None, coltau=None, shape=None, *args,
+                 **kwargs):
         self._setup_matrices(colcov, colchol, coltau, rowcov, rowchol, rowtau)
         if shape is None:
-            raise TypeError("shape is a required argument")
+            raise TypeError('shape is a required argument')
         assert len(shape) == 2, "shape must have length 2: mxn"
         self.shape = shape
         super().__init__(shape=shape, *args, **kwargs)
@@ -1567,68 +1545,64 @@ def __init__(
         self.solve_upper = tt.slinalg.solve_upper_triangular
 
     def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
-        cholesky = Cholesky(lower=True, on_error="raise")
+        cholesky = Cholesky(lower=True, on_error='raise')
 
         # Among-row matrices
         if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1:
-            raise ValueError(
-                "Incompatible parameterization. "
-                "Specify exactly one of rowtau, rowcov, "
-                "or rowchol."
-            )
+            raise ValueError('Incompatible parameterization. '
+                             'Specify exactly one of rowtau, rowcov, '
+                             'or rowchol.')
         if rowcov is not None:
             self.m = rowcov.shape[0]
-            self._rowcov_type = "cov"
+            self._rowcov_type = 'cov'
             rowcov = tt.as_tensor_variable(rowcov)
             if rowcov.ndim != 2:
-                raise ValueError("rowcov must be two dimensional.")
+                raise ValueError('rowcov must be two dimensional.')
             self.rowchol_cov = cholesky(rowcov)
             self.rowcov = rowcov
         elif rowtau is not None:
-            raise ValueError("rowtau not supported at this time")
+            raise ValueError('rowtau not supported at this time')
             self.m = rowtau.shape[0]
-            self._rowcov_type = "tau"
+            self._rowcov_type = 'tau'
             rowtau = tt.as_tensor_variable(rowtau)
             if rowtau.ndim != 2:
-                raise ValueError("rowtau must be two dimensional.")
+                raise ValueError('rowtau must be two dimensional.')
             self.rowchol_tau = cholesky(rowtau)
             self.rowtau = rowtau
         else:
             self.m = rowchol.shape[0]
-            self._rowcov_type = "chol"
+            self._rowcov_type = 'chol'
             if rowchol.ndim != 2:
-                raise ValueError("rowchol must be two dimensional.")
+                raise ValueError('rowchol must be two dimensional.')
             self.rowchol_cov = tt.as_tensor_variable(rowchol)
 
         # Among-column matrices
         if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1:
-            raise ValueError(
-                "Incompatible parameterization. "
-                "Specify exactly one of coltau, colcov, "
-                "or colchol."
-            )
+            raise ValueError('Incompatible parameterization. '
+                             'Specify exactly one of coltau, colcov, '
+                             'or colchol.')
         if colcov is not None:
             self.n = colcov.shape[0]
-            self._colcov_type = "cov"
+            self._colcov_type = 'cov'
             colcov = tt.as_tensor_variable(colcov)
             if colcov.ndim != 2:
-                raise ValueError("colcov must be two dimensional.")
+                raise ValueError('colcov must be two dimensional.')
             self.colchol_cov = cholesky(colcov)
             self.colcov = colcov
         elif coltau is not None:
-            raise ValueError("coltau not supported at this time")
+            raise ValueError('coltau not supported at this time')
             self.n = coltau.shape[0]
-            self._colcov_type = "tau"
+            self._colcov_type = 'tau'
             coltau = tt.as_tensor_variable(coltau)
             if coltau.ndim != 2:
-                raise ValueError("coltau must be two dimensional.")
+                raise ValueError('coltau must be two dimensional.')
             self.colchol_tau = cholesky(coltau)
             self.coltau = coltau
         else:
             self.n = colchol.shape[0]
-            self._colcov_type = "chol"
+            self._colcov_type = 'chol'
             if colchol.ndim != 2:
-                raise ValueError("colchol must be two dimensional.")
+                raise ValueError('colchol must be two dimensional.')
             self.colchol_cov = tt.as_tensor_variable(colchol)
 
     def random(self, point=None, size=None):
@@ -1649,8 +1623,9 @@ def random(self, point=None, size=None):
         array
         """
         mu, colchol, rowchol = draw_values(
-            [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size
-        )
+                                [self.mu, self.colchol_cov, self.rowchol_cov],
+                                point=point,
+                                size=size)
         if size is None:
             size = ()
         if size in (None, ()):
@@ -1665,15 +1640,14 @@ def random(self, point=None, size=None):
                     samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)))
             else:
                 for j in range(np.prod(size)):
-                    standard_normal = np.random.standard_normal(
-                        (self.shape[0], colchol[j].shape[-1])
-                    )
-                    samples.append(
-                        mu[j] + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T))
-                    )
+                    standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1]))
+                    samples.append(mu[j] +
+                                np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)))
             samples = np.array(samples).reshape(size + tuple(self.shape))
         return samples
 
+
+
     def _trquaddist(self, value):
         """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and
         the logdet of colcov and rowcov."""
@@ -1712,8 +1686,8 @@ def logp(self, value):
         trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value)
         m = self.m
         n = self.n
-        norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi))
-        return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet
+        norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi))
+        return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet
 
 
 class KroneckerNormal(Continuous):
@@ -1805,23 +1779,24 @@ class KroneckerNormal(Continuous):
     .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models"
     """
 
-    def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs):
+    def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None,
+                 *args, **kwargs):
         self._setup(covs, chols, evds, sigma)
         super().__init__(*args, **kwargs)
         self.mu = tt.as_tensor_variable(mu)
         self.mean = self.median = self.mode = self.mu
 
     def _setup(self, covs, chols, evds, sigma):
-        self.cholesky = Cholesky(lower=True, on_error="raise")
+        self.cholesky = Cholesky(lower=True, on_error='raise')
         if len([i for i in [covs, chols, evds] if i is not None]) != 1:
-            raise ValueError(
-                "Incompatible parameterization. " "Specify exactly one of covs, chols, " "or evds."
-            )
+            raise ValueError('Incompatible parameterization. '
+                             'Specify exactly one of covs, chols, '
+                             'or evds.')
         self._isEVD = False
         self.sigma = sigma
         self.is_noisy = self.sigma is not None and self.sigma != 0
         if covs is not None:
-            self._cov_type = "cov"
+            self._cov_type = 'cov'
             self.covs = covs
             if self.is_noisy:
                 # Noise requires eigendecomposition
@@ -1831,10 +1806,11 @@ def _setup(self, covs, chols, evds, sigma):
                 # Otherwise use cholesky as usual
                 self.chols = list(map(self.cholesky, self.covs))
                 self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
-                self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols])
+                self.sizes = tt.as_tensor_variable(
+                                [chol.shape[0] for chol in self.chols])
                 self.N = tt.prod(self.sizes)
         elif chols is not None:
-            self._cov_type = "chol"
+            self._cov_type = 'chol'
             if self.is_noisy:  # A strange case...
                 # Noise requires eigendecomposition
                 covs = [tt.dot(chol, chol.T) for chol in chols]
@@ -1843,10 +1819,11 @@ def _setup(self, covs, chols, evds, sigma):
             else:
                 self.chols = chols
                 self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
-                self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols])
+                self.sizes = tt.as_tensor_variable(
+                                [chol.shape[0] for chol in self.chols])
                 self.N = tt.prod(self.sizes)
         else:
-            self._cov_type = "evd"
+            self._cov_type = 'evd'
             self._setup_evd(evds)
 
     def _setup_evd(self, eigh_iterable):
@@ -1858,18 +1835,18 @@ def _setup_evd(self, eigh_iterable):
         self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep))
         self.eigs = kron_diag(*self.eigs_sep)  # Combine separate eigs
         if self.is_noisy:
-            self.eigs += self.sigma ** 2
+            self.eigs += self.sigma**2
         self.N = self.eigs.shape[0]
 
     def _setup_random(self):
-        if not hasattr(self, "mv_params"):
-            self.mv_params = {"mu": self.mu}
-            if self._cov_type == "cov":
+        if not hasattr(self, 'mv_params'):
+            self.mv_params = {'mu': self.mu}
+            if self._cov_type == 'cov':
                 cov = kronecker(*self.covs)
                 if self.is_noisy:
-                    cov = cov + self.sigma ** 2 * tt.identity_like(cov)
-                self.mv_params["cov"] = cov
-            elif self._cov_type == "chol":
+                    cov = cov + self.sigma**2 * tt.identity_like(cov)
+                self.mv_params['cov'] = cov
+            elif self._cov_type == 'chol':
                 if self.is_noisy:
                     covs = []
                     for eig, Q in zip(self.eigs_sep, self.Qs):
@@ -1877,19 +1854,19 @@ def _setup_random(self):
                         covs.append(cov_i)
                     cov = kronecker(*covs)
                     if self.is_noisy:
-                        cov = cov + self.sigma ** 2 * tt.identity_like(cov)
-                    self.mv_params["chol"] = self.cholesky(cov)
+                        cov = cov + self.sigma**2 * tt.identity_like(cov)
+                    self.mv_params['chol'] = self.cholesky(cov)
                 else:
-                    self.mv_params["chol"] = kronecker(*self.chols)
-            elif self._cov_type == "evd":
+                    self.mv_params['chol'] = kronecker(*self.chols)
+            elif self._cov_type == 'evd':
                 covs = []
                 for eig, Q in zip(self.eigs_sep, self.Qs):
                     cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T))
                     covs.append(cov_i)
                 cov = kronecker(*covs)
                 if self.is_noisy:
-                    cov = cov + self.sigma ** 2 * tt.identity_like(cov)
-                self.mv_params["cov"] = cov
+                    cov = cov + self.sigma**2 * tt.identity_like(cov)
+                self.mv_params['cov'] = cov
 
     def random(self, point=None, size=None):
         """
@@ -1917,7 +1894,7 @@ def random(self, point=None, size=None):
     def _quaddist(self, value):
         """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))"""
         if value.ndim > 2 or value.ndim == 0:
-            raise ValueError("Invalid dimension for value: %s" % value.ndim)
+            raise ValueError('Invalid dimension for value: %s' % value.ndim)
         if value.ndim == 1:
             onedim = True
             value = value[None, :]
@@ -1927,14 +1904,14 @@ def _quaddist(self, value):
         delta = value - self.mu
         if self._isEVD:
             sqrt_quad = kron_dot(self.QTs, delta.T)
-            sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None])
+            sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None])
             logdet = tt.sum(tt.log(self.eigs))
         else:
             sqrt_quad = kron_solve_lower(self.chols, delta.T)
             logdet = 0
             for chol_size, chol_diag in zip(self.sizes, self.chol_diags):
-                logchol = tt.log(chol_diag) * self.N / chol_size
-                logdet += tt.sum(2 * logchol)
+                logchol = tt.log(chol_diag) * self.N/chol_size
+                logdet += tt.sum(2*logchol)
         # Square each sample
         quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T)
         if onedim:
@@ -1956,4 +1933,4 @@ def logp(self, value):
         TensorVariable
         """
         quad, logdet = self._quaddist(value)
-        return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0
+        return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0

From 66f4885cb922be078783b5336ae987a28bef8c6f Mon Sep 17 00:00:00 2001
From: Ali Septiandri <ali.septiandri@revolut.com>
Date: Mon, 28 Sep 2020 11:41:58 +0100
Subject: [PATCH 7/7] Fix error in quadpotential.py

---
 pymc3/step_methods/hmc/quadpotential.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index 8750fd6484..c55f12a365 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -684,10 +684,3 @@ def random(self):
         def energy(self, x):
             """Compute kinetic energy at a position in parameter space."""
             return 0.5 * x.T.dot(self.velocity(x))
-            n = self.factor.solve_Lt(n)
-            n = self.factor.apply_Pt(n)
-            return n
-
-        def energy(self, x):
-            """Compute kinetic energy at a position in parameter space."""
-            return 0.5 * x.T.dot(self.velocity(x))