From d9b67bb300c03fd96d7e9fb42e6c7a239fc77e15 Mon Sep 17 00:00:00 2001 From: Ali Akbar Septiandri Date: Sun, 27 Sep 2020 12:09:19 +0100 Subject: [PATCH 1/7] Run black on distributions/ and */__init__.py --- pymc3/distributions/__init__.py | 151 ++++---- pymc3/distributions/bound.py | 41 +-- pymc3/distributions/continuous.py | 12 +- pymc3/distributions/discrete.py | 251 ++++++------- pymc3/distributions/multivariate.py | 543 +++++++++++++++------------- pymc3/distributions/shape_utils.py | 12 +- pymc3/distributions/simulator.py | 1 - pymc3/distributions/special.py | 38 +- pymc3/distributions/timeseries.py | 44 +-- pymc3/distributions/transforms.py | 7 +- pymc3/plots/__init__.py | 86 +++-- pymc3/stats/__init__.py | 1 + pymc3/variational/__init__.py | 27 +- 13 files changed, 586 insertions(+), 628 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index bfecad95ef..d396d61dd6 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -100,78 +100,79 @@ from .bound import Bound -__all__ = ['Uniform', - 'Flat', - 'HalfFlat', - 'TruncatedNormal', - 'Normal', - 'Beta', - 'Kumaraswamy', - 'Exponential', - 'Laplace', - 'StudentT', - 'Cauchy', - 'HalfCauchy', - 'Gamma', - 'Weibull', - 'Bound', - 'Lognormal', - 'HalfStudentT', - 'ChiSquared', - 'HalfNormal', - 'Wald', - 'Pareto', - 'InverseGamma', - 'ExGaussian', - 'VonMises', - 'Binomial', - 'BetaBinomial', - 'Bernoulli', - 'Poisson', - 'NegativeBinomial', - 'ConstantDist', - 'Constant', - 'ZeroInflatedPoisson', - 'ZeroInflatedNegativeBinomial', - 'ZeroInflatedBinomial', - 'DiscreteUniform', - 'Geometric', - 'Categorical', - 'OrderedLogistic', - 'DensityDist', - 'Distribution', - 'Continuous', - 'Discrete', - 'NoDistribution', - 'TensorType', - 'MvNormal', - 'MatrixNormal', - 'KroneckerNormal', - 'MvStudentT', - 'Dirichlet', - 'Multinomial', - 'Wishart', - 'WishartBartlett', - 'LKJCholeskyCov', - 'LKJCorr', - 'AR1', - 'AR', - 'GaussianRandomWalk', - 'MvGaussianRandomWalk', - 'MvStudentTRandomWalk', - 'GARCH11', - 'SkewNormal', - 'Mixture', - 'NormalMixture', - 'Triangular', - 'DiscreteWeibull', - 'Gumbel', - 'Logistic', - 'LogitNormal', - 'Interpolated', - 'Bound', - 'Rice', - 'Moyal', - 'Simulator', - 'fast_sample_posterior_predictive' - ] +__all__ = [ + "Uniform", + "Flat", + "HalfFlat", + "TruncatedNormal", + "Normal", + "Beta", + "Kumaraswamy", + "Exponential", + "Laplace", + "StudentT", + "Cauchy", + "HalfCauchy", + "Gamma", + "Weibull", + "Bound", + "Lognormal", + "HalfStudentT", + "ChiSquared", + "HalfNormal", + "Wald", + "Pareto", + "InverseGamma", + "ExGaussian", + "VonMises", + "Binomial", + "BetaBinomial", + "Bernoulli", + "Poisson", + "NegativeBinomial", + "ConstantDist", + "Constant", + "ZeroInflatedPoisson", + "ZeroInflatedNegativeBinomial", + "ZeroInflatedBinomial", + "DiscreteUniform", + "Geometric", + "Categorical", + "OrderedLogistic", + "DensityDist", + "Distribution", + "Continuous", + "Discrete", + "NoDistribution", + "TensorType", + "MvNormal", + "MatrixNormal", + "KroneckerNormal", + "MvStudentT", + "Dirichlet", + "Multinomial", + "Wishart", + "WishartBartlett", + "LKJCholeskyCov", + "LKJCorr", + "AR1", + "AR", + "GaussianRandomWalk", + "MvGaussianRandomWalk", + "MvStudentTRandomWalk", + "GARCH11", + "SkewNormal", + "Mixture", + "NormalMixture", + "Triangular", + "DiscreteWeibull", + "Gumbel", + "Logistic", + "LogitNormal", + "Interpolated", + "Bound", + "Rice", + "Moyal", + "Simulator", + "fast_sample_posterior_predictive", +] diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py index 573b578957..b52dc905b1 100644 --- a/pymc3/distributions/bound.py +++ b/pymc3/distributions/bound.py @@ -82,16 +82,13 @@ def _random(self, lower, upper, point=None, size=None): upper = np.asarray(upper) if lower.size > 1 or upper.size > 1: raise ValueError( - "Drawing samples from distributions with " - "array-valued bounds is not supported." + "Drawing samples from distributions with " "array-valued bounds is not supported." ) total_size = np.prod(size).astype(np.int) samples = [] s = 0 while s < total_size: - sample = np.atleast_1d( - self._wrapped.random(point=point, size=total_size) - ).flatten() + sample = np.atleast_1d(self._wrapped.random(point=point, size=total_size)).flatten() select = sample[np.logical_and(sample >= lower, sample <= upper)] samples.append(select) @@ -128,7 +125,7 @@ def random(self, point=None, size=None): upper, dist_shape=self.shape, size=size, - not_broadcast_kwargs={'point': point}, + not_broadcast_kwargs={"point": point}, ) elif self.lower is not None: lower = draw_values([self.lower], point=point, size=size) @@ -138,7 +135,7 @@ def random(self, point=None, size=None): np.inf, dist_shape=self.shape, size=size, - not_broadcast_kwargs={'point': point}, + not_broadcast_kwargs={"point": point}, ) else: upper = draw_values([self.upper], point=point, size=size) @@ -148,7 +145,7 @@ def random(self, point=None, size=None): upper, dist_shape=self.shape, size=size, - not_broadcast_kwargs={'point': point}, + not_broadcast_kwargs={"point": point}, ) @@ -168,9 +165,7 @@ def __init__(self, distribution, lower, upper, transform="infer", *args, **kwarg if lower is not None: default = lower + 1 - super().__init__( - distribution, lower, upper, default, *args, transform=transform, **kwargs - ) + super().__init__(distribution, lower, upper, default, *args, transform=transform, **kwargs) class _ContinuousBounded(_Bounded, Continuous): @@ -215,9 +210,7 @@ def __init__(self, distribution, lower, upper, transform="infer", *args, **kwarg else: default = None - super().__init__( - distribution, lower, upper, default, *args, transform=transform, **kwargs - ) + super().__init__(distribution, lower, upper, default, *args, transform=transform, **kwargs) class Bound: @@ -283,23 +276,11 @@ def __call__(self, name, *args, **kwargs): transform = kwargs.pop("transform", "infer") if issubclass(self.distribution, Continuous): return _ContinuousBounded( - name, - self.distribution, - self.lower, - self.upper, - transform, - *args, - **kwargs + name, self.distribution, self.lower, self.upper, transform, *args, **kwargs ) elif issubclass(self.distribution, Discrete): return _DiscreteBounded( - name, - self.distribution, - self.lower, - self.upper, - transform, - *args, - **kwargs + name, self.distribution, self.lower, self.upper, transform, *args, **kwargs ) else: raise ValueError("Distribution is neither continuous nor discrete.") @@ -311,8 +292,6 @@ def dist(self, *args, **kwargs): ) elif issubclass(self.distribution, Discrete): - return _DiscreteBounded.dist( - self.distribution, self.lower, self.upper, *args, **kwargs - ) + return _DiscreteBounded.dist(self.distribution, self.lower, self.upper, *args, **kwargs) else: raise ValueError("Distribution is neither continuous nor discrete.") diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 5ccdb7f5d2..80fc6a0b3e 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -134,9 +134,7 @@ def assert_negative_support(var, label, distname, value=-1e-6): support = False if np.any(support): - msg = "The variable specified for {} has negative support for {}, ".format( - label, distname - ) + msg = "The variable specified for {} has negative support for {}, ".format(label, distname) msg += "likely making it unsuitable for this parameter." warnings.warn(msg) @@ -712,7 +710,7 @@ def random(self, point=None, size=None): ) def _random(self, mu, sigma, lower, upper, size): - """ Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's + """Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's parametrization to scipy.truncnorm. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -3447,7 +3445,7 @@ def random(self, point=None, size=None): ) def _random(self, c, lower, upper, size): - """ Wrapper around stats.triang.rvs that converts Triangular's + """Wrapper around stats.triang.rvs that converts Triangular's parametrization to scipy.triang. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -3706,7 +3704,7 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs): self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma)) self.b = b = tt.as_tensor_variable(floatX(b)) - nu_sigma_ratio = -nu ** 2 / (2 * sigma ** 2) + nu_sigma_ratio = -(nu ** 2) / (2 * sigma ** 2) self.mean = ( sigma * np.sqrt(np.pi / 2) @@ -3762,7 +3760,7 @@ def random(self, point=None, size=None): return generate_samples(self._random, nu=nu, sigma=sigma, dist_shape=self.shape, size=size) def _random(self, nu, sigma, size): - """ Wrapper around stats.rice.rvs that converts Rice's + """Wrapper around stats.rice.rvs that converts Rice's parametrization to scipy.rice. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 090949c905..cfe87ed3d4 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -24,10 +24,23 @@ from ..theanof import floatX, intX, take_along_axis -__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull', - 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', - 'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial', - 'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic'] +__all__ = [ + "Binomial", + "BetaBinomial", + "Bernoulli", + "DiscreteWeibull", + "Poisson", + "NegativeBinomial", + "ConstantDist", + "Constant", + "ZeroInflatedPoisson", + "ZeroInflatedBinomial", + "ZeroInflatedNegativeBinomial", + "DiscreteUniform", + "Geometric", + "Categorical", + "OrderedLogistic", +] class Binomial(Discrete): @@ -96,9 +109,7 @@ def random(self, point=None, size=None): array """ n, p = draw_values([self.n, self.p], point=point, size=size) - return generate_samples(stats.binom.rvs, n=n, p=p, - dist_shape=self.shape, - size=size) + return generate_samples(stats.binom.rvs, n=n, p=p, dist_shape=self.shape, size=size) def logp(self, value): r""" @@ -119,8 +130,11 @@ def logp(self, value): return bound( binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value), - 0 <= value, value <= n, - 0 <= p, p <= 1) + 0 <= value, + value <= n, + 0 <= p, + p <= 1, + ) class BetaBinomial(Discrete): @@ -183,7 +197,7 @@ def __init__(self, alpha, beta, n, *args, **kwargs): self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) self.beta = beta = tt.as_tensor_variable(floatX(beta)) self.n = n = tt.as_tensor_variable(intX(n)) - self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8') + self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8") def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -197,8 +211,11 @@ def _random(self, alpha, beta, n, size=None): quotient, remainder = divmod(_p.shape[0], _n.shape[0]) if remainder != 0: - raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format( - _n.shape[0], _p.shape[0])) + raise TypeError( + "n has a bad size! Was cast to {}, must evenly divide {}".format( + _n.shape[0], _p.shape[0] + ) + ) if quotient != 1: _n = np.tile(_n, quotient) samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size) @@ -221,11 +238,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta, n = \ - draw_values([self.alpha, self.beta, self.n], point=point, size=size) - return generate_samples(self._random, alpha=alpha, beta=beta, n=n, - dist_shape=self.shape, - size=size) + alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size) + return generate_samples( + self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size + ) def logp(self, value): r""" @@ -243,11 +259,15 @@ def logp(self, value): """ alpha = self.alpha beta = self.beta - return bound(binomln(self.n, value) - + betaln(value + alpha, self.n - value + beta) - - betaln(alpha, beta), - value >= 0, value <= self.n, - alpha > 0, beta > 0) + return bound( + binomln(self.n, value) + + betaln(value + alpha, self.n - value + beta) + - betaln(alpha, beta), + value >= 0, + value <= self.n, + alpha > 0, + beta > 0, + ) class Bernoulli(Discrete): @@ -293,7 +313,7 @@ class Bernoulli(Discrete): def __init__(self, p=None, logit_p=None, *args, **kwargs): super().__init__(*args, **kwargs) if sum(int(var is None) for var in [p, logit_p]) != 1: - raise ValueError('Specify one of p and logit_p') + raise ValueError("Specify one of p and logit_p") if p is not None: self._is_logit = False self.p = p = tt.as_tensor_variable(floatX(p)) @@ -303,7 +323,7 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs): self.p = tt.nnet.sigmoid(floatX(logit_p)) self._logit_p = tt.as_tensor_variable(logit_p) - self.mode = tt.cast(tround(self.p), 'int8') + self.mode = tt.cast(tround(self.p), "int8") def random(self, point=None, size=None): r""" @@ -323,9 +343,7 @@ def random(self, point=None, size=None): array """ p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(stats.bernoulli.rvs, p, - dist_shape=self.shape, - size=size) + return generate_samples(stats.bernoulli.rvs, p, dist_shape=self.shape, size=size) def logp(self, value): r""" @@ -347,9 +365,8 @@ def logp(self, value): else: p = self.p return bound( - tt.switch(value, tt.log(p), tt.log(1 - p)), - value >= 0, value <= 1, - p >= 0, p <= 1) + tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1 + ) def _distr_parameters_for_repr(self): return ["p"] @@ -393,8 +410,9 @@ def DiscreteWeibull(q, b, x): Variance :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2` ======== ====================== """ + def __init__(self, q, beta, *args, **kwargs): - super().__init__(*args, defaults=('median',), **kwargs) + super().__init__(*args, defaults=("median",), **kwargs) self.q = q = tt.as_tensor_variable(floatX(q)) self.beta = beta = tt.as_tensor_variable(floatX(beta)) @@ -418,10 +436,13 @@ def logp(self, value): q = self.q beta = self.beta - return bound(tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))), - 0 <= value, - 0 < q, q < 1, - 0 < beta) + return bound( + tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))), + 0 <= value, + 0 < q, + q < 1, + 0 < beta, + ) def _ppf(self, p): r""" @@ -431,12 +452,12 @@ def _ppf(self, p): q = self.q beta = self.beta - return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1. / beta)) - 1).astype('int64') + return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype("int64") def _random(self, q, beta, size=None): p = np.random.uniform(size=size) - return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1 + return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1 def random(self, point=None, size=None): r""" @@ -457,9 +478,7 @@ def random(self, point=None, size=None): """ q, beta = draw_values([self.q, self.beta], point=point, size=size) - return generate_samples(self._random, q, beta, - dist_shape=self.shape, - size=size) + return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size) class Poisson(Discrete): @@ -529,9 +548,7 @@ def random(self, point=None, size=None): array """ mu = draw_values([self.mu], point=point, size=size)[0] - return generate_samples(stats.poisson.rvs, mu, - dist_shape=self.shape, - size=size) + return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size) def logp(self, value): r""" @@ -548,12 +565,9 @@ def logp(self, value): TensorVariable """ mu = self.mu - log_prob = bound( - logpow(mu, value) - factln(value) - mu, - mu >= 0, value >= 0) + log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0) # Return zero when mu and value are both zero - return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), - 0, log_prob) + return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob) class NegativeBinomial(Discrete): @@ -630,14 +644,12 @@ def random(self, point=None, size=None): array """ mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size) - g = generate_samples(self._random, mu=mu, alpha=alpha, - dist_shape=self.shape, - size=size) + g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) g[g == 0] = np.finfo(float).eps # Just in case return np.asarray(stats.poisson.rvs(g)).reshape(g.shape) def _random(self, mu, alpha, size): - r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's + r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's parametrization to scipy.gamma. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -664,15 +676,17 @@ def logp(self, value): """ mu = self.mu alpha = self.alpha - negbinom = bound(binomln(value + alpha - 1, value) - + logpow(mu / (mu + alpha), value) - + logpow(alpha / (mu + alpha), alpha), - value >= 0, mu > 0, alpha > 0) + negbinom = bound( + binomln(value + alpha - 1, value) + + logpow(mu / (mu + alpha), value) + + logpow(alpha / (mu + alpha), alpha), + value >= 0, + mu > 0, + alpha > 0, + ) # Return Poisson when alpha gets very large. - return tt.switch(tt.gt(alpha, 1e10), - Poisson.dist(self.mu).logp(value), - negbinom) + return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom) class Geometric(Discrete): @@ -735,9 +749,7 @@ def random(self, point=None, size=None): array """ p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(np.random.geometric, p, - dist_shape=self.shape, - size=size) + return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size) def logp(self, value): r""" @@ -754,8 +766,7 @@ def logp(self, value): TensorVariable """ p = self.p - return bound(tt.log(p) + logpow(1 - p, value - 1), - 0 <= p, p <= 1, value >= 1) + return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) class DiscreteUniform(Discrete): @@ -801,8 +812,7 @@ def __init__(self, lower, upper, *args, **kwargs): super().__init__(*args, **kwargs) self.lower = intX(tt.floor(lower)) self.upper = intX(tt.floor(upper)) - self.mode = tt.maximum( - intX(tt.floor((upper + lower) / 2.)), self.lower) + self.mode = tt.maximum(intX(tt.floor((upper + lower) / 2.0)), self.lower) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -828,10 +838,7 @@ def random(self, point=None, size=None): array """ lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples(self._random, - lower, upper, - dist_shape=self.shape, - size=size) + return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size) def logp(self, value): r""" @@ -849,8 +856,7 @@ def logp(self, value): """ upper = self.upper lower = self.lower - return bound(-tt.log(upper - lower + 1), - lower <= value, value <= upper) + return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper) class Categorical(Discrete): @@ -923,11 +929,13 @@ def random(self, point=None, size=None): p, k = draw_values([self.p, self.k], point=point, size=size) p = p / np.sum(p, axis=-1, keepdims=True) - return generate_samples(random_choice, - p=p, - broadcast_shape=p.shape[:-1] or (1,), - dist_shape=self.shape, - size=size) + return generate_samples( + random_choice, + p=p, + broadcast_shape=p.shape[:-1] or (1,), + dist_shape=self.shape, + size=size, + ) def logp(self, value): r""" @@ -953,13 +961,9 @@ def logp(self, value): if p.ndim > 1: if p.ndim > value_clip.ndim: - value_clip = tt.shape_padleft( - value_clip, p_.ndim - value_clip.ndim - ) + value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim) elif p.ndim < value_clip.ndim: - p = tt.shape_padleft( - p, value_clip.ndim - p_.ndim - ) + p = tt.shape_padleft(p, value_clip.ndim - p_.ndim) pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1)) a = tt.log( take_along_axis( @@ -970,8 +974,9 @@ def logp(self, value): else: a = tt.log(p[value_clip]) - return bound(a, value >= 0, value <= (k - 1), - tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1)) + return bound( + a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1) + ) class Constant(Discrete): @@ -985,8 +990,10 @@ class Constant(Discrete): """ def __init__(self, c, *args, **kwargs): - warnings.warn("Constant has been deprecated. We recommend using a Deterministic object instead.", - DeprecationWarning) + warnings.warn( + "Constant has been deprecated. We recommend using a Deterministic object instead.", + DeprecationWarning, + ) super().__init__(*args, **kwargs) self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c) @@ -1013,8 +1020,7 @@ def random(self, point=None, size=None): def _random(c, dtype=dtype, size=None): return np.full(size, fill_value=c, dtype=dtype) - return generate_samples(_random, c=c, dist_shape=self.shape, - size=size).astype(dtype) + return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype) def logp(self, value): r""" @@ -1112,9 +1118,7 @@ def random(self, point=None, size=None): array """ theta, psi = draw_values([self.theta, self.psi], point=point, size=size) - g = generate_samples(stats.poisson.rvs, theta, - dist_shape=self.shape, - size=size) + g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) g, psi = broadcast_distribution_samples([g, psi], size=size) return g * (np.random.random(g.shape) < psi) @@ -1138,13 +1142,10 @@ def logp(self, value): logp_val = tt.switch( tt.gt(value, 0), tt.log(psi) + self.pois.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) - theta)) + logaddexp(tt.log1p(-psi), tt.log(psi) - theta), + ) - return bound( - logp_val, - 0 <= value, - 0 <= psi, psi <= 1, - 0 <= theta) + return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta) class ZeroInflatedBinomial(Discrete): @@ -1224,9 +1225,7 @@ def random(self, point=None, size=None): array """ n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) - g = generate_samples(stats.binom.rvs, n, p, - dist_shape=self.shape, - size=size) + g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) g, psi = broadcast_distribution_samples([g, psi], size=size) return g * (np.random.random(g.shape) < psi) @@ -1251,13 +1250,10 @@ def logp(self, value): logp_val = tt.switch( tt.gt(value, 0), tt.log(psi) + self.bin.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p))) + logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)), + ) - return bound( - logp_val, - 0 <= value, value <= n, - 0 <= psi, psi <= 1, - 0 <= p, p <= 1) + return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1) class ZeroInflatedNegativeBinomial(Discrete): @@ -1353,21 +1349,14 @@ def random(self, point=None, size=None): ------- array """ - mu, alpha, psi = draw_values( - [self.mu, self.alpha, self.psi], point=point, size=size) - g = generate_samples( - self._random, - mu=mu, - alpha=alpha, - dist_shape=self.shape, - size=size - ) + mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size) + g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) g[g == 0] = np.finfo(float).eps # Just in case g, psi = broadcast_distribution_samples([g, psi], size=size) return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi) def _random(self, mu, alpha, size): - r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's + r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's parametrization to scipy.gamma. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -1398,19 +1387,12 @@ def logp(self, value): logp_other = tt.log(psi) + self.nb.logp(value) logp_0 = logaddexp( - tt.log1p(-psi), - tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))) + tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)) + ) - logp_val = tt.switch( - tt.gt(value, 0), - logp_other, - logp_0) + logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0) - return bound( - logp_val, - 0 <= value, - 0 <= psi, psi <= 1, - mu > 0, alpha > 0) + return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0) class OrderedLogistic(Categorical): @@ -1484,11 +1466,14 @@ def __init__(self, eta, cutpoints, *args, **kwargs): self.cutpoints = tt.as_tensor_variable(cutpoints) pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta)) - p_cum = tt.concatenate([ - tt.zeros_like(tt.shape_padright(pa[..., 0])), - pa, - tt.ones_like(tt.shape_padright(pa[..., 0])) - ], axis=-1) + p_cum = tt.concatenate( + [ + tt.zeros_like(tt.shape_padright(pa[..., 0])), + pa, + tt.ones_like(tt.shape_padright(pa[..., 0])), + ], + axis=-1, + ) p = p_cum[..., 1:] - p_cum[..., :-1] super().__init__(p=p, *args, **kwargs) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index b22ffd7ce4..01783d35e5 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -30,8 +30,7 @@ from pymc3.theanof import floatX from . import transforms -from .distribution import (Continuous, Discrete, draw_values, generate_samples, - _DrawValuesContext) +from .distribution import Continuous, Discrete, draw_values, generate_samples, _DrawValuesContext from ..model import Deterministic from .continuous import ChiSquared, Normal from .special import gammaln, multigammaln @@ -40,15 +39,22 @@ from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker -__all__ = ['MvNormal', 'MvStudentT', 'Dirichlet', - 'Multinomial', 'Wishart', 'WishartBartlett', - 'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal', - 'KroneckerNormal'] +__all__ = [ + "MvNormal", + "MvStudentT", + "Dirichlet", + "Multinomial", + "Wishart", + "WishartBartlett", + "LKJCorr", + "LKJCholeskyCov", + "MatrixNormal", + "KroneckerNormal", +] class _QuadFormBase(Continuous): - def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, - *args, **kwargs): + def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **kwargs): super().__init__(*args, **kwargs) if len(self.shape) > 2: raise ValueError("Only 1 or 2 dimensions are allowed.") @@ -56,40 +62,40 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, if chol is not None and not lower: chol = chol.T if len([i for i in [tau, cov, chol] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of tau, cov, ' - 'or chol.') + raise ValueError( + "Incompatible parameterization. " "Specify exactly one of tau, cov, " "or chol." + ) self.mu = mu = tt.as_tensor_variable(mu) self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") # Step methods and advi do not catch LinAlgErrors at the # moment. We work around that by using a cholesky op # that returns a nan as first entry instead of raising # an error. - cholesky = Cholesky(lower=True, on_error='nan') + cholesky = Cholesky(lower=True, on_error="nan") if cov is not None: self.k = cov.shape[0] - self._cov_type = 'cov' + self._cov_type = "cov" cov = tt.as_tensor_variable(cov) if cov.ndim != 2: - raise ValueError('cov must be two dimensional.') + raise ValueError("cov must be two dimensional.") self.chol_cov = cholesky(cov) self.cov = cov self._n = self.cov.shape[-1] elif tau is not None: self.k = tau.shape[0] - self._cov_type = 'tau' + self._cov_type = "tau" tau = tt.as_tensor_variable(tau) if tau.ndim != 2: - raise ValueError('tau must be two dimensional.') + raise ValueError("tau must be two dimensional.") self.chol_tau = cholesky(tau) self.tau = tau self._n = self.tau.shape[-1] else: self.k = chol.shape[0] - self._cov_type = 'chol' + self._cov_type = "chol" if chol.ndim != 2: - raise ValueError('chol must be two dimensional.') + raise ValueError("chol must be two dimensional.") self.chol_cov = tt.as_tensor_variable(chol) self._n = self.chol_cov.shape[-1] @@ -97,7 +103,7 @@ def _quaddist(self, value): """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma.""" mu = self.mu if value.ndim > 2 or value.ndim == 0: - raise ValueError('Invalid dimension for value: %s' % value.ndim) + raise ValueError("Invalid dimension for value: %s" % value.ndim) if value.ndim == 1: onedim = True value = value[None, :] @@ -106,11 +112,11 @@ def _quaddist(self, value): delta = value - mu - if self._cov_type == 'cov': + if self._cov_type == "cov": # Use this when Theano#5908 is released. # return MvNormalLogp()(self.cov, delta) dist, logdet, ok = self._quaddist_cov(delta) - elif self._cov_type == 'tau': + elif self._cov_type == "tau": dist, logdet, ok = self._quaddist_tau(delta) else: dist, logdet, ok = self._quaddist_chol(delta) @@ -222,8 +228,7 @@ class MvNormal(_QuadFormBase): vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T) """ - def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, - *args, **kwargs): + def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs): super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs) self.mean = self.median = self.mode = self.mu = self.mu @@ -253,40 +258,35 @@ def random(self, point=None, size=None): except TypeError: size = (size,) - if self._cov_type == 'cov': + if self._cov_type == "cov": mu, cov = draw_values([self.mu, self.cov], point=point, size=size) if mu.shape[-1] != cov.shape[-1]: raise ValueError("Shapes for mu and cov don't match") try: - dist = stats.multivariate_normal( - mean=mu, cov=cov, allow_singular=True) + dist = stats.multivariate_normal(mean=mu, cov=cov, allow_singular=True) except ValueError: size += (mu.shape[-1],) return np.nan * np.zeros(size) return dist.rvs(size) - elif self._cov_type == 'chol': - mu, chol = draw_values([self.mu, self.chol_cov], - point=point, size=size) + elif self._cov_type == "chol": + mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size) if size and mu.ndim == len(size) and mu.shape == size: mu = mu[..., np.newaxis] if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1: raise ValueError("Shapes for mu and chol don't match") - broadcast_shape = ( - np.broadcast(np.empty(mu.shape[:-1]), - np.empty(chol.shape[:-2])).shape - ) + broadcast_shape = np.broadcast(np.empty(mu.shape[:-1]), np.empty(chol.shape[:-2])).shape mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],)) chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:]) # If mu and chol were fixed by the point, only the standard normal # should change - if mu.shape[:len(size)] != size: + if mu.shape[: len(size)] != size: std_norm_shape = size + mu.shape else: std_norm_shape = mu.shape standard_normal = np.random.standard_normal(std_norm_shape) - return mu + np.einsum('...ij,...j->...i', chol, standard_normal) + return mu + np.einsum("...ij,...j->...i", chol, standard_normal) else: mu, tau = draw_values([self.mu, self.tau], point=point, size=size) if mu.shape[-1] != tau[0].shape[-1]: @@ -299,8 +299,7 @@ def random(self, point=None, size=None): return np.nan * np.zeros(size) standard_normal = np.random.standard_normal(size) - transformed = linalg.solve_triangular( - chol, standard_normal.T, lower=True) + transformed = linalg.solve_triangular(chol, standard_normal.T, lower=True) return mu + transformed.T def logp(self, value): @@ -319,7 +318,7 @@ def logp(self, value): """ quaddist, logdet, ok = self._quaddist(value) k = floatX(value.shape[-1]) - norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi)) + norm = -0.5 * k * pm.floatX(np.log(2 * np.pi)) return bound(norm - 0.5 * quaddist - logdet, ok) def _distr_parameters_for_repr(self): @@ -367,11 +366,12 @@ class MvStudentT(_QuadFormBase): Whether the cholesky fatcor is given as a lower triangular matrix. """ - def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, - lower=True, *args, **kwargs): + def __init__( + self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, lower=True, *args, **kwargs + ): if Sigma is not None: if cov is not None: - raise ValueError('Specify only one of cov and Sigma') + raise ValueError("Specify only one of cov and Sigma") cov = Sigma super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs) self.nu = nu = tt.as_tensor_variable(nu) @@ -396,14 +396,14 @@ def random(self, point=None, size=None): """ with _DrawValuesContext(): nu, mu = draw_values([self.nu, self.mu], point=point, size=size) - if self._cov_type == 'cov': - cov, = draw_values([self.cov], point=point, size=size) + if self._cov_type == "cov": + (cov,) = draw_values([self.cov], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov) - elif self._cov_type == 'tau': - tau, = draw_values([self.tau], point=point, size=size) + elif self._cov_type == "tau": + (tau,) = draw_values([self.tau], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau) else: - chol, = draw_values([self.chol_cov], point=point, size=size) + (chol,) = draw_values([self.chol_cov], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol) samples = dist.random(point, size) @@ -428,10 +428,12 @@ def logp(self, value): quaddist, logdet, ok = self._quaddist(value) k = floatX(value.shape[-1]) - norm = (gammaln((self.nu + k) / 2.) - - gammaln(self.nu / 2.) - - 0.5 * k * floatX(np.log(self.nu * np.pi))) - inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu) + norm = ( + gammaln((self.nu + k) / 2.0) + - gammaln(self.nu / 2.0) + - 0.5 * k * floatX(np.log(self.nu * np.pi)) + ) + inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu) return bound(norm + inner - logdet, ok) def _distr_parameters_for_repr(self): @@ -462,20 +464,19 @@ class Dirichlet(Continuous): Concentration parameters (a > 0). """ - def __init__(self, a, transform=transforms.stick_breaking, - *args, **kwargs): + def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs): - if kwargs.get('shape') is None: + if kwargs.get("shape") is None: warnings.warn( ( "Shape not explicitly set. " "Please, set the value using the `shape` keyword argument. " "Using the test value to infer the shape." ), - DeprecationWarning + DeprecationWarning, ) try: - kwargs['shape'] = np.shape(get_test_value(a)) + kwargs["shape"] = np.shape(get_test_value(a)) except AttributeError: pass @@ -485,15 +486,13 @@ def __init__(self, a, transform=transforms.stick_breaking, self.a = a = tt.as_tensor_variable(a) self.mean = a / tt.sum(a) - self.mode = tt.switch(tt.all(a > 1), - (a - 1) / tt.sum(a - 1), - np.nan) + self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan) def _random(self, a, size=None): gen = stats.dirichlet.rvs shape = tuple(np.atleast_1d(self.shape)) - if size[-len(shape):] == shape: - real_size = size[:-len(shape)] + if size[-len(shape) :] == shape: + real_size = size[: -len(shape)] else: real_size = size if self.size_prefix: @@ -528,10 +527,7 @@ def random(self, point=None, size=None): array """ a = draw_values([self.a], point=point, size=size)[0] - samples = generate_samples(self._random, - a=a, - dist_shape=self.shape, - size=size) + samples = generate_samples(self._random, a=a, dist_shape=self.shape, size=size) return samples def logp(self, value): @@ -551,11 +547,14 @@ def logp(self, value): a = self.a # only defined for sum(value) == 1 - return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) - + gammaln(tt.sum(a, axis=-1)), - tt.all(value >= 0), tt.all(value <= 1), - np.logical_not(a.broadcastable), tt.all(a > 0), - broadcast_conditions=False) + return bound( + tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)), + tt.all(value >= 0), + tt.all(value <= 1), + np.logical_not(a.broadcastable), + tt.all(a > 0), + broadcast_conditions=False, + ) def _distr_parameters_for_repr(self): return ["a"] @@ -598,7 +597,7 @@ def __init__(self, n, p, *args, **kwargs): super().__init__(*args, **kwargs) p = p / tt.sum(p, axis=-1, keepdims=True) - n = np.squeeze(n) # works also if n is a tensor + n = np.squeeze(n) # works also if n is a tensor if len(self.shape) > 1: self.n = tt.shape_padright(n) @@ -612,17 +611,16 @@ def __init__(self, n, p, *args, **kwargs): self.p = tt.as_tensor_variable(p) self.mean = self.n * self.p - mode = tt.cast(tt.round(self.mean), 'int32') + mode = tt.cast(tt.round(self.mean), "int32") diff = self.n - tt.sum(mode, axis=-1, keepdims=True) inc_bool_arr = tt.abs_(diff) > 0 - mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], - diff[inc_bool_arr.nonzero()]) + mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) self.mode = mode def _random(self, n, p, size=None, raw_size=None): original_dtype = p.dtype # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317) - p = p.astype('float64') + p = p.astype("float64") # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals p /= np.sum(p, axis=-1, keepdims=True) @@ -635,23 +633,19 @@ def _random(self, n, p, size=None, raw_size=None): # np.random.multinomial needs `n` to be a scalar int and `p` a # sequence so we semi flatten them and iterate over them size_ = to_tuple(raw_size) - if p.ndim > len(size_) and p.shape[:len(size_)] == size_: + if p.ndim > len(size_) and p.shape[: len(size_)] == size_: # p and n have the size_ prepend so we don't need it in np.random n_ = n.reshape([-1]) p_ = p.reshape([-1, p.shape[-1]]) - samples = np.array([ - np.random.multinomial(nn, pp) - for nn, pp in zip(n_, p_) - ]) + samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)]) samples = samples.reshape(p.shape) else: # p and n don't have the size prepend n_ = n.reshape([-1]) p_ = p.reshape([-1, p.shape[-1]]) - samples = np.array([ - np.random.multinomial(nn, pp, size=size_) - for nn, pp in zip(n_, p_) - ]) + samples = np.array( + [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)] + ) samples = np.moveaxis(samples, 0, -1) samples = samples.reshape(size + p.shape) # We cast back to the original dtype @@ -675,10 +669,14 @@ def random(self, point=None, size=None): array """ n, p = draw_values([self.n, self.p], point=point, size=size) - samples = generate_samples(self._random, n, p, - dist_shape=self.shape, - not_broadcast_kwargs={'raw_size': size}, - size=size) + samples = generate_samples( + self._random, + n, + p, + dist_shape=self.shape, + not_broadcast_kwargs={"raw_size": size}, + size=size, + ) return samples def logp(self, x): @@ -705,7 +703,7 @@ def logp(self, x): tt.all(p <= 1), tt.all(tt.eq(tt.sum(p, axis=-1), 1)), tt.all(tt.ge(n, 0)), - broadcast_conditions=False + broadcast_conditions=False, ) @@ -731,7 +729,7 @@ class PosDefMatrix(theano.Op): def make_node(self, x): x = tt.as_tensor_variable(x) assert x.ndim == 2 - o = tt.TensorType(dtype='int8', broadcastable=[])() + o = tt.TensorType(dtype="int8", broadcastable=[])() return theano.Apply(self, [x], [o]) # Python implementation: @@ -740,21 +738,22 @@ def perform(self, node, inputs, outputs): (x,) = inputs (z,) = outputs try: - z[0] = np.array(posdef(x), dtype='int8') + z[0] = np.array(posdef(x), dtype="int8") except Exception: - pm._log.exception('Failed to check if %s positive definite', x) + pm._log.exception("Failed to check if %s positive definite", x) raise def infer_shape(self, node, shapes): return [[]] def grad(self, inp, grads): - x, = inp + (x,) = inp return [x.zeros_like(theano.config.floatX)] def __str__(self): return "MatrixIsPositiveDefinite" + matrix_pos_def = PosDefMatrix() @@ -797,20 +796,20 @@ class Wishart(Continuous): def __init__(self, nu, V, *args, **kwargs): super().__init__(*args, **kwargs) - warnings.warn('The Wishart distribution can currently not be used ' - 'for MCMC sampling. The probability of sampling a ' - 'symmetric matrix is basically zero. Instead, please ' - 'use LKJCholeskyCov or LKJCorr. For more information ' - 'on the issues surrounding the Wishart see here: ' - 'https://github.com/pymc-devs/pymc3/issues/538.', - UserWarning) + warnings.warn( + "The Wishart distribution can currently not be used " + "for MCMC sampling. The probability of sampling a " + "symmetric matrix is basically zero. Instead, please " + "use LKJCholeskyCov or LKJCorr. For more information " + "on the issues surrounding the Wishart see here: " + "https://github.com/pymc-devs/pymc3/issues/538.", + UserWarning, + ) self.nu = nu = tt.as_tensor_variable(nu) self.p = p = tt.as_tensor_variable(V.shape[0]) self.V = V = tt.as_tensor_variable(V) self.mean = nu * V - self.mode = tt.switch(tt.ge(nu, p + 1), - (nu - p - 1) * V, - np.nan) + self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan) def random(self, point=None, size=None): """ @@ -830,9 +829,8 @@ def random(self, point=None, size=None): array """ nu, V = draw_values([self.nu, self.V], point=point, size=size) - size= 1 if size is None else size - return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, - broadcast_shape=(size,)) + size = 1 if size is None else size + return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, broadcast_shape=(size,)) def logp(self, X): """ @@ -855,14 +853,19 @@ def logp(self, X): IVI = det(V) IXI = det(X) - return bound(((nu - p - 1) * tt.log(IXI) - - trace(matrix_inverse(V).dot(X)) - - nu * p * tt.log(2) - nu * tt.log(IVI) - - 2 * multigammaln(nu / 2., p)) / 2, - matrix_pos_def(X), - tt.eq(X, X.T), - nu > (p - 1), - broadcast_conditions=False + return bound( + ( + (nu - p - 1) * tt.log(IXI) + - trace(matrix_inverse(V).dot(X)) + - nu * p * tt.log(2) + - nu * tt.log(IVI) + - 2 * multigammaln(nu / 2.0, p) + ) + / 2, + matrix_pos_def(X), + tt.eq(X, X.T), + nu > (p - 1), + broadcast_conditions=False, ) @@ -924,17 +927,18 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv # Inverse transform testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T)) testval = linalg.cholesky(testval, lower=True) - diag_testval = testval[diag_idx]**2 + diag_testval = testval[diag_idx] ** 2 tril_testval = testval[tril_idx] else: diag_testval = None tril_testval = None - c = tt.sqrt(ChiSquared('%s_c' % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, - testval=diag_testval)) - pm._log.info('Added new variable %s_c to model diagonal of Wishart.' % name) - z = Normal('%s_z' % name, 0., 1., shape=n_tril, testval=tril_testval) - pm._log.info('Added new variable %s_z to model off-diagonals of Wishart.' % name) + c = tt.sqrt( + ChiSquared("%s_c" % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval) + ) + pm._log.info("Added new variable %s_c to model diagonal of Wishart." % name) + z = Normal("%s_z" % name, 0.0, 1.0, shape=n_tril, testval=tril_testval) + pm._log.info("Added new variable %s_z to model off-diagonals of Wishart." % name) # Construct A matrix A = tt.zeros(S.shape, dtype=np.float32) A = tt.set_subtensor(A[diag_idx], c) @@ -949,20 +953,24 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv def _lkj_normalizing_constant(eta, n): if eta == 1: - result = gammaln(2. * tt.arange(1, int((n - 1) / 2) + 1)).sum() + result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum() if n % 2 == 1: - result += (0.25 * (n ** 2 - 1) * tt.log(np.pi) - - 0.25 * (n - 1) ** 2 * tt.log(2.) - - (n - 1) * gammaln(int((n + 1) / 2))) + result += ( + 0.25 * (n ** 2 - 1) * tt.log(np.pi) + - 0.25 * (n - 1) ** 2 * tt.log(2.0) + - (n - 1) * gammaln(int((n + 1) / 2)) + ) else: - result += (0.25 * n * (n - 2) * tt.log(np.pi) - + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.) - + n * gammaln(n / 2) - (n - 1) * gammaln(n)) + result += ( + 0.25 * n * (n - 2) * tt.log(np.pi) + + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0) + + n * gammaln(n / 2) + - (n - 1) * gammaln(n) + ) else: result = -(n - 1) * gammaln(eta + 0.5 * (n - 1)) k = tt.arange(1, n) - result += (0.5 * k * tt.log(np.pi) - + gammaln(eta + 0.5 * (n - 1 - k))).sum() + result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum() return result @@ -970,24 +978,25 @@ class _LKJCholeskyCov(Continuous): R"""Underlying class for covariance matrix with LKJ distributed correlations. See docs for LKJCholeskyCov function for more details on how to use it in models. """ + def __init__(self, eta, n, sd_dist, *args, **kwargs): self.n = n self.eta = eta - if 'transform' in kwargs and kwargs['transform'] is not None: - raise ValueError('Invalid parameter: transform.') - if 'shape' in kwargs: - raise ValueError('Invalid parameter: shape.') + if "transform" in kwargs and kwargs["transform"] is not None: + raise ValueError("Invalid parameter: transform.") + if "shape" in kwargs: + raise ValueError("Invalid parameter: shape.") shape = n * (n + 1) // 2 if sd_dist.shape.ndim not in [0, 1]: - raise ValueError('Invalid shape for sd_dist.') + raise ValueError("Invalid shape for sd_dist.") transform = transforms.CholeskyCovPacked(n) - kwargs['shape'] = shape - kwargs['transform'] = transform + kwargs["shape"] = shape + kwargs["transform"] = transform super().__init__(*args, **kwargs) self.sd_dist = sd_dist @@ -1017,9 +1026,7 @@ def logp(self, x): cumsum = tt.cumsum(x ** 2) variance = tt.zeros(n) variance = tt.inc_subtensor(variance[0], x[0] ** 2) - variance = tt.inc_subtensor( - variance[1:], - cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) + variance = tt.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) sd_vals = tt.sqrt(variance) logp_sd = self.sd_dist.logp(sd_vals).sum() @@ -1043,40 +1050,40 @@ def _random(self, n, eta, size=1): P = np.eye(n) * np.ones(eta_sample_shape + (n, n)) # original implementation in R see: # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r - beta = eta - 1. + n/2. - r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1. + beta = eta - 1.0 + n / 2.0 + r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.0 P[..., 0, 1] = r12 - P[..., 1, 1] = np.sqrt(1. - r12**2) + P[..., 1, 1] = np.sqrt(1.0 - r12 ** 2) for mp1 in range(2, n): beta -= 0.5 - y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape) + y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=eta_sample_shape) z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,)) - z = z / np.sqrt(np.einsum('ij,ij->j', z, z)) + z = z / np.sqrt(np.einsum("ij,ij->j", z, z)) P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z - P[..., mp1, mp1] = np.sqrt(1. - y) - C = np.einsum('...ji,...jk->...ik', P, P) + P[..., mp1, mp1] = np.sqrt(1.0 - y) + C = np.einsum("...ji,...jk->...ik", P, P) D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2])) if D.shape in [tuple(), (1,)]: D = self.sd_dist.random(size=P.shape[:-1]) elif D.ndim < C.ndim - 1: - D = [D] + [self.sd_dist.random(size=P.shape[:-2]) - for _ in range(n - 1)] + D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)] D = np.moveaxis(np.array(D), 0, C.ndim - 2) elif D.ndim == C.ndim - 1: if D.shape[-1] == 1: - D = [D] + [self.sd_dist.random(size=P.shape[:-2]) - for _ in range(n - 1)] + D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)] D = np.concatenate(D, axis=-1) elif D.shape[-1] != n: - raise ValueError('The size of the samples drawn from the ' - 'supplied sd_dist.random have the wrong ' - 'size. Expected {} but got {} instead.'. - format(n, D.shape[-1])) + raise ValueError( + "The size of the samples drawn from the " + "supplied sd_dist.random have the wrong " + "size. Expected {} but got {} instead.".format(n, D.shape[-1]) + ) else: - raise ValueError('Supplied sd_dist.random generates samples with ' - 'too many dimensions. It must yield samples ' - 'with 0 or 1 dimensions. Got {} instead'. - format(D.ndim - C.ndim - 2)) + raise ValueError( + "Supplied sd_dist.random generates samples with " + "too many dimensions. It must yield samples " + "with 0 or 1 dimensions. Got {} instead".format(D.ndim - C.ndim - 2) + ) C *= D[..., :, np.newaxis] * D[..., np.newaxis, :] tril_idx = np.tril_indices(n, k=0) return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]] @@ -1105,7 +1112,7 @@ def random(self, point=None, size=None): # We can only handle cov matrices with a constant n per random call n = np.unique(n) if len(n) > 1: - raise RuntimeError('Varying n is not supported for LKJCholeskyCov') + raise RuntimeError("Varying n is not supported for LKJCholeskyCov") n = int(n[0]) dist_shape = ((n * (n + 1)) // 2,) # We make sure that eta and the drawn n get their shapes broadcasted @@ -1122,10 +1129,10 @@ def random(self, point=None, size=None): size = None elif size == broadcast_shape: size = None - elif size[-len(sample_shape):] == sample_shape: - size = size[:len(size) - len(sample_shape)] - elif size[-len(broadcast_shape):] == broadcast_shape: - size = size[:len(size) - len(broadcast_shape)] + elif size[-len(sample_shape) :] == sample_shape: + size = size[: len(size) - len(sample_shape)] + elif size[-len(broadcast_shape) :] == broadcast_shape: + size = size[: len(size) - len(broadcast_shape)] # We will always provide _random with an integer size and then reshape # the output to get the correct size if size is not None: @@ -1140,9 +1147,7 @@ def random(self, point=None, size=None): return samples -def LKJCholeskyCov( - name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs -): +def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs): R"""Wrapper function for covariance matrix with LKJ distributed correlations. This defines a distribution over Cholesky decomposed covariance @@ -1337,12 +1342,14 @@ class LKJCorr(Continuous): 100(9), pp.1989-2001. """ - def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs): + def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwargs): if (p is not None) and (n is not None) and (eta is None): - warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta ' - 'dimension parameter p -> n. Please update your code. ' - 'Automatically re-assigning parameters for backwards compatibility.', - DeprecationWarning) + warnings.warn( + "Parameters to LKJCorr have changed: shape parameter n -> eta " + "dimension parameter p -> n. Please update your code. " + "Automatically re-assigning parameters for backwards compatibility.", + DeprecationWarning, + ) self.n = p self.eta = n eta = self.eta @@ -1351,20 +1358,24 @@ def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwar self.n = n self.eta = eta else: - raise ValueError('Invalid parameter: please use eta as the shape parameter and ' - 'n as the dimension parameter.') + raise ValueError( + "Invalid parameter: please use eta as the shape parameter and " + "n as the dimension parameter." + ) shape = n * (n - 1) // 2 self.mean = floatX(np.zeros(shape)) - if transform == 'interval': + if transform == "interval": transform = transforms.interval(-1, 1) super().__init__(shape=shape, transform=transform, *args, **kwargs) - warnings.warn('Parameters in LKJCorr have been rename: shape parameter n -> eta ' - 'dimension parameter p -> n. Please double check your initialization.', - DeprecationWarning) - self.tri_index = np.zeros([n, n], dtype='int32') + warnings.warn( + "Parameters in LKJCorr have been rename: shape parameter n -> eta " + "dimension parameter p -> n. Please double check your initialization.", + DeprecationWarning, + ) + self.tri_index = np.zeros([n, n], dtype="int32") self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape) self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape) @@ -1372,19 +1383,19 @@ def _random(self, n, eta, size=None): size = size if isinstance(size, tuple) else (size,) # original implementation in R see: # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r - beta = eta - 1. + n/2. - r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=size) - 1. + beta = eta - 1.0 + n / 2.0 + r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=size) - 1.0 P = np.eye(n)[:, :, np.newaxis] * np.ones(size) P[0, 1] = r12 - P[1, 1] = np.sqrt(1. - r12**2) + P[1, 1] = np.sqrt(1.0 - r12 ** 2) for mp1 in range(2, n): beta -= 0.5 - y = stats.beta.rvs(a=mp1 / 2., b=beta, size=size) - z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + size) - z = z / np.sqrt(np.einsum('ij,ij->j', z, z)) + y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=size) + z = stats.norm.rvs(loc=0, scale=1, size=(mp1,) + size) + z = z / np.sqrt(np.einsum("ij,ij->j", z, z)) P[0:mp1, mp1] = np.sqrt(y) * z - P[mp1, mp1] = np.sqrt(1. - y) - C = np.einsum('ji...,jk...->...ik', P, P) + P[mp1, mp1] = np.sqrt(1.0 - y) + C = np.einsum("ji...,jk...->...ik", P, P) triu_idx = np.triu_indices(n, k=1) return C[..., triu_idx[0], triu_idx[1]] @@ -1406,9 +1417,8 @@ def random(self, point=None, size=None): array """ n, eta = draw_values([self.n, self.eta], point=point, size=size) - size= 1 if size is None else size - samples = generate_samples(self._random, n, eta, - broadcast_shape=(size,)) + size = 1 if size is None else size + samples = generate_samples(self._random, n, eta, broadcast_shape=(size,)) return samples def logp(self, x): @@ -1432,12 +1442,14 @@ def logp(self, x): X = tt.fill_diagonal(X, 1) result = _lkj_normalizing_constant(eta, n) - result += (eta - 1.) * tt.log(det(X)) - return bound(result, - tt.all(X <= 1), tt.all(X >= -1), - matrix_pos_def(X), - eta > 0, - broadcast_conditions=False + result += (eta - 1.0) * tt.log(det(X)) + return bound( + result, + tt.all(X <= 1), + tt.all(X >= -1), + matrix_pos_def(X), + eta > 0, + broadcast_conditions=False, ) @@ -1530,12 +1542,22 @@ class MatrixNormal(Continuous): observed=data, shape=(m, n)) """ - def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None, - colcov=None, colchol=None, coltau=None, shape=None, *args, - **kwargs): + def __init__( + self, + mu=0, + rowcov=None, + rowchol=None, + rowtau=None, + colcov=None, + colchol=None, + coltau=None, + shape=None, + *args, + **kwargs, + ): self._setup_matrices(colcov, colchol, coltau, rowcov, rowchol, rowtau) if shape is None: - raise TypeError('shape is a required argument') + raise TypeError("shape is a required argument") assert len(shape) == 2, "shape must have length 2: mxn" self.shape = shape super().__init__(shape=shape, *args, **kwargs) @@ -1545,64 +1567,68 @@ def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None, self.solve_upper = tt.slinalg.solve_upper_triangular def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): - cholesky = Cholesky(lower=True, on_error='raise') + cholesky = Cholesky(lower=True, on_error="raise") # Among-row matrices if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of rowtau, rowcov, ' - 'or rowchol.') + raise ValueError( + "Incompatible parameterization. " + "Specify exactly one of rowtau, rowcov, " + "or rowchol." + ) if rowcov is not None: self.m = rowcov.shape[0] - self._rowcov_type = 'cov' + self._rowcov_type = "cov" rowcov = tt.as_tensor_variable(rowcov) if rowcov.ndim != 2: - raise ValueError('rowcov must be two dimensional.') + raise ValueError("rowcov must be two dimensional.") self.rowchol_cov = cholesky(rowcov) self.rowcov = rowcov elif rowtau is not None: - raise ValueError('rowtau not supported at this time') + raise ValueError("rowtau not supported at this time") self.m = rowtau.shape[0] - self._rowcov_type = 'tau' + self._rowcov_type = "tau" rowtau = tt.as_tensor_variable(rowtau) if rowtau.ndim != 2: - raise ValueError('rowtau must be two dimensional.') + raise ValueError("rowtau must be two dimensional.") self.rowchol_tau = cholesky(rowtau) self.rowtau = rowtau else: self.m = rowchol.shape[0] - self._rowcov_type = 'chol' + self._rowcov_type = "chol" if rowchol.ndim != 2: - raise ValueError('rowchol must be two dimensional.') + raise ValueError("rowchol must be two dimensional.") self.rowchol_cov = tt.as_tensor_variable(rowchol) # Among-column matrices if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of coltau, colcov, ' - 'or colchol.') + raise ValueError( + "Incompatible parameterization. " + "Specify exactly one of coltau, colcov, " + "or colchol." + ) if colcov is not None: self.n = colcov.shape[0] - self._colcov_type = 'cov' + self._colcov_type = "cov" colcov = tt.as_tensor_variable(colcov) if colcov.ndim != 2: - raise ValueError('colcov must be two dimensional.') + raise ValueError("colcov must be two dimensional.") self.colchol_cov = cholesky(colcov) self.colcov = colcov elif coltau is not None: - raise ValueError('coltau not supported at this time') + raise ValueError("coltau not supported at this time") self.n = coltau.shape[0] - self._colcov_type = 'tau' + self._colcov_type = "tau" coltau = tt.as_tensor_variable(coltau) if coltau.ndim != 2: - raise ValueError('coltau must be two dimensional.') + raise ValueError("coltau must be two dimensional.") self.colchol_tau = cholesky(coltau) self.coltau = coltau else: self.n = colchol.shape[0] - self._colcov_type = 'chol' + self._colcov_type = "chol" if colchol.ndim != 2: - raise ValueError('colchol must be two dimensional.') + raise ValueError("colchol must be two dimensional.") self.colchol_cov = tt.as_tensor_variable(colchol) def random(self, point=None, size=None): @@ -1623,9 +1649,8 @@ def random(self, point=None, size=None): array """ mu, colchol, rowchol = draw_values( - [self.mu, self.colchol_cov, self.rowchol_cov], - point=point, - size=size) + [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size + ) if size is None: size = () if size in (None, ()): @@ -1640,14 +1665,15 @@ def random(self, point=None, size=None): samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))) else: for j in range(np.prod(size)): - standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1])) - samples.append(mu[j] + - np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T))) + standard_normal = np.random.standard_normal( + (self.shape[0], colchol[j].shape[-1]) + ) + samples.append( + mu[j] + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)) + ) samples = np.array(samples).reshape(size + tuple(self.shape)) return samples - - def _trquaddist(self, value): """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and the logdet of colcov and rowcov.""" @@ -1686,8 +1712,8 @@ def logp(self, value): trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value) m = self.m n = self.n - norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi)) - return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet + norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi)) + return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet class KroneckerNormal(Continuous): @@ -1779,24 +1805,23 @@ class KroneckerNormal(Continuous): .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models" """ - def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, - *args, **kwargs): + def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs): self._setup(covs, chols, evds, sigma) super().__init__(*args, **kwargs) self.mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu def _setup(self, covs, chols, evds, sigma): - self.cholesky = Cholesky(lower=True, on_error='raise') + self.cholesky = Cholesky(lower=True, on_error="raise") if len([i for i in [covs, chols, evds] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of covs, chols, ' - 'or evds.') + raise ValueError( + "Incompatible parameterization. " "Specify exactly one of covs, chols, " "or evds." + ) self._isEVD = False self.sigma = sigma self.is_noisy = self.sigma is not None and self.sigma != 0 if covs is not None: - self._cov_type = 'cov' + self._cov_type = "cov" self.covs = covs if self.is_noisy: # Noise requires eigendecomposition @@ -1806,11 +1831,10 @@ def _setup(self, covs, chols, evds, sigma): # Otherwise use cholesky as usual self.chols = list(map(self.cholesky, self.covs)) self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) - self.sizes = tt.as_tensor_variable( - [chol.shape[0] for chol in self.chols]) + self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols]) self.N = tt.prod(self.sizes) elif chols is not None: - self._cov_type = 'chol' + self._cov_type = "chol" if self.is_noisy: # A strange case... # Noise requires eigendecomposition covs = [tt.dot(chol, chol.T) for chol in chols] @@ -1819,11 +1843,10 @@ def _setup(self, covs, chols, evds, sigma): else: self.chols = chols self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) - self.sizes = tt.as_tensor_variable( - [chol.shape[0] for chol in self.chols]) + self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols]) self.N = tt.prod(self.sizes) else: - self._cov_type = 'evd' + self._cov_type = "evd" self._setup_evd(evds) def _setup_evd(self, eigh_iterable): @@ -1835,18 +1858,18 @@ def _setup_evd(self, eigh_iterable): self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep)) self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs if self.is_noisy: - self.eigs += self.sigma**2 + self.eigs += self.sigma ** 2 self.N = self.eigs.shape[0] def _setup_random(self): - if not hasattr(self, 'mv_params'): - self.mv_params = {'mu': self.mu} - if self._cov_type == 'cov': + if not hasattr(self, "mv_params"): + self.mv_params = {"mu": self.mu} + if self._cov_type == "cov": cov = kronecker(*self.covs) if self.is_noisy: - cov = cov + self.sigma**2 * tt.identity_like(cov) - self.mv_params['cov'] = cov - elif self._cov_type == 'chol': + cov = cov + self.sigma ** 2 * tt.identity_like(cov) + self.mv_params["cov"] = cov + elif self._cov_type == "chol": if self.is_noisy: covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): @@ -1854,19 +1877,19 @@ def _setup_random(self): covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma**2 * tt.identity_like(cov) - self.mv_params['chol'] = self.cholesky(cov) + cov = cov + self.sigma ** 2 * tt.identity_like(cov) + self.mv_params["chol"] = self.cholesky(cov) else: - self.mv_params['chol'] = kronecker(*self.chols) - elif self._cov_type == 'evd': + self.mv_params["chol"] = kronecker(*self.chols) + elif self._cov_type == "evd": covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma**2 * tt.identity_like(cov) - self.mv_params['cov'] = cov + cov = cov + self.sigma ** 2 * tt.identity_like(cov) + self.mv_params["cov"] = cov def random(self, point=None, size=None): """ @@ -1894,7 +1917,7 @@ def random(self, point=None, size=None): def _quaddist(self, value): """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))""" if value.ndim > 2 or value.ndim == 0: - raise ValueError('Invalid dimension for value: %s' % value.ndim) + raise ValueError("Invalid dimension for value: %s" % value.ndim) if value.ndim == 1: onedim = True value = value[None, :] @@ -1904,14 +1927,14 @@ def _quaddist(self, value): delta = value - self.mu if self._isEVD: sqrt_quad = kron_dot(self.QTs, delta.T) - sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None]) + sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None]) logdet = tt.sum(tt.log(self.eigs)) else: sqrt_quad = kron_solve_lower(self.chols, delta.T) logdet = 0 for chol_size, chol_diag in zip(self.sizes, self.chol_diags): - logchol = tt.log(chol_diag) * self.N/chol_size - logdet += tt.sum(2*logchol) + logchol = tt.log(chol_diag) * self.N / chol_size + logdet += tt.sum(2 * logchol) # Square each sample quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T) if onedim: @@ -1933,4 +1956,4 @@ def logp(self, value): TensorVariable """ quad, logdet = self._quaddist(value) - return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0 + return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0 diff --git a/pymc3/distributions/shape_utils.py b/pymc3/distributions/shape_utils.py index b0ff463a05..c41507620f 100644 --- a/pymc3/distributions/shape_utils.py +++ b/pymc3/distributions/shape_utils.py @@ -64,9 +64,7 @@ def _check_shape_type(shape): raise TypeError(f"Value {s} is not a valid integer") out.append(o) except Exception: - raise TypeError( - f"Supplied value {shape} does not represent a valid shape" - ) + raise TypeError(f"Supplied value {shape} does not represent a valid shape") return tuple(out) @@ -172,10 +170,7 @@ def broadcast_dist_samples_shape(shapes, size=None): shapes = [_check_shape_type(s) for s in shapes] _size = to_tuple(size) # samples shapes without the size prepend - sp_shapes = [ - s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s - for s in shapes - ] + sp_shapes = [s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s for s in shapes] try: broadcast_shape = shapes_broadcasting(*sp_shapes, raise_exception=True) except ValueError: @@ -277,8 +272,7 @@ def get_broadcastable_dist_samples( out_shape = broadcast_dist_samples_shape(p_shapes, size=size) # samples shapes without the size prepend sp_shapes = [ - s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s - for s in p_shapes + s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s for s in p_shapes ] broadcast_shape = shapes_broadcasting(*sp_shapes, raise_exception=True) broadcastable_samples = [] diff --git a/pymc3/distributions/simulator.py b/pymc3/distributions/simulator.py index 026fc2f9bb..218611a577 100644 --- a/pymc3/distributions/simulator.py +++ b/pymc3/distributions/simulator.py @@ -130,7 +130,6 @@ def _str_repr(self, name=None, dist=None, formatting="plain"): return f"{name} ~ Simulator({function}({params}), {distance}, {sum_stat})" - def identity(x): """Identity function, used as a summary statistics.""" return x diff --git a/pymc3/distributions/special.py b/pymc3/distributions/special.py index 2521e190d8..1401596486 100644 --- a/pymc3/distributions/special.py +++ b/pymc3/distributions/special.py @@ -17,10 +17,10 @@ from theano.scalar.basic_scipy import GammaLn, Psi from theano import scalar -__all__ = ['gammaln', 'multigammaln', 'psi', 'log_i0'] +__all__ = ["gammaln", "multigammaln", "psi", "log_i0"] -scalar_gammaln = GammaLn(scalar.upgrade_to_float, name='scalar_gammaln') -gammaln = tt.Elemwise(scalar_gammaln, name='gammaln') +scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln") +gammaln = tt.Elemwise(scalar_gammaln, name="gammaln") def multigammaln(a, p): @@ -33,21 +33,33 @@ def multigammaln(a, p): degrees of freedom. p > 0 """ i = tt.arange(1, p + 1) - return (p * (p - 1) * tt.log(np.pi) / 4. - + tt.sum(gammaln(a + (1. - i) / 2.), axis=0)) + return p * (p - 1) * tt.log(np.pi) / 4.0 + tt.sum(gammaln(a + (1.0 - i) / 2.0), axis=0) def log_i0(x): """ Calculates the logarithm of the 0 order modified Bessel function of the first kind"" """ - return tt.switch(tt.lt(x, 5), tt.log1p(x**2. / 4. + x**4. / 64. + x**6. / 2304. - + x**8. / 147456. + x**10. / 14745600. - + x**12. / 2123366400.), - x - 0.5 * tt.log(2. * np.pi * x) + tt.log1p(1. / (8. * x) - + 9. / (128. * x**2.) + 225. / (3072. * x**3.) - + 11025. / (98304. * x**4.))) + return tt.switch( + tt.lt(x, 5), + tt.log1p( + x ** 2.0 / 4.0 + + x ** 4.0 / 64.0 + + x ** 6.0 / 2304.0 + + x ** 8.0 / 147456.0 + + x ** 10.0 / 14745600.0 + + x ** 12.0 / 2123366400.0 + ), + x + - 0.5 * tt.log(2.0 * np.pi * x) + + tt.log1p( + 1.0 / (8.0 * x) + + 9.0 / (128.0 * x ** 2.0) + + 225.0 / (3072.0 * x ** 3.0) + + 11025.0 / (98304.0 * x ** 4.0) + ), + ) -scalar_psi = Psi(scalar.upgrade_to_float, name='scalar_psi') -psi = tt.Elemwise(scalar_psi, name='psi') +scalar_psi = Psi(scalar.upgrade_to_float, name="scalar_psi") +psi = tt.Elemwise(scalar_psi, name="psi") diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 3956550d82..ab637cd547 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -112,15 +112,7 @@ class AR(distribution.Continuous): """ def __init__( - self, - rho, - sigma=None, - tau=None, - constant=False, - init=Flat.dist(), - sd=None, - *args, - **kwargs + self, rho, sigma=None, tau=None, constant=False, init=Flat.dist(), sd=None, *args, **kwargs ): super().__init__(*args, **kwargs) if sd is not None: @@ -170,10 +162,7 @@ def logp(self, value): """ if self.constant: x = tt.add( - *[ - self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] - for i in range(self.p) - ] + *[self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)] ) eps = value[self.p :] - self.rho[0] - x else: @@ -181,10 +170,7 @@ def logp(self, value): x = self.rho * value[:-1] else: x = tt.add( - *[ - self.rho[i] * value[self.p - (i + 1) : -(i + 1)] - for i in range(self.p) - ] + *[self.rho[i] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)] ) eps = value[self.p :] - x @@ -219,15 +205,11 @@ class GaussianRandomWalk(distribution.Continuous): distribution for initial value (Defaults to Flat()) """ - def __init__( - self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *args, **kwargs - ): + def __init__(self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *args, **kwargs): kwargs.setdefault("shape", 1) super().__init__(*args, **kwargs) if sum(self.shape) == 0: - raise TypeError( - "GaussianRandomWalk must be supplied a non-zero shape argument!" - ) + raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!") if sd is not None: sigma = sd warnings.warn("sd is deprecated, use sigma instead", DeprecationWarning) @@ -284,9 +266,7 @@ def random(self, point=None, size=None): ------- array """ - sigma, mu = distribution.draw_values( - [self.sigma, self.mu], point=point, size=size - ) + sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size) return distribution.generate_samples( self._random, sigma=sigma, @@ -309,7 +289,7 @@ def _random(self, sigma, mu, size, sample_shape): rv = stats.norm(mu, sigma) data = rv.rvs(size).cumsum(axis=axis) data = np.array(data) - if len(data.shape)>1: + if len(data.shape) > 1: for i in range(data.shape[0]): data[i] = data[i] - data[i][0] else: @@ -454,15 +434,7 @@ class MvGaussianRandomWalk(distribution.Continuous): """ def __init__( - self, - mu=0.0, - cov=None, - tau=None, - chol=None, - lower=True, - init=Flat.dist(), - *args, - **kwargs + self, mu=0.0, cov=None, tau=None, chol=None, lower=True, init=Flat.dist(), *args, **kwargs ): super().__init__(*args, **kwargs) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index d9381a48a8..81a2c79ad6 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -235,8 +235,8 @@ def backward(self, x): def forward(self, x): """Inverse operation of softplus. - y = Log(Exp(x) - 1) - = Log(1 - Exp(-x)) + x + y = Log(Exp(x) - 1) + = Log(1 - Exp(-x)) + x """ return tt.log(1.0 - tt.exp(-x)) + x @@ -500,8 +500,7 @@ def t_stick_breaking(eps: float) -> StickBreaking: class Circular(ElemwiseTransform): - """Transforms a linear space into a circular one. - """ + """Transforms a linear space into a circular one.""" name = "circular" diff --git a/pymc3/plots/__init__.py b/pymc3/plots/__init__.py index 56a3442678..219b9e75cf 100644 --- a/pymc3/plots/__init__.py +++ b/pymc3/plots/__init__.py @@ -24,19 +24,23 @@ import arviz as az + def map_args(func): - swaps = [ - ('varnames', 'var_names') - ] + swaps = [("varnames", "var_names")] + @functools.wraps(func) def wrapped(*args, **kwargs): for (old, new) in swaps: if old in kwargs and new not in kwargs: - warnings.warn(f'Keyword argument `{old}` renamed to `{new}`, and will be removed in pymc3 3.8') + warnings.warn( + f"Keyword argument `{old}` renamed to `{new}`, and will be removed in pymc3 3.8" + ) kwargs[new] = kwargs.pop(old) return func(*args, **kwargs) + return wrapped + # pymc3 custom plots: override these names for custom behavior autocorrplot = map_args(az.plot_autocorr) forestplot = map_args(az.plot_forest) @@ -51,42 +55,52 @@ def wrapped(*args, **kwargs): @functools.wraps(az.plot_trace) def traceplot(*args, **kwargs): try: - kwargs.setdefault('compact', True) + kwargs.setdefault("compact", True) return az.plot_trace(*args, **kwargs) except TypeError: - kwargs.pop('compact') + kwargs.pop("compact") return az.plot_trace(*args, **kwargs) + # addition arg mapping for compare plot @functools.wraps(az.plot_compare) def compareplot(*args, **kwargs): - if 'comp_df' in kwargs: - comp_df = kwargs['comp_df'].copy() + if "comp_df" in kwargs: + comp_df = kwargs["comp_df"].copy() else: args = list(args) comp_df = args[0].copy() - if 'WAIC' in comp_df.columns: - comp_df = comp_df.rename(index=str, - columns={'WAIC': 'waic', - 'pWAIC': 'p_waic', - 'dWAIC': 'd_waic', - 'SE': 'se', - 'dSE': 'dse', - 'var_warn': 'warning'}) - elif 'LOO' in comp_df.columns: - comp_df = comp_df.rename(index=str, - columns={'LOO': 'loo', - 'pLOO': 'p_loo', - 'dLOO': 'd_loo', - 'SE': 'se', - 'dSE': 'dse', - 'shape_warn': 'warning'}) - if 'comp_df' in kwargs: - kwargs['comp_df'] = comp_df + if "WAIC" in comp_df.columns: + comp_df = comp_df.rename( + index=str, + columns={ + "WAIC": "waic", + "pWAIC": "p_waic", + "dWAIC": "d_waic", + "SE": "se", + "dSE": "dse", + "var_warn": "warning", + }, + ) + elif "LOO" in comp_df.columns: + comp_df = comp_df.rename( + index=str, + columns={ + "LOO": "loo", + "pLOO": "p_loo", + "dLOO": "d_loo", + "SE": "se", + "dSE": "dse", + "shape_warn": "warning", + }, + ) + if "comp_df" in kwargs: + kwargs["comp_df"] = comp_df else: args[0] = comp_df return az.plot_compare(*args, **kwargs) + from .posteriorplot import plot_posterior_predictive_glm @@ -95,14 +109,14 @@ def compareplot(*args, **kwargs): setattr(sys.modules[__name__], plot, map_args(getattr(az.plots, plot))) __all__ = tuple(az.plots.__all__) + ( - 'autocorrplot', - 'compareplot', - 'forestplot', - 'kdeplot', - 'plot_posterior', - 'traceplot', - 'energyplot', - 'densityplot', - 'pairplot', - 'plot_posterior_predictive_glm', + "autocorrplot", + "compareplot", + "forestplot", + "kdeplot", + "plot_posterior", + "traceplot", + "energyplot", + "densityplot", + "pairplot", + "plot_posterior_predictive_glm", ) diff --git a/pymc3/stats/__init__.py b/pymc3/stats/__init__.py index 9b059b4dcb..d4670d67ce 100644 --- a/pymc3/stats/__init__.py +++ b/pymc3/stats/__init__.py @@ -23,6 +23,7 @@ import arviz as az + def map_args(func): swaps = [("varnames", "var_names")] diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index a53ab44ba9..9dbe93a94a 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -27,35 +27,16 @@ adam, adamax, norm_constraint, - total_norm_constraint + total_norm_constraint, ) from . import inference -from .inference import ( - ADVI, - FullRankADVI, - SVGD, - ASVGD, - NFVI, - Inference, - KLqp, - ImplicitGradient, - fit -) +from .inference import ADVI, FullRankADVI, SVGD, ASVGD, NFVI, Inference, KLqp, ImplicitGradient, fit from . import approximations -from .approximations import ( - MeanField, - FullRank, - Empirical, - NormalizingFlow, - sample_approx -) +from .approximations import MeanField, FullRank, Empirical, NormalizingFlow, sample_approx from . import opvi -from .opvi import ( - Group, - Approximation -) +from .opvi import Group, Approximation # special from .stein import Stein From 154ea6384908fec6ff642923f7e26fba3c5ce1d2 Mon Sep 17 00:00:00 2001 From: Ali Akbar Septiandri Date: Sun, 27 Sep 2020 12:17:42 +0100 Subject: [PATCH 2/7] Run black on step_methods/ --- pymc3/step_methods/arraystep.py | 47 ++-- pymc3/step_methods/compound.py | 13 +- pymc3/step_methods/elliptical_slice.py | 7 +- pymc3/step_methods/gibbs.py | 27 ++- pymc3/step_methods/hmc/base_hmc.py | 24 +- pymc3/step_methods/hmc/hmc.py | 71 +++--- pymc3/step_methods/hmc/integration.py | 13 +- pymc3/step_methods/hmc/nuts.py | 38 +-- pymc3/step_methods/hmc/quadpotential.py | 113 +++++---- pymc3/step_methods/metropolis.py | 309 ++++++++++++++---------- pymc3/step_methods/sgmcmc.py | 63 +++-- pymc3/step_methods/slicer.py | 27 ++- pymc3/step_methods/step_sizes.py | 24 +- 13 files changed, 410 insertions(+), 366 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 863c7a8075..d3fc891270 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -21,8 +21,7 @@ from numpy.random import uniform from enum import IntEnum, unique -__all__ = [ - 'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence'] +__all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"] @unique @@ -34,6 +33,7 @@ class Competence(IntEnum): 2: PREFERRED 3: IDEAL """ + INCOMPATIBLE = 0 COMPATIBLE = 1 PREFERRED = 2 @@ -45,21 +45,21 @@ class BlockedStep: generates_stats = False def __new__(cls, *args, **kwargs): - blocked = kwargs.get('blocked') + blocked = kwargs.get("blocked") if blocked is None: # Try to look up default value from class - blocked = getattr(cls, 'default_blocked', True) - kwargs['blocked'] = blocked + blocked = getattr(cls, "default_blocked", True) + kwargs["blocked"] = blocked - model = modelcontext(kwargs.get('model')) - kwargs.update({'model':model}) + model = modelcontext(kwargs.get("model")) + kwargs.update({"model": model}) # vars can either be first arg or a kwarg - if 'vars' not in kwargs and len(args) >= 1: + if "vars" not in kwargs and len(args) >= 1: vars = args[0] args = args[1:] - elif 'vars' in kwargs: - vars = kwargs.pop('vars') + elif "vars" in kwargs: + vars = kwargs.pop("vars") else: # Assume all model variables vars = model.vars @@ -67,7 +67,7 @@ def __new__(cls, *args, **kwargs): vars = inputvars(vars) if len(vars) == 0: - raise ValueError('No free random variables to sample.') + raise ValueError("No free random variables to sample.") if not blocked and len(vars) > 1: # In this case we create a separate sampler for each var @@ -79,14 +79,14 @@ def __new__(cls, *args, **kwargs): # call __init__ step.__init__([var], *args, **kwargs) # Hack for creating the class correctly when unpickling. - step.__newargs = ([var], ) + args, kwargs + step.__newargs = ([var],) + args, kwargs steps.append(step) return CompoundStep(steps) else: step = super().__new__(cls) # Hack for creating the class correctly when unpickling. - step.__newargs = (vars, ) + args, kwargs + step.__newargs = (vars,) + args, kwargs return step # Hack for creating the class correctly when unpickling. @@ -119,7 +119,7 @@ def vars_shape_dtype(self): return shape_dtypes def stop_tuning(self): - if hasattr(self, 'tune'): + if hasattr(self, "tune"): self.tune = False @@ -228,22 +228,24 @@ def link_population(self, population, chain_index): self.other_chains = [c for c in range(len(population)) if c != chain_index] if not len(self.other_chains) > 1: raise ValueError( - 'Population is just {} + {}. ' \ - 'This is too small and the error should have been raised earlier.'.format(self.this_chain, self.other_chains) + "Population is just {} + {}. " + "This is too small and the error should have been raised earlier.".format( + self.this_chain, self.other_chains + ) ) return class GradientSharedStep(BlockedStep): - def __init__(self, vars, model=None, blocked=True, - dtype=None, logp_dlogp_func=None, **theano_kwargs): + def __init__( + self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **theano_kwargs + ): model = modelcontext(model) self.vars = vars self.blocked = blocked if logp_dlogp_func is None: - func = model.logp_dlogp_function( - vars, dtype=dtype, **theano_kwargs) + func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs) else: func = logp_dlogp_func @@ -255,9 +257,8 @@ def __init__(self, vars, model=None, blocked=True, except ValueError: if logp_dlogp_func is not None: raise - theano_kwargs.update(mode='FAST_COMPILE') - func = model.logp_dlogp_function( - vars, dtype=dtype, **theano_kwargs) + theano_kwargs.update(mode="FAST_COMPILE") + func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs) self._logp_dlogp_func = func diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py index f3d7337ac2..15809ccda4 100644 --- a/pymc3/step_methods/compound.py +++ b/pymc3/step_methods/compound.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -''' +""" Created on Mar 7, 2011 @author: johnsalvatier -''' +""" from collections import namedtuple import numpy as np @@ -27,8 +27,7 @@ class CompoundStep: def __init__(self, methods): self.methods = list(methods) - self.generates_stats = any( - method.generates_stats for method in self.methods) + self.generates_stats = any(method.generates_stats for method in self.methods) self.stats_dtypes = [] for method in self.methods: if method.generates_stats: @@ -47,7 +46,7 @@ def step(self, point): # one. Pop all others (if dict), or set to np.nan (if namedtuple). for state in states[:-1]: if isinstance(state, dict): - state.pop('model_logp', None) + state.pop("model_logp", None) elif isinstance(state, namedtuple): state = state._replace(logp=np.nan) return point, states @@ -59,7 +58,7 @@ def step(self, point): def warnings(self): warns = [] for method in self.methods: - if hasattr(method, 'warnings'): + if hasattr(method, "warnings"): warns.extend(method.warnings()) return warns @@ -69,7 +68,7 @@ def stop_tuning(self): def reset_tuning(self): for method in self.methods: - if hasattr(method, 'reset_tuning'): + if hasattr(method, "reset_tuning"): method.reset_tuning() @property diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py index 31904e8105..65944cdf87 100644 --- a/pymc3/step_methods/elliptical_slice.py +++ b/pymc3/step_methods/elliptical_slice.py @@ -21,7 +21,7 @@ from ..theanof import inputvars from ..distributions import draw_values -__all__ = ['EllipticalSlice'] +__all__ = ["EllipticalSlice"] def get_chol(cov, chol): @@ -41,7 +41,7 @@ def get_chol(cov, chol): """ if len([i for i in [cov, chol] if i is not None]) != 1: - raise ValueError('Must pass exactly one of cov or chol') + raise ValueError("Must pass exactly one of cov or chol") if cov is not None: chol = tt.slinalg.cholesky(cov) @@ -83,8 +83,7 @@ class EllipticalSlice(ArrayStep): default_blocked = True - def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, - **kwargs): + def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs): self.model = modelcontext(model) chol = get_chol(prior_cov, prior_chol) self.prior_chol = tt.as_tensor_variable(chol) diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py index 9a72172907..4f1dc09f50 100644 --- a/pymc3/step_methods/gibbs.py +++ b/pymc3/step_methods/gibbs.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -''' +""" Created on May 12, 2012 @author: john -''' +""" from .arraystep import ArrayStep, Competence from ..distributions.discrete import Categorical from numpy import array, max, exp, cumsum, nested_iters, empty, searchsorted, ones, arange @@ -26,7 +26,8 @@ from theano.gof.graph import inputs from theano.tensor import add from ..model import modelcontext -__all__ = ['ElemwiseCategorical'] + +__all__ = ["ElemwiseCategorical"] class ElemwiseCategorical(ArrayStep): @@ -35,13 +36,17 @@ class ElemwiseCategorical(ArrayStep): the variable can't be indexed into or transposed or anything otherwise that will mess things up """ + # TODO: It would be great to come up with a way to make # ElemwiseCategorical more general (handling more complex elementwise # variables) def __init__(self, vars, values=None, model=None): - warn('ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.', - DeprecationWarning, stacklevel = 2) + warn( + "ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.", + DeprecationWarning, + stacklevel=2, + ) model = modelcontext(model) self.var = vars[0] self.sh = ones(self.var.dshape, self.var.dtype) @@ -64,8 +69,7 @@ def competence(var, has_grad): def elemwise_logp(model, var): - terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([ - v.logpt])] + terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([v.logpt])] return model.fn(add(*terms)) @@ -73,9 +77,12 @@ def categorical(prob, shape): out = empty([1] + list(shape)) n = len(shape) - it0, it1 = nested_iters([prob, out], [list(range(1, n + 1)), [0]], - op_flags=[['readonly'], ['readwrite']], - flags=['reduce_ok']) + it0, it1 = nested_iters( + [prob, out], + [list(range(1, n + 1)), [0]], + op_flags=[["readonly"], ["readwrite"]], + flags=["reduce_ok"], + ) for _ in it0: p, o = it1.itviews diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 8551e45962..734f51e055 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -29,15 +29,9 @@ logger = logging.getLogger("pymc3") -HMCStepData = namedtuple( - "HMCStepData", - "end, accept_stat, divergence_info, stats" -) +HMCStepData = namedtuple("HMCStepData", "end, accept_stat, divergence_info, stats") -DivergenceInfo = namedtuple( - "DivergenceInfo", - "message, exec_info, state, state_div" -) +DivergenceInfo = namedtuple("DivergenceInfo", "message, exec_info, state, state_div") class BaseHMC(arraystep.GradientSharedStep): @@ -121,9 +115,7 @@ def __init__( else: self.potential = quad_potential(scaling, is_cov) - self.integrator = integration.CpuLeapfrogIntegrator( - self.potential, self._logp_dlogp_func - ) + self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func) self._step_rand = step_rand self._warnings = [] @@ -154,8 +146,7 @@ def astep(self, q0): self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap) message_energy = ( "Bad initial energy, check any log probabilities that " - "are inf or -inf, nan or very small:\n{}" - .format(error_logp.to_string()) + "are inf or -inf, nan or very small:\n{}".format(error_logp.to_string()) ) warning = SamplerWarning( WarningType.BAD_ENERGY, @@ -194,9 +185,7 @@ def astep(self, q0): if self._num_divs_sample < 100 and info.state is not None: point = self._logp_dlogp_func.array_to_dict(info.state.q) if self._num_divs_sample < 100 and info.state_div is not None: - point_dest = self._logp_dlogp_func.array_to_dict( - info.state_div.q - ) + point_dest = self._logp_dlogp_func.array_to_dict(info.state_div.q) if self._num_divs_sample < 100: info_store = info warning = SamplerWarning( @@ -246,8 +235,7 @@ def warnings(self): n_divs = self._num_divs_sample if n_divs and self._samples_after_tune == n_divs: message = ( - "The chain contains only diverging samples. The model " - "is probably misspecified." + "The chain contains only diverging samples. The model " "is probably misspecified." ) elif n_divs == 1: message = ( diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index 6b68662fc2..621e7454ce 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -20,10 +20,10 @@ from pymc3.step_methods.hmc.base_hmc import BaseHMC, HMCStepData, DivergenceInfo -__all__ = ['HamiltonianMC'] +__all__ = ["HamiltonianMC"] -def unif(step_size, elow=.85, ehigh=1.15): +def unif(step_size, elow=0.85, ehigh=1.15): return np.random.uniform(elow, ehigh) * step_size @@ -33,27 +33,29 @@ class HamiltonianMC(BaseHMC): See NUTS sampler for automatically tuned stopping time and step size scaling. """ - name = 'hmc' + name = "hmc" default_blocked = True generates_stats = True - stats_dtypes = [{ - 'step_size': np.float64, - 'n_steps': np.int64, - 'tune': np.bool, - 'step_size_bar': np.float64, - 'accept': np.float64, - 'diverging': np.bool, - 'energy_error': np.float64, - 'energy': np.float64, - 'path_length': np.float64, - 'accepted': np.bool, - 'model_logp': np.float64, - 'process_time_diff': np.float64, - 'perf_counter_diff': np.float64, - 'perf_counter_start': np.float64, - }] - - def __init__(self, vars=None, path_length=2., max_steps=1024, **kwargs): + stats_dtypes = [ + { + "step_size": np.float64, + "n_steps": np.int64, + "tune": np.bool, + "step_size_bar": np.float64, + "accept": np.float64, + "diverging": np.bool, + "energy_error": np.float64, + "energy": np.float64, + "path_length": np.float64, + "accepted": np.bool, + "model_logp": np.float64, + "process_time_diff": np.float64, + "perf_counter_diff": np.float64, + "perf_counter_start": np.float64, + } + ] + + def __init__(self, vars=None, path_length=2.0, max_steps=1024, **kwargs): """Set up the Hamiltonian Monte Carlo sampler. Parameters @@ -104,8 +106,8 @@ def __init__(self, vars=None, path_length=2., max_steps=1024, **kwargs): The model **kwargs: passed to BaseHMC """ - kwargs.setdefault('step_rand', unif) - kwargs.setdefault('target_accept', 0.65) + kwargs.setdefault("step_rand", unif) + kwargs.setdefault("target_accept", 0.65) super().__init__(vars, **kwargs) self.path_length = path_length self.max_steps = max_steps @@ -123,18 +125,17 @@ def _hamiltonian_step(self, start, p0, step_size): last = state state = self.integrator.step(step_size, state) except IntegrationError as e: - div_info = DivergenceInfo('Integration failed.', e, last, None) + div_info = DivergenceInfo("Integration failed.", e, last, None) else: if not np.isfinite(state.energy): - div_info = DivergenceInfo( - 'Divergence encountered, bad energy.', None, last, state) + div_info = DivergenceInfo("Divergence encountered, bad energy.", None, last, state) energy_change = start.energy - state.energy if np.isnan(energy_change): energy_change = -np.inf if np.abs(energy_change) > self.Emax: div_info = DivergenceInfo( - 'Divergence encountered, large integration error.', - None, last, state) + "Divergence encountered, large integration error.", None, last, state + ) accept_stat = min(1, np.exp(energy_change)) @@ -146,13 +147,13 @@ def _hamiltonian_step(self, start, p0, step_size): accepted = True stats = { - 'path_length': self.path_length, - 'n_steps': n_steps, - 'accept': accept_stat, - 'energy_error': energy_change, - 'energy': state.energy, - 'accepted': accepted, - 'model_logp': state.model_logp, + "path_length": self.path_length, + "n_steps": n_steps, + "accept": accept_stat, + "energy_error": energy_change, + "energy": state.energy, + "accepted": accepted, + "model_logp": state.model_logp, } return HMCStepData(end, accept_stat, div_info, stats) diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py index 2e52e094be..9cf37978a4 100644 --- a/pymc3/step_methods/hmc/integration.py +++ b/pymc3/step_methods/hmc/integration.py @@ -18,7 +18,7 @@ from scipy import linalg -State = namedtuple("State", 'q, p, v, q_grad, energy, model_logp') +State = namedtuple("State", "q, p, v, q_grad, energy, model_logp") class IntegrationError(RuntimeError): @@ -32,14 +32,15 @@ def __init__(self, potential, logp_dlogp_func): self._logp_dlogp_func = logp_dlogp_func self._dtype = self._logp_dlogp_func.dtype if self._potential.dtype != self._dtype: - raise ValueError("dtypes of potential (%s) and logp function (%s)" - "don't match." - % (self._potential.dtype, self._dtype)) + raise ValueError( + "dtypes of potential (%s) and logp function (%s)" + "don't match." % (self._potential.dtype, self._dtype) + ) def compute_state(self, q, p): """Compute Hamiltonian functions using a position and momentum.""" if q.dtype != self._dtype or p.dtype != self._dtype: - raise ValueError('Invalid dtype. Must be %s' % self._dtype) + raise ValueError("Invalid dtype. Must be %s" % self._dtype) logp, dlogp = self._logp_dlogp_func(q) v = self._potential.velocity(p) kinetic = self._potential.energy(p, velocity=v) @@ -79,7 +80,7 @@ def step(self, epsilon, state): raise def _step(self, epsilon, state): - axpy = linalg.blas.get_blas_funcs('axpy', dtype=self._dtype) + axpy = linalg.blas.get_blas_funcs("axpy", dtype=self._dtype) pot = self._potential q_new = state.q.copy() diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index 65079d582d..782a350654 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -210,7 +210,7 @@ def warnings(self): "The chain reached the maximum tree depth. Increase " "max_treedepth, increase target_accept or reparameterize." ) - warn = SamplerWarning(WarningType.TREEDEPTH, msg, 'warn') + warn = SamplerWarning(WarningType.TREEDEPTH, msg, "warn") warnings.append(warn) return warnings @@ -249,9 +249,7 @@ def __init__(self, ndim, integrator, start, step_size, Emax): self.start_energy = np.array(start.energy) self.left = self.right = start - self.proposal = Proposal( - start.q, start.q_grad, start.energy, 1.0, start.model_logp - ) + self.proposal = Proposal(start.q, start.q_grad, start.energy, 1.0, start.model_logp) self.depth = 0 self.log_size = 0 self.log_weighted_accept_sum = -np.inf @@ -313,13 +311,9 @@ def extend(self, direction): p_sum = self.p_sum turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0) p_sum1 = leftmost_p_sum + rightmost_begin.p - turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or ( - p_sum1.dot(rightmost_begin.v) <= 0 - ) + turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (p_sum1.dot(rightmost_begin.v) <= 0) p_sum2 = leftmost_end.p + rightmost_p_sum - turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or ( - p_sum2.dot(rightmost_end.v) <= 0 - ) + turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0) turning = turning | turning1 | turning2 return diverging, turning @@ -354,14 +348,10 @@ def _single_step(self, left, epsilon): log_p_accept_weighted, right.model_logp, ) - tree = Subtree( - right, right, right.p, proposal, log_size, log_p_accept_weighted, 1 - ) + tree = Subtree(right, right, right.p, proposal, log_size, log_p_accept_weighted, 1) return tree, None, False else: - error_msg = ( - "Energy change in leapfrog step is too large: %s." % energy_change - ) + error_msg = "Energy change in leapfrog step is too large: %s." % energy_change error = None tree = Subtree(None, None, None, None, -np.inf, -np.inf, 1) divergance_info = DivergenceInfo(error_msg, error, left, right) @@ -385,13 +375,9 @@ def _build_subtree(self, left, depth, epsilon): # Additional U turn check only when depth > 1 to avoid redundant work. if depth - 1 > 0: p_sum1 = tree1.p_sum + tree2.left.p - turning1 = (p_sum1.dot(tree1.left.v) <= 0) or ( - p_sum1.dot(tree2.left.v) <= 0 - ) + turning1 = (p_sum1.dot(tree1.left.v) <= 0) or (p_sum1.dot(tree2.left.v) <= 0) p_sum2 = tree1.right.p + tree2.p_sum - turning2 = (p_sum2.dot(tree1.right.v) <= 0) or ( - p_sum2.dot(tree2.right.v) <= 0 - ) + turning2 = (p_sum2.dot(tree1.right.v) <= 0) or (p_sum2.dot(tree2.right.v) <= 0) turning = turning | turning1 | turning2 log_size = np.logaddexp(tree1.log_size, tree2.log_size) @@ -410,9 +396,7 @@ def _build_subtree(self, left, depth, epsilon): n_proposals = tree1.n_proposals + tree2.n_proposals - tree = Subtree( - left, right, p_sum, proposal, log_size, log_weighted_accept_sum, n_proposals - ) + tree = Subtree(left, right, p_sum, proposal, log_size, log_weighted_accept_sum, n_proposals) return tree, diverging, turning def stats(self): @@ -421,9 +405,7 @@ def stats(self): # Remove contribution from initial state which is always a perfect # accept log_sum_weight = logdiffexp_numpy(self.log_size, 0.0) - self.mean_tree_accept = np.exp( - self.log_weighted_accept_sum - log_sum_weight - ) + self.mean_tree_accept = np.exp(self.log_weighted_accept_sum - log_sum_weight) return { "depth": self.depth, "mean_tree_accept": self.mean_tree_accept, diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index c55f12a365..fd962f48cb 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -22,9 +22,15 @@ from pymc3.theanof import floatX -__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull', - 'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', - 'QuadPotentialFullAdapt', 'isquadpotential'] +__all__ = [ + "quad_potential", + "QuadPotentialDiag", + "QuadPotentialFull", + "QuadPotentialFullInv", + "QuadPotentialDiagAdapt", + "QuadPotentialFullAdapt", + "isquadpotential", +] def quad_potential(C, is_cov): @@ -56,7 +62,7 @@ def quad_potential(C, is_cov): if is_cov: return QuadPotentialDiag(C) else: - return QuadPotentialDiag(1. / C) + return QuadPotentialDiag(1.0 / C) else: if is_cov: return QuadPotentialFull(C) @@ -70,11 +76,10 @@ def partial_check_positive_definite(C): d = C else: d = np.diag(C) - i, = np.nonzero(np.logical_or(np.isnan(d), d <= 0)) + (i,) = np.nonzero(np.logical_or(np.isnan(d), d <= 0)) if len(i): - raise PositiveDefiniteError( - "Simple check failed. Diagonal contains negatives", i) + raise PositiveDefiniteError("Simple check failed. Diagonal contains negatives", i) class PositiveDefiniteError(ValueError): @@ -84,23 +89,22 @@ def __init__(self, msg, idx): self.msg = msg def __str__(self): - return ("Scaling is not positive definite: %s. Check indexes %s." - % (self.msg, self.idx)) + return "Scaling is not positive definite: %s. Check indexes %s." % (self.msg, self.idx) class QuadPotential: def velocity(self, x, out=None): """Compute the current velocity at a position in parameter space.""" - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def energy(self, x, velocity=None): - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def random(self, x): - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def velocity_energy(self, x, v_out): - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning. @@ -152,15 +156,17 @@ def __init__( ): """Set up a diagonal mass matrix.""" if initial_diag is not None and initial_diag.ndim != 1: - raise ValueError('Initial diagonal must be one-dimensional.') + raise ValueError("Initial diagonal must be one-dimensional.") if initial_mean.ndim != 1: - raise ValueError('Initial mean must be one-dimensional.') + raise ValueError("Initial mean must be one-dimensional.") if initial_diag is not None and len(initial_diag) != n: - raise ValueError('Wrong shape for initial_diag: expected %s got %s' - % (n, len(initial_diag))) + raise ValueError( + "Wrong shape for initial_diag: expected %s got %s" % (n, len(initial_diag)) + ) if len(initial_mean) != n: - raise ValueError('Wrong shape for initial_mean: expected %s got %s' - % (n, len(initial_mean))) + raise ValueError( + "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean)) + ) if dtype is None: dtype = theano.config.floatX @@ -184,9 +190,10 @@ def reset(self): self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True) self._var_theano = theano.shared(self._var) self._stds = np.sqrt(self._initial_diag) - self._inv_stds = floatX(1.) / self._stds + self._inv_stds = floatX(1.0) / self._stds self._foreground_var = _WeightedVariance( - self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype) + self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype + ) self._background_var = _WeightedVariance(self._n, dtype=self.dtype) self._n_samples = 0 @@ -256,11 +263,12 @@ def raise_ok(self, vmap): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(self._stds == 0)[0] - errmsg = ['Mass matrix contains zeros on the diagonal. '] + errmsg = ["Mass matrix contains zeros on the diagonal. "] for ii in index: - errmsg.append('The derivative of RV `{}`.ravel()[{}]' - ' is zero.'.format(*name_slc[ii])) - raise ValueError('\n'.join(errmsg)) + errmsg.append( + "The derivative of RV `{}`.ravel()[{}]" " is zero.".format(*name_slc[ii]) + ) + raise ValueError("\n".join(errmsg)) if np.any(~np.isfinite(self._stds)): name_slc = [] @@ -270,11 +278,12 @@ def raise_ok(self, vmap): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(~np.isfinite(self._stds))[0] - errmsg = ['Mass matrix contains non-finite values on the diagonal. '] + errmsg = ["Mass matrix contains non-finite values on the diagonal. "] for ii in index: - errmsg.append('The derivative of RV `{}`.ravel()[{}]' - ' is non-finite.'.format(*name_slc[ii])) - raise ValueError('\n'.join(errmsg)) + errmsg.append( + "The derivative of RV `{}`.ravel()[{}]" " is non-finite.".format(*name_slc[ii]) + ) + raise ValueError("\n".join(errmsg)) class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt): @@ -321,25 +330,26 @@ def update(self, sample, grad, tune): class _WeightedVariance: """Online algorithm for computing mean of variance.""" - def __init__(self, nelem, initial_mean=None, initial_variance=None, - initial_weight=0, dtype='d'): + def __init__( + self, nelem, initial_mean=None, initial_variance=None, initial_weight=0, dtype="d" + ): self._dtype = dtype self.n_samples = float(initial_weight) if initial_mean is None: - self.mean = np.zeros(nelem, dtype='d') + self.mean = np.zeros(nelem, dtype="d") else: - self.mean = np.array(initial_mean, dtype='d', copy=True) + self.mean = np.array(initial_mean, dtype="d", copy=True) if initial_variance is None: - self.raw_var = np.zeros(nelem, dtype='d') + self.raw_var = np.zeros(nelem, dtype="d") else: - self.raw_var = np.array(initial_variance, dtype='d', copy=True) + self.raw_var = np.array(initial_variance, dtype="d", copy=True) self.raw_var[:] *= self.n_samples if self.raw_var.shape != (nelem,): - raise ValueError('Invalid shape for initial variance.') + raise ValueError("Invalid shape for initial variance.") if self.mean.shape != (nelem,): - raise ValueError('Invalid shape for initial mean.') + raise ValueError("Invalid shape for initial mean.") def add_sample(self, x, weight): x = np.asarray(x) @@ -347,11 +357,11 @@ def add_sample(self, x, weight): old_diff = x - self.mean self.mean[:] += old_diff / self.n_samples new_diff = x - self.mean - self.raw_var[:] += weight * old_diff * new_diff + self.raw_var[:] += weight * old_diff * new_diff def current_variance(self, out=None): if self.n_samples == 0: - raise ValueError('Can not compute variance without samples.') + raise ValueError("Can not compute variance without samples.") if out is not None: return np.divide(self.raw_var, self.n_samples, out=out) else: @@ -376,10 +386,10 @@ def __init__(self, v, dtype=None): dtype = theano.config.floatX self.dtype = dtype v = v.astype(self.dtype) - s = v ** .5 + s = v ** 0.5 self.s = s - self.inv_s = 1. / s + self.inv_s = 1.0 / s self.v = v def velocity(self, x, out=None): @@ -397,7 +407,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is not None: return 0.5 * np.dot(x, velocity) - return .5 * x.dot(self.v * x) + return 0.5 * x.dot(self.v * x) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -437,7 +447,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is None: velocity = self.velocity(x) - return .5 * x.dot(velocity) + return 0.5 * x.dot(velocity) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -470,8 +480,7 @@ def velocity(self, x, out=None): def random(self): """Draw random value from QuadPotential.""" vals = np.random.normal(size=self._n).astype(self.dtype) - return scipy.linalg.solve_triangular(self._chol.T, vals, - overwrite_b=True) + return scipy.linalg.solve_triangular(self._chol.T, vals, overwrite_b=True) def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" @@ -489,6 +498,7 @@ def velocity_energy(self, x, v_out): class QuadPotentialFullAdapt(QuadPotentialFull): """Adapt a dense mass matrix using the sample covariances.""" + def __init__( self, n, @@ -508,13 +518,11 @@ def __init__( raise ValueError("Initial mean must be one-dimensional.") if initial_cov is not None and initial_cov.shape != (n, n): raise ValueError( - "Wrong shape for initial_cov: expected %s got %s" - % (n, initial_cov.shape) + "Wrong shape for initial_cov: expected %s got %s" % (n, initial_cov.shape) ) if len(initial_mean) != n: raise ValueError( - "Wrong shape for initial_mean: expected %s got %s" - % (n, len(initial_mean)) + "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean)) ) if dtype is None: @@ -573,9 +581,7 @@ def update(self, sample, grad, tune): # window. if delta >= self.adaptation_window: self._foreground_cov = self._background_cov - self._background_cov = _WeightedCovariance( - self._n, dtype=self.dtype - ) + self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype) self._previous_update = self._n_samples self.adaptation_window = int(self.adaptation_window * self.adaptation_window_multiplier) @@ -645,12 +651,13 @@ def current_mean(self): try: import sksparse.cholmod as cholmod + chol_available = True except ImportError: chol_available = False if chol_available: - __all__ += ['QuadPotentialSparse'] + __all__ += ["QuadPotentialSparse"] import theano.sparse diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 383536725d..89b0ff63af 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -18,13 +18,29 @@ import scipy.linalg from ..distributions import draw_values -from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence +from .arraystep import ( + ArrayStepShared, + PopulationArrayStepShared, + ArrayStep, + metrop_select, + Competence, +) import pymc3 as pm from pymc3.theanof import floatX -__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis', - 'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal', - 'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal'] +__all__ = [ + "Metropolis", + "DEMetropolis", + "DEMetropolisZ", + "BinaryMetropolis", + "BinaryGibbsMetropolis", + "CategoricalGibbsMetropolis", + "NormalProposal", + "CauchyProposal", + "LaplaceProposal", + "PoissonProposal", + "MultivariateNormalProposal", +] # Available proposal distributions for Metropolis @@ -101,19 +117,32 @@ class Metropolis(ArrayStepShared): mode: string or `Mode` instance. compilation mode passed to Theano functions """ - name = 'metropolis' + + name = "metropolis" default_blocked = False generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'accepted': np.bool, - 'tune': np.bool, - 'scaling': np.float64, - }] - - def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., - tune=True, tune_interval=100, model=None, mode=None, **kwargs): + stats_dtypes = [ + { + "accept": np.float64, + "accepted": np.bool, + "tune": np.bool, + "scaling": np.float64, + } + ] + + def __init__( + self, + vars=None, + S=None, + proposal_dist=None, + scaling=1.0, + tune=True, + tune_interval=100, + model=None, + mode=None, + **kwargs + ): model = pm.modelcontext(model) @@ -133,7 +162,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., else: raise ValueError("Invalid rank for variance: %s" % S.ndim) - self.scaling = np.atleast_1d(scaling).astype('d') + self.scaling = np.atleast_1d(scaling).astype("d") self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval @@ -141,15 +170,14 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., # Determine type of variables self.discrete = np.concatenate( - [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]) + [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars] + ) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() # remember initial settings before tuning so they can be reset self._untuned_settings = dict( - scaling=self.scaling, - steps_until_tune=tune_interval, - accepted=self.accepted + scaling=self.scaling, steps_until_tune=tune_interval, accepted=self.accepted ) self.mode = mode @@ -167,8 +195,7 @@ def reset_tuning(self): def astep(self, q0): if not self.steps_until_tune and self.tune: # Tune scaling parameter - self.scaling = tune( - self.scaling, self.accepted / float(self.tune_interval)) + self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval self.accepted = 0 @@ -177,13 +204,12 @@ def astep(self, q0): if self.any_discrete: if self.all_discrete: - delta = np.round(delta, 0).astype('int64') - q0 = q0.astype('int64') - q = (q0 + delta).astype('int64') + delta = np.round(delta, 0).astype("int64") + q0 = q0.astype("int64") + q = (q0 + delta).astype("int64") else: - delta[self.discrete] = np.round( - delta[self.discrete], 0) - q = (q0 + delta) + delta[self.discrete] = np.round(delta[self.discrete], 0) + q = q0 + delta else: q = floatX(q0 + delta) @@ -194,10 +220,10 @@ def astep(self, q0): self.steps_until_tune -= 1 stats = { - 'tune': self.tune, - 'scaling': self.scaling, - 'accept': np.exp(accept), - 'accepted': accepted, + "tune": self.tune, + "scaling": self.scaling, + "accept": np.exp(accept), + "accepted": accepted, } return q_new, [stats] @@ -261,16 +287,19 @@ class BinaryMetropolis(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - name = 'binary_metropolis' + + name = "binary_metropolis" generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'tune': np.bool, - 'p_jump': np.float64, - }] + stats_dtypes = [ + { + "accept": np.float64, + "tune": np.bool, + "p_jump": np.float64, + } + ] - def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None): + def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None): model = pm.modelcontext(model) @@ -281,20 +310,19 @@ def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None): self.accepted = 0 if not all([v.dtype in pm.discrete_types for v in vars]): - raise ValueError( - 'All variables must be Bernoulli for BinaryMetropolis') + raise ValueError("All variables must be Bernoulli for BinaryMetropolis") super().__init__(vars, [model.fastlogp]) def astep(self, q0, logp): # Convert adaptive_scale_factor to a jump probability - p_jump = 1. - .5 ** self.scaling + p_jump = 1.0 - 0.5 ** self.scaling rand_array = nr.random(q0.shape) q = np.copy(q0) # Locations where switches occur, according to p_jump - switch_locs = (rand_array < p_jump) + switch_locs = rand_array < p_jump q[switch_locs] = True - q[switch_locs] accept = logp(q) - logp(q0) @@ -302,21 +330,20 @@ def astep(self, q0, logp): self.accepted += accepted stats = { - 'tune': self.tune, - 'accept': np.exp(accept), - 'p_jump': p_jump, + "tune": self.tune, + "accept": np.exp(accept), + "p_jump": p_jump, } return q_new, [stats] @staticmethod def competence(var): - ''' + """ BinaryMetropolis is only suitable for binary (bool) and Categorical variables with k=1. - ''' - distribution = getattr( - var.distribution, 'parent_dist', var.distribution) + """ + distribution = getattr(var.distribution, "parent_dist", var.distribution) if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types): return Competence.COMPATIBLE elif isinstance(distribution, pm.Categorical) and (distribution.k == 2): @@ -341,9 +368,10 @@ class BinaryGibbsMetropolis(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - name = 'binary_gibbs_metropolis' - def __init__(self, vars, order='random', transit_p=.8, model=None): + name = "binary_gibbs_metropolis" + + def __init__(self, vars, order="random", transit_p=0.8, model=None): model = pm.modelcontext(model) @@ -352,18 +380,17 @@ def __init__(self, vars, order='random', transit_p=.8, model=None): self.dim = sum(v.dsize for v in vars) - if order == 'random': + if order == "random": self.shuffle_dims = True self.order = list(range(self.dim)) else: if sorted(order) != list(range(self.dim)): - raise ValueError('Argument \'order\' has to be a permutation') + raise ValueError("Argument 'order' has to be a permutation") self.shuffle_dims = False self.order = order if not all([v.dtype in pm.discrete_types for v in vars]): - raise ValueError( - 'All variables must be binary for BinaryGibbsMetropolis') + raise ValueError("All variables must be binary for BinaryGibbsMetropolis") super().__init__(vars, [model.fastlogp]) @@ -389,12 +416,11 @@ def astep(self, q0, logp): @staticmethod def competence(var): - ''' + """ BinaryMetropolis is only suitable for Bernoulli and Categorical variables with k=2. - ''' - distribution = getattr( - var.distribution, 'parent_dist', var.distribution) + """ + distribution = getattr(var.distribution, "parent_dist", var.distribution) if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types): return Competence.IDEAL elif isinstance(distribution, pm.Categorical) and (distribution.k == 2): @@ -404,15 +430,16 @@ def competence(var): class CategoricalGibbsMetropolis(ArrayStep): """A Metropolis-within-Gibbs step method optimized for categorical variables. - This step method works for Bernoulli variables as well, but it is not - optimized for them, like BinaryGibbsMetropolis is. Step method supports - two types of proposals: A uniform proposal and a proportional proposal, - which was introduced by Liu in his 1996 technical report - "Metropolized Gibbs Sampler: An Improvement". + This step method works for Bernoulli variables as well, but it is not + optimized for them, like BinaryGibbsMetropolis is. Step method supports + two types of proposals: A uniform proposal and a proportional proposal, + which was introduced by Liu in his 1996 technical report + "Metropolized Gibbs Sampler: An Improvement". """ - name = 'categorical_gibbs_metropolis' - def __init__(self, vars, proposal='uniform', order='random', model=None): + name = "categorical_gibbs_metropolis" + + def __init__(self, vars, proposal="uniform", order="random", model=None): model = pm.modelcontext(model) vars = pm.inputvars(vars) @@ -423,34 +450,36 @@ def __init__(self, vars, proposal='uniform', order='random', model=None): # variable with M categories and y being a 3-D variable with N # categories, we will have dimcats = [(0, M), (1, M), (2, N), (3, N), (4, N)]. for v in vars: - distr = getattr(v.distribution, 'parent_dist', v.distribution) + distr = getattr(v.distribution, "parent_dist", v.distribution) if isinstance(distr, pm.Categorical): k = draw_values([distr.k])[0] elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types): k = 2 else: - raise ValueError('All variables must be categorical or binary' + - 'for CategoricalGibbsMetropolis') + raise ValueError( + "All variables must be categorical or binary" + "for CategoricalGibbsMetropolis" + ) start = len(dimcats) dimcats += [(dim, k) for dim in range(start, start + v.dsize)] - if order == 'random': + if order == "random": self.shuffle_dims = True self.dimcats = dimcats else: if sorted(order) != list(range(len(dimcats))): - raise ValueError('Argument \'order\' has to be a permutation') + raise ValueError("Argument 'order' has to be a permutation") self.shuffle_dims = False self.dimcats = [dimcats[j] for j in order] - if proposal == 'uniform': + if proposal == "uniform": self.astep = self.astep_unif - elif proposal == 'proportional': + elif proposal == "proportional": # Use the optimized "Metropolized Gibbs Sampler" described in Liu96. self.astep = self.astep_prop else: - raise ValueError('Argument \'proposal\' should either be ' + - '\'uniform\' or \'proportional\'') + raise ValueError( + "Argument 'proposal' should either be " + "'uniform' or 'proportional'" + ) super().__init__(vars, [model.fastlogp]) @@ -494,8 +523,8 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): log_probs[candidate_cat] = logp(q) probs = softmax(log_probs) prob_curr, probs[given_cat] = probs[given_cat], 0.0 - probs /= (1.0 - prob_curr) - proposed_cat = nr.choice(candidates, p = probs) + probs /= 1.0 - prob_curr + proposed_cat = nr.choice(candidates, p=probs) accept_ratio = (1.0 - prob_curr) / (1.0 - probs[proposed_cat]) if not np.isfinite(accept_ratio) or nr.uniform() >= accept_ratio: q[dim] = given_cat @@ -505,12 +534,11 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): @staticmethod def competence(var): - ''' + """ CategoricalGibbsMetropolis is only suitable for Bernoulli and Categorical variables. - ''' - distribution = getattr( - var.distribution, 'parent_dist', var.distribution) + """ + distribution = getattr(var.distribution, "parent_dist", var.distribution) if isinstance(distribution, pm.Categorical): if distribution.k > 2: return Competence.IDEAL @@ -554,20 +582,34 @@ class DEMetropolis(PopulationArrayStepShared): Statistics and Computing `link `__ """ - name = 'DEMetropolis' + + name = "DEMetropolis" default_blocked = True generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'accepted': np.bool, - 'tune': np.bool, - 'scaling': np.float64, - 'lambda': np.float64, - }] - - def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, - tune=None, tune_interval=100, model=None, mode=None, **kwargs): + stats_dtypes = [ + { + "accept": np.float64, + "accepted": np.bool, + "tune": np.bool, + "scaling": np.float64, + "lambda": np.float64, + } + ] + + def __init__( + self, + vars=None, + S=None, + proposal_dist=None, + lamb=None, + scaling=0.001, + tune=None, + tune_interval=100, + model=None, + mode=None, + **kwargs + ): model = pm.modelcontext(model) @@ -583,12 +625,12 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0 else: self.proposal_dist = UniformProposal(S) - self.scaling = np.atleast_1d(scaling).astype('d') + self.scaling = np.atleast_1d(scaling).astype("d") if lamb is None: # default to the optimal lambda for normally distributed targets lamb = 2.38 / np.sqrt(2 * model.ndim) self.lamb = float(lamb) - if tune not in {None, 'scaling', 'lambda'}: + if tune not in {None, "scaling", "lambda"}: raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}') self.tune = tune self.tune_interval = tune_interval @@ -603,9 +645,9 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0 def astep(self, q0): if not self.steps_until_tune and self.tune: - if self.tune == 'scaling': + if self.tune == "scaling": self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) - elif self.tune == 'lambda': + elif self.tune == "lambda": self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval @@ -628,11 +670,11 @@ def astep(self, q0): self.steps_until_tune -= 1 stats = { - 'tune': self.tune, - 'scaling': self.scaling, - 'lambda': self.lamb, - 'accept': np.exp(accept), - 'accepted': accepted + "tune": self.tune, + "scaling": self.scaling, + "lambda": self.lamb, + "accept": np.exp(accept), + "accepted": accepted, } return q_new, [stats] @@ -681,20 +723,35 @@ class DEMetropolisZ(ArrayStepShared): Statistics and Computing `link `__ """ - name = 'DEMetropolisZ' + + name = "DEMetropolisZ" default_blocked = True generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'accepted': np.bool, - 'tune': np.bool, - 'scaling': np.float64, - 'lambda': np.float64, - }] - - def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, - tune='lambda', tune_interval=100, tune_drop_fraction:float=0.9, model=None, mode=None, **kwargs): + stats_dtypes = [ + { + "accept": np.float64, + "accepted": np.bool, + "tune": np.bool, + "scaling": np.float64, + "lambda": np.float64, + } + ] + + def __init__( + self, + vars=None, + S=None, + proposal_dist=None, + lamb=None, + scaling=0.001, + tune="lambda", + tune_interval=100, + tune_drop_fraction: float = 0.9, + model=None, + mode=None, + **kwargs + ): model = pm.modelcontext(model) if vars is None: @@ -703,18 +760,18 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0 if S is None: S = np.ones(model.ndim) - + if proposal_dist is not None: self.proposal_dist = proposal_dist(S) else: self.proposal_dist = UniformProposal(S) - self.scaling = np.atleast_1d(scaling).astype('d') + self.scaling = np.atleast_1d(scaling).astype("d") if lamb is None: # default to the optimal lambda for normally distributed targets lamb = 2.38 / np.sqrt(2 * model.ndim) self.lamb = float(lamb) - if tune not in {None, 'scaling', 'lambda'}: + if tune not in {None, "scaling", "lambda"}: raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}') self.tune = True self.tune_target = tune @@ -730,7 +787,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0 scaling=self.scaling, lamb=self.lamb, steps_until_tune=tune_interval, - accepted=self.accepted + accepted=self.accepted, ) self.mode = mode @@ -750,9 +807,9 @@ def reset_tuning(self): def astep(self, q0): # same tuning scheme as DEMetropolis if not self.steps_until_tune and self.tune: - if self.tune_target == 'scaling': + if self.tune_target == "scaling": self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) - elif self.tune_target == 'lambda': + elif self.tune_target == "lambda": self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval @@ -769,7 +826,7 @@ def astep(self, q0): iz2 = np.random.randint(it) while iz2 == iz1: iz2 = np.random.randint(it) - + z1 = self._history[iz1] z2 = self._history[iz2] # propose a jump @@ -786,11 +843,11 @@ def astep(self, q0): self.steps_until_tune -= 1 stats = { - 'tune': self.tune, - 'scaling': self.scaling, - 'lambda': self.lamb, - 'accept': np.exp(accept), - 'accepted': accepted + "tune": self.tune, + "scaling": self.scaling, + "lambda": self.lamb, + "accept": np.exp(accept), + "accepted": accepted, } return q_new, [stats] @@ -820,14 +877,14 @@ def sample_except(limit, excluded): def softmax(x): e_x = np.exp(x - np.max(x)) - return e_x / np.sum(e_x, axis = 0) + return e_x / np.sum(e_x, axis=0) def delta_logp(logp, vars, shared): [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) tensor_type = inarray0.type - inarray1 = tensor_type('inarray1') + inarray1 = tensor_type("inarray1") logp1 = pm.CallableTensor(logp0)(inarray1) diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py index 61c9584fd4..dff75b6621 100644 --- a/pymc3/step_methods/sgmcmc.py +++ b/pymc3/step_methods/sgmcmc.py @@ -23,8 +23,10 @@ __all__ = [] -EXPERIMENTAL_WARNING = "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet"\ +EXPERIMENTAL_WARNING = ( + "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet" " recommended for use in PyMC3!" +) def _value_error(cond, str): @@ -34,18 +36,14 @@ def _value_error(cond, str): def _check_minibatches(minibatch_tensors, minibatches): - _value_error( - isinstance(minibatch_tensors, list), - 'minibatch_tensors must be a list.') + _value_error(isinstance(minibatch_tensors, list), "minibatch_tensors must be a list.") - _value_error( - hasattr(minibatches, "__iter__"), 'minibatches must be an iterator.') + _value_error(hasattr(minibatches, "__iter__"), "minibatches must be an iterator.") def prior_dlogp(vars, model, flat_view): """Returns the gradient of the prior on the parameters as a vector of size D x 1""" - terms = tt.concatenate( - [theano.grad(var.logpt, var).flatten() for var in vars], axis=0) + terms = tt.concatenate([theano.grad(var.logpt, var).flatten() for var in vars], axis=0) dlogp = theano.clone(terms, flat_view.replacements, strict=False) return dlogp @@ -59,16 +57,16 @@ def elemwise_dlogL(vars, model, flat_view): # select one observed random variable obs_var = model.observed_RVs[0] # tensor of shape (batch_size,) - logL = obs_var.logp_elemwiset.sum( - axis=tuple(range(1, obs_var.logp_elemwiset.ndim))) + logL = obs_var.logp_elemwiset.sum(axis=tuple(range(1, obs_var.logp_elemwiset.ndim))) # calculate fisher information terms = [] for var in vars: - output, _ = theano.scan(lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),\ - sequences=[tt.arange(logL.shape[0])]) + output, _ = theano.scan( + lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(), + sequences=[tt.arange(logL.shape[0])], + ) terms.append(output) - dlogL = theano.clone( - tt.concatenate(terms, axis=1), flat_view.replacements, strict=False) + dlogL = theano.clone(tt.concatenate(terms, axis=1), flat_view.replacements, strict=False) return dlogL @@ -111,16 +109,18 @@ class BaseStochasticGradient(ArrayStepShared): Returns None it creates class variables which are required for the training fn """ - def __init__(self, - vars=None, - batch_size=None, - total_size=None, - step_size=1.0, - model=None, - random_seed=None, - minibatches=None, - minibatch_tensors=None, - **kwargs): + def __init__( + self, + vars=None, + batch_size=None, + total_size=None, + step_size=1.0, + model=None, + random_seed=None, + minibatches=None, + minibatch_tensors=None, + **kwargs + ): warnings.warn(EXPERIMENTAL_WARNING) model = modelcontext(model) @@ -136,7 +136,8 @@ def __init__(self, self.total_size = total_size _value_error( total_size != None or batch_size != None, - 'total_size and batch_size of training data have to be specified') + "total_size and batch_size of training data have to be specified", + ) self.expected_iter = int(total_size / batch_size) # set random stream @@ -168,12 +169,10 @@ def __init__(self, def is_shared(t): return isinstance(t, theano.compile.sharedvalue.SharedVariable) - tensors = [(t.type() if is_shared(t) else t) - for t in minibatch_tensors] - updates = OrderedDict({ - t: t_ - for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t) - }) + tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors] + updates = OrderedDict( + {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)} + ) self.minibatch_tensors = tensors self.inarray += self.minibatch_tensors self.updates.update(updates) @@ -207,7 +206,7 @@ def astep(self, q0): ------- q """ - if hasattr(self, 'minibatch_tensors'): + if hasattr(self, "minibatch_tensors"): return q0 + self.training_fn(q0, *next(self.minibatches)) else: return q0 + self.training_fn(q0) diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py index b0d93dc2f0..f6e86e1fbe 100644 --- a/pymc3/step_methods/slicer.py +++ b/pymc3/step_methods/slicer.py @@ -22,9 +22,9 @@ from ..theanof import inputvars from ..vartypes import continuous_types -__all__ = ['Slice'] +__all__ = ["Slice"] -LOOP_ERR_MSG = 'max slicer iters %d exceeded' +LOOP_ERR_MSG = "max slicer iters %d exceeded" class Slice(ArrayStep): @@ -43,15 +43,15 @@ class Slice(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - name = 'slice' + + name = "slice" default_blocked = False - def __init__(self, vars=None, w=1., tune=True, model=None, - iter_limit=np.inf, **kwargs): + def __init__(self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, **kwargs): self.model = modelcontext(model) self.w = w self.tune = tune - self.n_tunes = 0. + self.n_tunes = 0.0 self.iter_limit = iter_limit if vars is None: @@ -72,13 +72,13 @@ def astep(self, q0, logp): qr[i] = q[i] + self.w[i] # Stepping out procedure cnt = 0 - while(y <= logp(ql)): # changed lt to leq for locally uniform posteriors + while y <= logp(ql): # changed lt to leq for locally uniform posteriors ql[i] -= self.w[i] cnt += 1 if cnt > self.iter_limit: raise RuntimeError(LOOP_ERR_MSG % self.iter_limit) cnt = 0 - while(y <= logp(qr)): + while y <= logp(qr): qr[i] += self.w[i] cnt += 1 if cnt > self.iter_limit: @@ -97,11 +97,14 @@ def astep(self, q0, logp): if cnt > self.iter_limit: raise RuntimeError(LOOP_ERR_MSG % self.iter_limit) - if self.tune: # I was under impression from MacKays lectures that slice width can be tuned without + if ( + self.tune + ): # I was under impression from MacKays lectures that slice width can be tuned without # breaking markovianness. Can we do it regardless of self.tune?(@madanh) - self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) +\ - (qr[i] - ql[i]) / (self.n_tunes + 1) # same as before - # unobvious and important: return qr and ql to the same point + self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) + (qr[i] - ql[i]) / ( + self.n_tunes + 1 + ) # same as before + # unobvious and important: return qr and ql to the same point qr[i] = q[i] ql[i] = q[i] if self.tune: diff --git a/pymc3/step_methods/step_sizes.py b/pymc3/step_methods/step_sizes.py index bdf0683c62..3603e30fa5 100644 --- a/pymc3/step_methods/step_sizes.py +++ b/pymc3/step_methods/step_sizes.py @@ -30,7 +30,7 @@ def __init__(self, initial_step, target, gamma, k, t0): def reset(self): self._log_step = np.log(self._initial_step) self._log_bar = self._log_step - self._hbar = 0. + self._hbar = 0.0 self._count = 1 self._mu = np.log(10 * self._initial_step) self._tuned_stats = [] @@ -47,8 +47,8 @@ def update(self, accept_stat, tune): return count, k, t0 = self._count, self._k, self._t0 - w = 1. / (count + t0) - self._hbar = ((1 - w) * self._hbar + w * (self._target - accept_stat)) + w = 1.0 / (count + t0) + self._hbar = (1 - w) * self._hbar + w * (self._target - accept_stat) self._log_step = self._mu - self._hbar * np.sqrt(count) / self._gamma mk = count ** -k @@ -57,8 +57,8 @@ def update(self, accept_stat, tune): def stats(self): return { - 'step_size': np.exp(self._log_step), - 'step_size_bar': np.exp(self._log_bar), + "step_size": np.exp(self._log_step), + "step_size_bar": np.exp(self._log_bar), } def warnings(self): @@ -71,13 +71,13 @@ def warnings(self): n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95) if target_accept < lower or target_accept > upper: - msg = ('The acceptance probability does not match the target. It ' - 'is %s, but should be close to %s. Try to increase the ' - 'number of tuning steps.' - % (mean_accept, target_accept)) - info = {'target': target_accept, 'actual': mean_accept} - warning = SamplerWarning( - WarningType.BAD_ACCEPTANCE, msg, 'warn', extra=info) + msg = ( + "The acceptance probability does not match the target. It " + "is %s, but should be close to %s. Try to increase the " + "number of tuning steps." % (mean_accept, target_accept) + ) + info = {"target": target_accept, "actual": mean_accept} + warning = SamplerWarning(WarningType.BAD_ACCEPTANCE, msg, "warn", extra=info) return [warning] else: return [] From c4c48392d4e6f833ca8269592aa05e9a5e6a7242 Mon Sep 17 00:00:00 2001 From: Ali Akbar Septiandri Date: Sun, 27 Sep 2020 12:34:35 +0100 Subject: [PATCH 3/7] Revert two files --- pymc3/distributions/continuous.py | 12 ++- pymc3/step_methods/hmc/quadpotential.py | 120 ++++++++++++------------ 2 files changed, 67 insertions(+), 65 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 80fc6a0b3e..5ccdb7f5d2 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -134,7 +134,9 @@ def assert_negative_support(var, label, distname, value=-1e-6): support = False if np.any(support): - msg = "The variable specified for {} has negative support for {}, ".format(label, distname) + msg = "The variable specified for {} has negative support for {}, ".format( + label, distname + ) msg += "likely making it unsuitable for this parameter." warnings.warn(msg) @@ -710,7 +712,7 @@ def random(self, point=None, size=None): ) def _random(self, mu, sigma, lower, upper, size): - """Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's + """ Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's parametrization to scipy.truncnorm. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -3445,7 +3447,7 @@ def random(self, point=None, size=None): ) def _random(self, c, lower, upper, size): - """Wrapper around stats.triang.rvs that converts Triangular's + """ Wrapper around stats.triang.rvs that converts Triangular's parametrization to scipy.triang. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -3704,7 +3706,7 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs): self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma)) self.b = b = tt.as_tensor_variable(floatX(b)) - nu_sigma_ratio = -(nu ** 2) / (2 * sigma ** 2) + nu_sigma_ratio = -nu ** 2 / (2 * sigma ** 2) self.mean = ( sigma * np.sqrt(np.pi / 2) @@ -3760,7 +3762,7 @@ def random(self, point=None, size=None): return generate_samples(self._random, nu=nu, sigma=sigma, dist_shape=self.shape, size=size) def _random(self, nu, sigma, size): - """Wrapper around stats.rice.rvs that converts Rice's + """ Wrapper around stats.rice.rvs that converts Rice's parametrization to scipy.rice. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index fd962f48cb..8750fd6484 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -22,15 +22,9 @@ from pymc3.theanof import floatX -__all__ = [ - "quad_potential", - "QuadPotentialDiag", - "QuadPotentialFull", - "QuadPotentialFullInv", - "QuadPotentialDiagAdapt", - "QuadPotentialFullAdapt", - "isquadpotential", -] +__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull', + 'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', + 'QuadPotentialFullAdapt', 'isquadpotential'] def quad_potential(C, is_cov): @@ -62,7 +56,7 @@ def quad_potential(C, is_cov): if is_cov: return QuadPotentialDiag(C) else: - return QuadPotentialDiag(1.0 / C) + return QuadPotentialDiag(1. / C) else: if is_cov: return QuadPotentialFull(C) @@ -76,10 +70,11 @@ def partial_check_positive_definite(C): d = C else: d = np.diag(C) - (i,) = np.nonzero(np.logical_or(np.isnan(d), d <= 0)) + i, = np.nonzero(np.logical_or(np.isnan(d), d <= 0)) if len(i): - raise PositiveDefiniteError("Simple check failed. Diagonal contains negatives", i) + raise PositiveDefiniteError( + "Simple check failed. Diagonal contains negatives", i) class PositiveDefiniteError(ValueError): @@ -89,22 +84,23 @@ def __init__(self, msg, idx): self.msg = msg def __str__(self): - return "Scaling is not positive definite: %s. Check indexes %s." % (self.msg, self.idx) + return ("Scaling is not positive definite: %s. Check indexes %s." + % (self.msg, self.idx)) class QuadPotential: def velocity(self, x, out=None): """Compute the current velocity at a position in parameter space.""" - raise NotImplementedError("Abstract method") + raise NotImplementedError('Abstract method') def energy(self, x, velocity=None): - raise NotImplementedError("Abstract method") + raise NotImplementedError('Abstract method') def random(self, x): - raise NotImplementedError("Abstract method") + raise NotImplementedError('Abstract method') def velocity_energy(self, x, v_out): - raise NotImplementedError("Abstract method") + raise NotImplementedError('Abstract method') def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning. @@ -156,17 +152,15 @@ def __init__( ): """Set up a diagonal mass matrix.""" if initial_diag is not None and initial_diag.ndim != 1: - raise ValueError("Initial diagonal must be one-dimensional.") + raise ValueError('Initial diagonal must be one-dimensional.') if initial_mean.ndim != 1: - raise ValueError("Initial mean must be one-dimensional.") + raise ValueError('Initial mean must be one-dimensional.') if initial_diag is not None and len(initial_diag) != n: - raise ValueError( - "Wrong shape for initial_diag: expected %s got %s" % (n, len(initial_diag)) - ) + raise ValueError('Wrong shape for initial_diag: expected %s got %s' + % (n, len(initial_diag))) if len(initial_mean) != n: - raise ValueError( - "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean)) - ) + raise ValueError('Wrong shape for initial_mean: expected %s got %s' + % (n, len(initial_mean))) if dtype is None: dtype = theano.config.floatX @@ -190,10 +184,9 @@ def reset(self): self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True) self._var_theano = theano.shared(self._var) self._stds = np.sqrt(self._initial_diag) - self._inv_stds = floatX(1.0) / self._stds + self._inv_stds = floatX(1.) / self._stds self._foreground_var = _WeightedVariance( - self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype - ) + self._n, self._initial_mean, self._initial_diag, self._initial_weight, self.dtype) self._background_var = _WeightedVariance(self._n, dtype=self.dtype) self._n_samples = 0 @@ -263,12 +256,11 @@ def raise_ok(self, vmap): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(self._stds == 0)[0] - errmsg = ["Mass matrix contains zeros on the diagonal. "] + errmsg = ['Mass matrix contains zeros on the diagonal. '] for ii in index: - errmsg.append( - "The derivative of RV `{}`.ravel()[{}]" " is zero.".format(*name_slc[ii]) - ) - raise ValueError("\n".join(errmsg)) + errmsg.append('The derivative of RV `{}`.ravel()[{}]' + ' is zero.'.format(*name_slc[ii])) + raise ValueError('\n'.join(errmsg)) if np.any(~np.isfinite(self._stds)): name_slc = [] @@ -278,12 +270,11 @@ def raise_ok(self, vmap): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(~np.isfinite(self._stds))[0] - errmsg = ["Mass matrix contains non-finite values on the diagonal. "] + errmsg = ['Mass matrix contains non-finite values on the diagonal. '] for ii in index: - errmsg.append( - "The derivative of RV `{}`.ravel()[{}]" " is non-finite.".format(*name_slc[ii]) - ) - raise ValueError("\n".join(errmsg)) + errmsg.append('The derivative of RV `{}`.ravel()[{}]' + ' is non-finite.'.format(*name_slc[ii])) + raise ValueError('\n'.join(errmsg)) class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt): @@ -330,26 +321,25 @@ def update(self, sample, grad, tune): class _WeightedVariance: """Online algorithm for computing mean of variance.""" - def __init__( - self, nelem, initial_mean=None, initial_variance=None, initial_weight=0, dtype="d" - ): + def __init__(self, nelem, initial_mean=None, initial_variance=None, + initial_weight=0, dtype='d'): self._dtype = dtype self.n_samples = float(initial_weight) if initial_mean is None: - self.mean = np.zeros(nelem, dtype="d") + self.mean = np.zeros(nelem, dtype='d') else: - self.mean = np.array(initial_mean, dtype="d", copy=True) + self.mean = np.array(initial_mean, dtype='d', copy=True) if initial_variance is None: - self.raw_var = np.zeros(nelem, dtype="d") + self.raw_var = np.zeros(nelem, dtype='d') else: - self.raw_var = np.array(initial_variance, dtype="d", copy=True) + self.raw_var = np.array(initial_variance, dtype='d', copy=True) self.raw_var[:] *= self.n_samples if self.raw_var.shape != (nelem,): - raise ValueError("Invalid shape for initial variance.") + raise ValueError('Invalid shape for initial variance.') if self.mean.shape != (nelem,): - raise ValueError("Invalid shape for initial mean.") + raise ValueError('Invalid shape for initial mean.') def add_sample(self, x, weight): x = np.asarray(x) @@ -357,11 +347,11 @@ def add_sample(self, x, weight): old_diff = x - self.mean self.mean[:] += old_diff / self.n_samples new_diff = x - self.mean - self.raw_var[:] += weight * old_diff * new_diff + self.raw_var[:] += weight * old_diff * new_diff def current_variance(self, out=None): if self.n_samples == 0: - raise ValueError("Can not compute variance without samples.") + raise ValueError('Can not compute variance without samples.') if out is not None: return np.divide(self.raw_var, self.n_samples, out=out) else: @@ -386,10 +376,10 @@ def __init__(self, v, dtype=None): dtype = theano.config.floatX self.dtype = dtype v = v.astype(self.dtype) - s = v ** 0.5 + s = v ** .5 self.s = s - self.inv_s = 1.0 / s + self.inv_s = 1. / s self.v = v def velocity(self, x, out=None): @@ -407,7 +397,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is not None: return 0.5 * np.dot(x, velocity) - return 0.5 * x.dot(self.v * x) + return .5 * x.dot(self.v * x) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -447,7 +437,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is None: velocity = self.velocity(x) - return 0.5 * x.dot(velocity) + return .5 * x.dot(velocity) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -480,7 +470,8 @@ def velocity(self, x, out=None): def random(self): """Draw random value from QuadPotential.""" vals = np.random.normal(size=self._n).astype(self.dtype) - return scipy.linalg.solve_triangular(self._chol.T, vals, overwrite_b=True) + return scipy.linalg.solve_triangular(self._chol.T, vals, + overwrite_b=True) def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" @@ -498,7 +489,6 @@ def velocity_energy(self, x, v_out): class QuadPotentialFullAdapt(QuadPotentialFull): """Adapt a dense mass matrix using the sample covariances.""" - def __init__( self, n, @@ -518,11 +508,13 @@ def __init__( raise ValueError("Initial mean must be one-dimensional.") if initial_cov is not None and initial_cov.shape != (n, n): raise ValueError( - "Wrong shape for initial_cov: expected %s got %s" % (n, initial_cov.shape) + "Wrong shape for initial_cov: expected %s got %s" + % (n, initial_cov.shape) ) if len(initial_mean) != n: raise ValueError( - "Wrong shape for initial_mean: expected %s got %s" % (n, len(initial_mean)) + "Wrong shape for initial_mean: expected %s got %s" + % (n, len(initial_mean)) ) if dtype is None: @@ -581,7 +573,9 @@ def update(self, sample, grad, tune): # window. if delta >= self.adaptation_window: self._foreground_cov = self._background_cov - self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype) + self._background_cov = _WeightedCovariance( + self._n, dtype=self.dtype + ) self._previous_update = self._n_samples self.adaptation_window = int(self.adaptation_window * self.adaptation_window_multiplier) @@ -651,13 +645,12 @@ def current_mean(self): try: import sksparse.cholmod as cholmod - chol_available = True except ImportError: chol_available = False if chol_available: - __all__ += ["QuadPotentialSparse"] + __all__ += ['QuadPotentialSparse'] import theano.sparse @@ -691,3 +684,10 @@ def random(self): def energy(self, x): """Compute kinetic energy at a position in parameter space.""" return 0.5 * x.T.dot(self.velocity(x)) + n = self.factor.solve_Lt(n) + n = self.factor.apply_Pt(n) + return n + + def energy(self, x): + """Compute kinetic energy at a position in parameter space.""" + return 0.5 * x.T.dot(self.velocity(x)) From 1f2b8568398bcf683b18ea6d7cf1915759e73f30 Mon Sep 17 00:00:00 2001 From: Ali Akbar S Date: Sun, 27 Sep 2020 13:17:27 +0100 Subject: [PATCH 4/7] Update pymc3/distributions/bound.py Co-authored-by: Marco Gorelli --- pymc3/distributions/bound.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py index b52dc905b1..11fc9ddafe 100644 --- a/pymc3/distributions/bound.py +++ b/pymc3/distributions/bound.py @@ -82,7 +82,7 @@ def _random(self, lower, upper, point=None, size=None): upper = np.asarray(upper) if lower.size > 1 or upper.size > 1: raise ValueError( - "Drawing samples from distributions with " "array-valued bounds is not supported." + "Drawing samples from distributions with array-valued bounds is not supported." ) total_size = np.prod(size).astype(np.int) samples = [] From 53b42a11bcc919401ecc90f56ac8510c9a2318f1 Mon Sep 17 00:00:00 2001 From: Ali Akbar Septiandri Date: Sun, 27 Sep 2020 13:28:21 +0100 Subject: [PATCH 5/7] Revert blackened metropolis.py --- pymc3/step_methods/metropolis.py | 309 +++++++++++++------------------ 1 file changed, 126 insertions(+), 183 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 89b0ff63af..383536725d 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -18,29 +18,13 @@ import scipy.linalg from ..distributions import draw_values -from .arraystep import ( - ArrayStepShared, - PopulationArrayStepShared, - ArrayStep, - metrop_select, - Competence, -) +from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence import pymc3 as pm from pymc3.theanof import floatX -__all__ = [ - "Metropolis", - "DEMetropolis", - "DEMetropolisZ", - "BinaryMetropolis", - "BinaryGibbsMetropolis", - "CategoricalGibbsMetropolis", - "NormalProposal", - "CauchyProposal", - "LaplaceProposal", - "PoissonProposal", - "MultivariateNormalProposal", -] +__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis', + 'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal', + 'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal'] # Available proposal distributions for Metropolis @@ -117,32 +101,19 @@ class Metropolis(ArrayStepShared): mode: string or `Mode` instance. compilation mode passed to Theano functions """ - - name = "metropolis" + name = 'metropolis' default_blocked = False generates_stats = True - stats_dtypes = [ - { - "accept": np.float64, - "accepted": np.bool, - "tune": np.bool, - "scaling": np.float64, - } - ] - - def __init__( - self, - vars=None, - S=None, - proposal_dist=None, - scaling=1.0, - tune=True, - tune_interval=100, - model=None, - mode=None, - **kwargs - ): + stats_dtypes = [{ + 'accept': np.float64, + 'accepted': np.bool, + 'tune': np.bool, + 'scaling': np.float64, + }] + + def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., + tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) @@ -162,7 +133,7 @@ def __init__( else: raise ValueError("Invalid rank for variance: %s" % S.ndim) - self.scaling = np.atleast_1d(scaling).astype("d") + self.scaling = np.atleast_1d(scaling).astype('d') self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval @@ -170,14 +141,15 @@ def __init__( # Determine type of variables self.discrete = np.concatenate( - [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars] - ) + [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() # remember initial settings before tuning so they can be reset self._untuned_settings = dict( - scaling=self.scaling, steps_until_tune=tune_interval, accepted=self.accepted + scaling=self.scaling, + steps_until_tune=tune_interval, + accepted=self.accepted ) self.mode = mode @@ -195,7 +167,8 @@ def reset_tuning(self): def astep(self, q0): if not self.steps_until_tune and self.tune: # Tune scaling parameter - self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) + self.scaling = tune( + self.scaling, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval self.accepted = 0 @@ -204,12 +177,13 @@ def astep(self, q0): if self.any_discrete: if self.all_discrete: - delta = np.round(delta, 0).astype("int64") - q0 = q0.astype("int64") - q = (q0 + delta).astype("int64") + delta = np.round(delta, 0).astype('int64') + q0 = q0.astype('int64') + q = (q0 + delta).astype('int64') else: - delta[self.discrete] = np.round(delta[self.discrete], 0) - q = q0 + delta + delta[self.discrete] = np.round( + delta[self.discrete], 0) + q = (q0 + delta) else: q = floatX(q0 + delta) @@ -220,10 +194,10 @@ def astep(self, q0): self.steps_until_tune -= 1 stats = { - "tune": self.tune, - "scaling": self.scaling, - "accept": np.exp(accept), - "accepted": accepted, + 'tune': self.tune, + 'scaling': self.scaling, + 'accept': np.exp(accept), + 'accepted': accepted, } return q_new, [stats] @@ -287,19 +261,16 @@ class BinaryMetropolis(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - - name = "binary_metropolis" + name = 'binary_metropolis' generates_stats = True - stats_dtypes = [ - { - "accept": np.float64, - "tune": np.bool, - "p_jump": np.float64, - } - ] + stats_dtypes = [{ + 'accept': np.float64, + 'tune': np.bool, + 'p_jump': np.float64, + }] - def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None): + def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None): model = pm.modelcontext(model) @@ -310,19 +281,20 @@ def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None): self.accepted = 0 if not all([v.dtype in pm.discrete_types for v in vars]): - raise ValueError("All variables must be Bernoulli for BinaryMetropolis") + raise ValueError( + 'All variables must be Bernoulli for BinaryMetropolis') super().__init__(vars, [model.fastlogp]) def astep(self, q0, logp): # Convert adaptive_scale_factor to a jump probability - p_jump = 1.0 - 0.5 ** self.scaling + p_jump = 1. - .5 ** self.scaling rand_array = nr.random(q0.shape) q = np.copy(q0) # Locations where switches occur, according to p_jump - switch_locs = rand_array < p_jump + switch_locs = (rand_array < p_jump) q[switch_locs] = True - q[switch_locs] accept = logp(q) - logp(q0) @@ -330,20 +302,21 @@ def astep(self, q0, logp): self.accepted += accepted stats = { - "tune": self.tune, - "accept": np.exp(accept), - "p_jump": p_jump, + 'tune': self.tune, + 'accept': np.exp(accept), + 'p_jump': p_jump, } return q_new, [stats] @staticmethod def competence(var): - """ + ''' BinaryMetropolis is only suitable for binary (bool) and Categorical variables with k=1. - """ - distribution = getattr(var.distribution, "parent_dist", var.distribution) + ''' + distribution = getattr( + var.distribution, 'parent_dist', var.distribution) if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types): return Competence.COMPATIBLE elif isinstance(distribution, pm.Categorical) and (distribution.k == 2): @@ -368,10 +341,9 @@ class BinaryGibbsMetropolis(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ + name = 'binary_gibbs_metropolis' - name = "binary_gibbs_metropolis" - - def __init__(self, vars, order="random", transit_p=0.8, model=None): + def __init__(self, vars, order='random', transit_p=.8, model=None): model = pm.modelcontext(model) @@ -380,17 +352,18 @@ def __init__(self, vars, order="random", transit_p=0.8, model=None): self.dim = sum(v.dsize for v in vars) - if order == "random": + if order == 'random': self.shuffle_dims = True self.order = list(range(self.dim)) else: if sorted(order) != list(range(self.dim)): - raise ValueError("Argument 'order' has to be a permutation") + raise ValueError('Argument \'order\' has to be a permutation') self.shuffle_dims = False self.order = order if not all([v.dtype in pm.discrete_types for v in vars]): - raise ValueError("All variables must be binary for BinaryGibbsMetropolis") + raise ValueError( + 'All variables must be binary for BinaryGibbsMetropolis') super().__init__(vars, [model.fastlogp]) @@ -416,11 +389,12 @@ def astep(self, q0, logp): @staticmethod def competence(var): - """ + ''' BinaryMetropolis is only suitable for Bernoulli and Categorical variables with k=2. - """ - distribution = getattr(var.distribution, "parent_dist", var.distribution) + ''' + distribution = getattr( + var.distribution, 'parent_dist', var.distribution) if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types): return Competence.IDEAL elif isinstance(distribution, pm.Categorical) and (distribution.k == 2): @@ -430,16 +404,15 @@ def competence(var): class CategoricalGibbsMetropolis(ArrayStep): """A Metropolis-within-Gibbs step method optimized for categorical variables. - This step method works for Bernoulli variables as well, but it is not - optimized for them, like BinaryGibbsMetropolis is. Step method supports - two types of proposals: A uniform proposal and a proportional proposal, - which was introduced by Liu in his 1996 technical report - "Metropolized Gibbs Sampler: An Improvement". + This step method works for Bernoulli variables as well, but it is not + optimized for them, like BinaryGibbsMetropolis is. Step method supports + two types of proposals: A uniform proposal and a proportional proposal, + which was introduced by Liu in his 1996 technical report + "Metropolized Gibbs Sampler: An Improvement". """ + name = 'categorical_gibbs_metropolis' - name = "categorical_gibbs_metropolis" - - def __init__(self, vars, proposal="uniform", order="random", model=None): + def __init__(self, vars, proposal='uniform', order='random', model=None): model = pm.modelcontext(model) vars = pm.inputvars(vars) @@ -450,36 +423,34 @@ def __init__(self, vars, proposal="uniform", order="random", model=None): # variable with M categories and y being a 3-D variable with N # categories, we will have dimcats = [(0, M), (1, M), (2, N), (3, N), (4, N)]. for v in vars: - distr = getattr(v.distribution, "parent_dist", v.distribution) + distr = getattr(v.distribution, 'parent_dist', v.distribution) if isinstance(distr, pm.Categorical): k = draw_values([distr.k])[0] elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types): k = 2 else: - raise ValueError( - "All variables must be categorical or binary" + "for CategoricalGibbsMetropolis" - ) + raise ValueError('All variables must be categorical or binary' + + 'for CategoricalGibbsMetropolis') start = len(dimcats) dimcats += [(dim, k) for dim in range(start, start + v.dsize)] - if order == "random": + if order == 'random': self.shuffle_dims = True self.dimcats = dimcats else: if sorted(order) != list(range(len(dimcats))): - raise ValueError("Argument 'order' has to be a permutation") + raise ValueError('Argument \'order\' has to be a permutation') self.shuffle_dims = False self.dimcats = [dimcats[j] for j in order] - if proposal == "uniform": + if proposal == 'uniform': self.astep = self.astep_unif - elif proposal == "proportional": + elif proposal == 'proportional': # Use the optimized "Metropolized Gibbs Sampler" described in Liu96. self.astep = self.astep_prop else: - raise ValueError( - "Argument 'proposal' should either be " + "'uniform' or 'proportional'" - ) + raise ValueError('Argument \'proposal\' should either be ' + + '\'uniform\' or \'proportional\'') super().__init__(vars, [model.fastlogp]) @@ -523,8 +494,8 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): log_probs[candidate_cat] = logp(q) probs = softmax(log_probs) prob_curr, probs[given_cat] = probs[given_cat], 0.0 - probs /= 1.0 - prob_curr - proposed_cat = nr.choice(candidates, p=probs) + probs /= (1.0 - prob_curr) + proposed_cat = nr.choice(candidates, p = probs) accept_ratio = (1.0 - prob_curr) / (1.0 - probs[proposed_cat]) if not np.isfinite(accept_ratio) or nr.uniform() >= accept_ratio: q[dim] = given_cat @@ -534,11 +505,12 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): @staticmethod def competence(var): - """ + ''' CategoricalGibbsMetropolis is only suitable for Bernoulli and Categorical variables. - """ - distribution = getattr(var.distribution, "parent_dist", var.distribution) + ''' + distribution = getattr( + var.distribution, 'parent_dist', var.distribution) if isinstance(distribution, pm.Categorical): if distribution.k > 2: return Competence.IDEAL @@ -582,34 +554,20 @@ class DEMetropolis(PopulationArrayStepShared): Statistics and Computing `link `__ """ - - name = "DEMetropolis" + name = 'DEMetropolis' default_blocked = True generates_stats = True - stats_dtypes = [ - { - "accept": np.float64, - "accepted": np.bool, - "tune": np.bool, - "scaling": np.float64, - "lambda": np.float64, - } - ] - - def __init__( - self, - vars=None, - S=None, - proposal_dist=None, - lamb=None, - scaling=0.001, - tune=None, - tune_interval=100, - model=None, - mode=None, - **kwargs - ): + stats_dtypes = [{ + 'accept': np.float64, + 'accepted': np.bool, + 'tune': np.bool, + 'scaling': np.float64, + 'lambda': np.float64, + }] + + def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, + tune=None, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) @@ -625,12 +583,12 @@ def __init__( else: self.proposal_dist = UniformProposal(S) - self.scaling = np.atleast_1d(scaling).astype("d") + self.scaling = np.atleast_1d(scaling).astype('d') if lamb is None: # default to the optimal lambda for normally distributed targets lamb = 2.38 / np.sqrt(2 * model.ndim) self.lamb = float(lamb) - if tune not in {None, "scaling", "lambda"}: + if tune not in {None, 'scaling', 'lambda'}: raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}') self.tune = tune self.tune_interval = tune_interval @@ -645,9 +603,9 @@ def __init__( def astep(self, q0): if not self.steps_until_tune and self.tune: - if self.tune == "scaling": + if self.tune == 'scaling': self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) - elif self.tune == "lambda": + elif self.tune == 'lambda': self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval @@ -670,11 +628,11 @@ def astep(self, q0): self.steps_until_tune -= 1 stats = { - "tune": self.tune, - "scaling": self.scaling, - "lambda": self.lamb, - "accept": np.exp(accept), - "accepted": accepted, + 'tune': self.tune, + 'scaling': self.scaling, + 'lambda': self.lamb, + 'accept': np.exp(accept), + 'accepted': accepted } return q_new, [stats] @@ -723,35 +681,20 @@ class DEMetropolisZ(ArrayStepShared): Statistics and Computing `link `__ """ - - name = "DEMetropolisZ" + name = 'DEMetropolisZ' default_blocked = True generates_stats = True - stats_dtypes = [ - { - "accept": np.float64, - "accepted": np.bool, - "tune": np.bool, - "scaling": np.float64, - "lambda": np.float64, - } - ] - - def __init__( - self, - vars=None, - S=None, - proposal_dist=None, - lamb=None, - scaling=0.001, - tune="lambda", - tune_interval=100, - tune_drop_fraction: float = 0.9, - model=None, - mode=None, - **kwargs - ): + stats_dtypes = [{ + 'accept': np.float64, + 'accepted': np.bool, + 'tune': np.bool, + 'scaling': np.float64, + 'lambda': np.float64, + }] + + def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, + tune='lambda', tune_interval=100, tune_drop_fraction:float=0.9, model=None, mode=None, **kwargs): model = pm.modelcontext(model) if vars is None: @@ -760,18 +703,18 @@ def __init__( if S is None: S = np.ones(model.ndim) - + if proposal_dist is not None: self.proposal_dist = proposal_dist(S) else: self.proposal_dist = UniformProposal(S) - self.scaling = np.atleast_1d(scaling).astype("d") + self.scaling = np.atleast_1d(scaling).astype('d') if lamb is None: # default to the optimal lambda for normally distributed targets lamb = 2.38 / np.sqrt(2 * model.ndim) self.lamb = float(lamb) - if tune not in {None, "scaling", "lambda"}: + if tune not in {None, 'scaling', 'lambda'}: raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}') self.tune = True self.tune_target = tune @@ -787,7 +730,7 @@ def __init__( scaling=self.scaling, lamb=self.lamb, steps_until_tune=tune_interval, - accepted=self.accepted, + accepted=self.accepted ) self.mode = mode @@ -807,9 +750,9 @@ def reset_tuning(self): def astep(self, q0): # same tuning scheme as DEMetropolis if not self.steps_until_tune and self.tune: - if self.tune_target == "scaling": + if self.tune_target == 'scaling': self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) - elif self.tune_target == "lambda": + elif self.tune_target == 'lambda': self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval @@ -826,7 +769,7 @@ def astep(self, q0): iz2 = np.random.randint(it) while iz2 == iz1: iz2 = np.random.randint(it) - + z1 = self._history[iz1] z2 = self._history[iz2] # propose a jump @@ -843,11 +786,11 @@ def astep(self, q0): self.steps_until_tune -= 1 stats = { - "tune": self.tune, - "scaling": self.scaling, - "lambda": self.lamb, - "accept": np.exp(accept), - "accepted": accepted, + 'tune': self.tune, + 'scaling': self.scaling, + 'lambda': self.lamb, + 'accept': np.exp(accept), + 'accepted': accepted } return q_new, [stats] @@ -877,14 +820,14 @@ def sample_except(limit, excluded): def softmax(x): e_x = np.exp(x - np.max(x)) - return e_x / np.sum(e_x, axis=0) + return e_x / np.sum(e_x, axis = 0) def delta_logp(logp, vars, shared): [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) tensor_type = inarray0.type - inarray1 = tensor_type("inarray1") + inarray1 = tensor_type('inarray1') logp1 = pm.CallableTensor(logp0)(inarray1) From 1e2db79b4e08d0ed5de6cb449bd73a5a404c6d3f Mon Sep 17 00:00:00 2001 From: Ali Akbar Septiandri Date: Sun, 27 Sep 2020 13:32:41 +0100 Subject: [PATCH 6/7] Revert blackened files --- pymc3/distributions/discrete.py | 251 +++++++------ pymc3/distributions/multivariate.py | 543 +++++++++++++--------------- 2 files changed, 393 insertions(+), 401 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index cfe87ed3d4..090949c905 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -24,23 +24,10 @@ from ..theanof import floatX, intX, take_along_axis -__all__ = [ - "Binomial", - "BetaBinomial", - "Bernoulli", - "DiscreteWeibull", - "Poisson", - "NegativeBinomial", - "ConstantDist", - "Constant", - "ZeroInflatedPoisson", - "ZeroInflatedBinomial", - "ZeroInflatedNegativeBinomial", - "DiscreteUniform", - "Geometric", - "Categorical", - "OrderedLogistic", -] +__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull', + 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', + 'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial', + 'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic'] class Binomial(Discrete): @@ -109,7 +96,9 @@ def random(self, point=None, size=None): array """ n, p = draw_values([self.n, self.p], point=point, size=size) - return generate_samples(stats.binom.rvs, n=n, p=p, dist_shape=self.shape, size=size) + return generate_samples(stats.binom.rvs, n=n, p=p, + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -130,11 +119,8 @@ def logp(self, value): return bound( binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value), - 0 <= value, - value <= n, - 0 <= p, - p <= 1, - ) + 0 <= value, value <= n, + 0 <= p, p <= 1) class BetaBinomial(Discrete): @@ -197,7 +183,7 @@ def __init__(self, alpha, beta, n, *args, **kwargs): self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) self.beta = beta = tt.as_tensor_variable(floatX(beta)) self.n = n = tt.as_tensor_variable(intX(n)) - self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8") + self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8') def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -211,11 +197,8 @@ def _random(self, alpha, beta, n, size=None): quotient, remainder = divmod(_p.shape[0], _n.shape[0]) if remainder != 0: - raise TypeError( - "n has a bad size! Was cast to {}, must evenly divide {}".format( - _n.shape[0], _p.shape[0] - ) - ) + raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format( + _n.shape[0], _p.shape[0])) if quotient != 1: _n = np.tile(_n, quotient) samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size) @@ -238,10 +221,11 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size) - return generate_samples( - self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size - ) + alpha, beta, n = \ + draw_values([self.alpha, self.beta, self.n], point=point, size=size) + return generate_samples(self._random, alpha=alpha, beta=beta, n=n, + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -259,15 +243,11 @@ def logp(self, value): """ alpha = self.alpha beta = self.beta - return bound( - binomln(self.n, value) - + betaln(value + alpha, self.n - value + beta) - - betaln(alpha, beta), - value >= 0, - value <= self.n, - alpha > 0, - beta > 0, - ) + return bound(binomln(self.n, value) + + betaln(value + alpha, self.n - value + beta) + - betaln(alpha, beta), + value >= 0, value <= self.n, + alpha > 0, beta > 0) class Bernoulli(Discrete): @@ -313,7 +293,7 @@ class Bernoulli(Discrete): def __init__(self, p=None, logit_p=None, *args, **kwargs): super().__init__(*args, **kwargs) if sum(int(var is None) for var in [p, logit_p]) != 1: - raise ValueError("Specify one of p and logit_p") + raise ValueError('Specify one of p and logit_p') if p is not None: self._is_logit = False self.p = p = tt.as_tensor_variable(floatX(p)) @@ -323,7 +303,7 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs): self.p = tt.nnet.sigmoid(floatX(logit_p)) self._logit_p = tt.as_tensor_variable(logit_p) - self.mode = tt.cast(tround(self.p), "int8") + self.mode = tt.cast(tround(self.p), 'int8') def random(self, point=None, size=None): r""" @@ -343,7 +323,9 @@ def random(self, point=None, size=None): array """ p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(stats.bernoulli.rvs, p, dist_shape=self.shape, size=size) + return generate_samples(stats.bernoulli.rvs, p, + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -365,8 +347,9 @@ def logp(self, value): else: p = self.p return bound( - tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1 - ) + tt.switch(value, tt.log(p), tt.log(1 - p)), + value >= 0, value <= 1, + p >= 0, p <= 1) def _distr_parameters_for_repr(self): return ["p"] @@ -410,9 +393,8 @@ def DiscreteWeibull(q, b, x): Variance :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2` ======== ====================== """ - def __init__(self, q, beta, *args, **kwargs): - super().__init__(*args, defaults=("median",), **kwargs) + super().__init__(*args, defaults=('median',), **kwargs) self.q = q = tt.as_tensor_variable(floatX(q)) self.beta = beta = tt.as_tensor_variable(floatX(beta)) @@ -436,13 +418,10 @@ def logp(self, value): q = self.q beta = self.beta - return bound( - tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))), - 0 <= value, - 0 < q, - q < 1, - 0 < beta, - ) + return bound(tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))), + 0 <= value, + 0 < q, q < 1, + 0 < beta) def _ppf(self, p): r""" @@ -452,12 +431,12 @@ def _ppf(self, p): q = self.q beta = self.beta - return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype("int64") + return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1. / beta)) - 1).astype('int64') def _random(self, q, beta, size=None): p = np.random.uniform(size=size) - return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1 + return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1 def random(self, point=None, size=None): r""" @@ -478,7 +457,9 @@ def random(self, point=None, size=None): """ q, beta = draw_values([self.q, self.beta], point=point, size=size) - return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size) + return generate_samples(self._random, q, beta, + dist_shape=self.shape, + size=size) class Poisson(Discrete): @@ -548,7 +529,9 @@ def random(self, point=None, size=None): array """ mu = draw_values([self.mu], point=point, size=size)[0] - return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size) + return generate_samples(stats.poisson.rvs, mu, + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -565,9 +548,12 @@ def logp(self, value): TensorVariable """ mu = self.mu - log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0) + log_prob = bound( + logpow(mu, value) - factln(value) - mu, + mu >= 0, value >= 0) # Return zero when mu and value are both zero - return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob) + return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), + 0, log_prob) class NegativeBinomial(Discrete): @@ -644,12 +630,14 @@ def random(self, point=None, size=None): array """ mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size) - g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) + g = generate_samples(self._random, mu=mu, alpha=alpha, + dist_shape=self.shape, + size=size) g[g == 0] = np.finfo(float).eps # Just in case return np.asarray(stats.poisson.rvs(g)).reshape(g.shape) def _random(self, mu, alpha, size): - r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's + r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's parametrization to scipy.gamma. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -676,17 +664,15 @@ def logp(self, value): """ mu = self.mu alpha = self.alpha - negbinom = bound( - binomln(value + alpha - 1, value) - + logpow(mu / (mu + alpha), value) - + logpow(alpha / (mu + alpha), alpha), - value >= 0, - mu > 0, - alpha > 0, - ) + negbinom = bound(binomln(value + alpha - 1, value) + + logpow(mu / (mu + alpha), value) + + logpow(alpha / (mu + alpha), alpha), + value >= 0, mu > 0, alpha > 0) # Return Poisson when alpha gets very large. - return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom) + return tt.switch(tt.gt(alpha, 1e10), + Poisson.dist(self.mu).logp(value), + negbinom) class Geometric(Discrete): @@ -749,7 +735,9 @@ def random(self, point=None, size=None): array """ p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size) + return generate_samples(np.random.geometric, p, + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -766,7 +754,8 @@ def logp(self, value): TensorVariable """ p = self.p - return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) + return bound(tt.log(p) + logpow(1 - p, value - 1), + 0 <= p, p <= 1, value >= 1) class DiscreteUniform(Discrete): @@ -812,7 +801,8 @@ def __init__(self, lower, upper, *args, **kwargs): super().__init__(*args, **kwargs) self.lower = intX(tt.floor(lower)) self.upper = intX(tt.floor(upper)) - self.mode = tt.maximum(intX(tt.floor((upper + lower) / 2.0)), self.lower) + self.mode = tt.maximum( + intX(tt.floor((upper + lower) / 2.)), self.lower) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -838,7 +828,10 @@ def random(self, point=None, size=None): array """ lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size) + return generate_samples(self._random, + lower, upper, + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -856,7 +849,8 @@ def logp(self, value): """ upper = self.upper lower = self.lower - return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper) + return bound(-tt.log(upper - lower + 1), + lower <= value, value <= upper) class Categorical(Discrete): @@ -929,13 +923,11 @@ def random(self, point=None, size=None): p, k = draw_values([self.p, self.k], point=point, size=size) p = p / np.sum(p, axis=-1, keepdims=True) - return generate_samples( - random_choice, - p=p, - broadcast_shape=p.shape[:-1] or (1,), - dist_shape=self.shape, - size=size, - ) + return generate_samples(random_choice, + p=p, + broadcast_shape=p.shape[:-1] or (1,), + dist_shape=self.shape, + size=size) def logp(self, value): r""" @@ -961,9 +953,13 @@ def logp(self, value): if p.ndim > 1: if p.ndim > value_clip.ndim: - value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim) + value_clip = tt.shape_padleft( + value_clip, p_.ndim - value_clip.ndim + ) elif p.ndim < value_clip.ndim: - p = tt.shape_padleft(p, value_clip.ndim - p_.ndim) + p = tt.shape_padleft( + p, value_clip.ndim - p_.ndim + ) pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1)) a = tt.log( take_along_axis( @@ -974,9 +970,8 @@ def logp(self, value): else: a = tt.log(p[value_clip]) - return bound( - a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1) - ) + return bound(a, value >= 0, value <= (k - 1), + tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1)) class Constant(Discrete): @@ -990,10 +985,8 @@ class Constant(Discrete): """ def __init__(self, c, *args, **kwargs): - warnings.warn( - "Constant has been deprecated. We recommend using a Deterministic object instead.", - DeprecationWarning, - ) + warnings.warn("Constant has been deprecated. We recommend using a Deterministic object instead.", + DeprecationWarning) super().__init__(*args, **kwargs) self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c) @@ -1020,7 +1013,8 @@ def random(self, point=None, size=None): def _random(c, dtype=dtype, size=None): return np.full(size, fill_value=c, dtype=dtype) - return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype) + return generate_samples(_random, c=c, dist_shape=self.shape, + size=size).astype(dtype) def logp(self, value): r""" @@ -1118,7 +1112,9 @@ def random(self, point=None, size=None): array """ theta, psi = draw_values([self.theta, self.psi], point=point, size=size) - g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) + g = generate_samples(stats.poisson.rvs, theta, + dist_shape=self.shape, + size=size) g, psi = broadcast_distribution_samples([g, psi], size=size) return g * (np.random.random(g.shape) < psi) @@ -1142,10 +1138,13 @@ def logp(self, value): logp_val = tt.switch( tt.gt(value, 0), tt.log(psi) + self.pois.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) - theta), - ) + logaddexp(tt.log1p(-psi), tt.log(psi) - theta)) - return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta) + return bound( + logp_val, + 0 <= value, + 0 <= psi, psi <= 1, + 0 <= theta) class ZeroInflatedBinomial(Discrete): @@ -1225,7 +1224,9 @@ def random(self, point=None, size=None): array """ n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) - g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) + g = generate_samples(stats.binom.rvs, n, p, + dist_shape=self.shape, + size=size) g, psi = broadcast_distribution_samples([g, psi], size=size) return g * (np.random.random(g.shape) < psi) @@ -1250,10 +1251,13 @@ def logp(self, value): logp_val = tt.switch( tt.gt(value, 0), tt.log(psi) + self.bin.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)), - ) + logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p))) - return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1) + return bound( + logp_val, + 0 <= value, value <= n, + 0 <= psi, psi <= 1, + 0 <= p, p <= 1) class ZeroInflatedNegativeBinomial(Discrete): @@ -1349,14 +1353,21 @@ def random(self, point=None, size=None): ------- array """ - mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size) - g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) + mu, alpha, psi = draw_values( + [self.mu, self.alpha, self.psi], point=point, size=size) + g = generate_samples( + self._random, + mu=mu, + alpha=alpha, + dist_shape=self.shape, + size=size + ) g[g == 0] = np.finfo(float).eps # Just in case g, psi = broadcast_distribution_samples([g, psi], size=size) return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi) def _random(self, mu, alpha, size): - r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's + r""" Wrapper around stats.gamma.rvs that converts NegativeBinomial's parametrization to scipy.gamma. All parameter arrays should have been broadcasted properly by generate_samples at this point and size is the scipy.rvs representation. @@ -1387,12 +1398,19 @@ def logp(self, value): logp_other = tt.log(psi) + self.nb.logp(value) logp_0 = logaddexp( - tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)) - ) + tt.log1p(-psi), + tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))) - logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0) + logp_val = tt.switch( + tt.gt(value, 0), + logp_other, + logp_0) - return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0) + return bound( + logp_val, + 0 <= value, + 0 <= psi, psi <= 1, + mu > 0, alpha > 0) class OrderedLogistic(Categorical): @@ -1466,14 +1484,11 @@ def __init__(self, eta, cutpoints, *args, **kwargs): self.cutpoints = tt.as_tensor_variable(cutpoints) pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta)) - p_cum = tt.concatenate( - [ - tt.zeros_like(tt.shape_padright(pa[..., 0])), - pa, - tt.ones_like(tt.shape_padright(pa[..., 0])), - ], - axis=-1, - ) + p_cum = tt.concatenate([ + tt.zeros_like(tt.shape_padright(pa[..., 0])), + pa, + tt.ones_like(tt.shape_padright(pa[..., 0])) + ], axis=-1) p = p_cum[..., 1:] - p_cum[..., :-1] super().__init__(p=p, *args, **kwargs) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 01783d35e5..b22ffd7ce4 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -30,7 +30,8 @@ from pymc3.theanof import floatX from . import transforms -from .distribution import Continuous, Discrete, draw_values, generate_samples, _DrawValuesContext +from .distribution import (Continuous, Discrete, draw_values, generate_samples, + _DrawValuesContext) from ..model import Deterministic from .continuous import ChiSquared, Normal from .special import gammaln, multigammaln @@ -39,22 +40,15 @@ from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker -__all__ = [ - "MvNormal", - "MvStudentT", - "Dirichlet", - "Multinomial", - "Wishart", - "WishartBartlett", - "LKJCorr", - "LKJCholeskyCov", - "MatrixNormal", - "KroneckerNormal", -] +__all__ = ['MvNormal', 'MvStudentT', 'Dirichlet', + 'Multinomial', 'Wishart', 'WishartBartlett', + 'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal', + 'KroneckerNormal'] class _QuadFormBase(Continuous): - def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **kwargs): + def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, + *args, **kwargs): super().__init__(*args, **kwargs) if len(self.shape) > 2: raise ValueError("Only 1 or 2 dimensions are allowed.") @@ -62,40 +56,40 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, ** if chol is not None and not lower: chol = chol.T if len([i for i in [tau, cov, chol] if i is not None]) != 1: - raise ValueError( - "Incompatible parameterization. " "Specify exactly one of tau, cov, " "or chol." - ) + raise ValueError('Incompatible parameterization. ' + 'Specify exactly one of tau, cov, ' + 'or chol.') self.mu = mu = tt.as_tensor_variable(mu) self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") # Step methods and advi do not catch LinAlgErrors at the # moment. We work around that by using a cholesky op # that returns a nan as first entry instead of raising # an error. - cholesky = Cholesky(lower=True, on_error="nan") + cholesky = Cholesky(lower=True, on_error='nan') if cov is not None: self.k = cov.shape[0] - self._cov_type = "cov" + self._cov_type = 'cov' cov = tt.as_tensor_variable(cov) if cov.ndim != 2: - raise ValueError("cov must be two dimensional.") + raise ValueError('cov must be two dimensional.') self.chol_cov = cholesky(cov) self.cov = cov self._n = self.cov.shape[-1] elif tau is not None: self.k = tau.shape[0] - self._cov_type = "tau" + self._cov_type = 'tau' tau = tt.as_tensor_variable(tau) if tau.ndim != 2: - raise ValueError("tau must be two dimensional.") + raise ValueError('tau must be two dimensional.') self.chol_tau = cholesky(tau) self.tau = tau self._n = self.tau.shape[-1] else: self.k = chol.shape[0] - self._cov_type = "chol" + self._cov_type = 'chol' if chol.ndim != 2: - raise ValueError("chol must be two dimensional.") + raise ValueError('chol must be two dimensional.') self.chol_cov = tt.as_tensor_variable(chol) self._n = self.chol_cov.shape[-1] @@ -103,7 +97,7 @@ def _quaddist(self, value): """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma.""" mu = self.mu if value.ndim > 2 or value.ndim == 0: - raise ValueError("Invalid dimension for value: %s" % value.ndim) + raise ValueError('Invalid dimension for value: %s' % value.ndim) if value.ndim == 1: onedim = True value = value[None, :] @@ -112,11 +106,11 @@ def _quaddist(self, value): delta = value - mu - if self._cov_type == "cov": + if self._cov_type == 'cov': # Use this when Theano#5908 is released. # return MvNormalLogp()(self.cov, delta) dist, logdet, ok = self._quaddist_cov(delta) - elif self._cov_type == "tau": + elif self._cov_type == 'tau': dist, logdet, ok = self._quaddist_tau(delta) else: dist, logdet, ok = self._quaddist_chol(delta) @@ -228,7 +222,8 @@ class MvNormal(_QuadFormBase): vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T) """ - def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs): + def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, + *args, **kwargs): super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs) self.mean = self.median = self.mode = self.mu = self.mu @@ -258,35 +253,40 @@ def random(self, point=None, size=None): except TypeError: size = (size,) - if self._cov_type == "cov": + if self._cov_type == 'cov': mu, cov = draw_values([self.mu, self.cov], point=point, size=size) if mu.shape[-1] != cov.shape[-1]: raise ValueError("Shapes for mu and cov don't match") try: - dist = stats.multivariate_normal(mean=mu, cov=cov, allow_singular=True) + dist = stats.multivariate_normal( + mean=mu, cov=cov, allow_singular=True) except ValueError: size += (mu.shape[-1],) return np.nan * np.zeros(size) return dist.rvs(size) - elif self._cov_type == "chol": - mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size) + elif self._cov_type == 'chol': + mu, chol = draw_values([self.mu, self.chol_cov], + point=point, size=size) if size and mu.ndim == len(size) and mu.shape == size: mu = mu[..., np.newaxis] if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1: raise ValueError("Shapes for mu and chol don't match") - broadcast_shape = np.broadcast(np.empty(mu.shape[:-1]), np.empty(chol.shape[:-2])).shape + broadcast_shape = ( + np.broadcast(np.empty(mu.shape[:-1]), + np.empty(chol.shape[:-2])).shape + ) mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],)) chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:]) # If mu and chol were fixed by the point, only the standard normal # should change - if mu.shape[: len(size)] != size: + if mu.shape[:len(size)] != size: std_norm_shape = size + mu.shape else: std_norm_shape = mu.shape standard_normal = np.random.standard_normal(std_norm_shape) - return mu + np.einsum("...ij,...j->...i", chol, standard_normal) + return mu + np.einsum('...ij,...j->...i', chol, standard_normal) else: mu, tau = draw_values([self.mu, self.tau], point=point, size=size) if mu.shape[-1] != tau[0].shape[-1]: @@ -299,7 +299,8 @@ def random(self, point=None, size=None): return np.nan * np.zeros(size) standard_normal = np.random.standard_normal(size) - transformed = linalg.solve_triangular(chol, standard_normal.T, lower=True) + transformed = linalg.solve_triangular( + chol, standard_normal.T, lower=True) return mu + transformed.T def logp(self, value): @@ -318,7 +319,7 @@ def logp(self, value): """ quaddist, logdet, ok = self._quaddist(value) k = floatX(value.shape[-1]) - norm = -0.5 * k * pm.floatX(np.log(2 * np.pi)) + norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi)) return bound(norm - 0.5 * quaddist - logdet, ok) def _distr_parameters_for_repr(self): @@ -366,12 +367,11 @@ class MvStudentT(_QuadFormBase): Whether the cholesky fatcor is given as a lower triangular matrix. """ - def __init__( - self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, lower=True, *args, **kwargs - ): + def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, + lower=True, *args, **kwargs): if Sigma is not None: if cov is not None: - raise ValueError("Specify only one of cov and Sigma") + raise ValueError('Specify only one of cov and Sigma') cov = Sigma super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs) self.nu = nu = tt.as_tensor_variable(nu) @@ -396,14 +396,14 @@ def random(self, point=None, size=None): """ with _DrawValuesContext(): nu, mu = draw_values([self.nu, self.mu], point=point, size=size) - if self._cov_type == "cov": - (cov,) = draw_values([self.cov], point=point, size=size) + if self._cov_type == 'cov': + cov, = draw_values([self.cov], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov) - elif self._cov_type == "tau": - (tau,) = draw_values([self.tau], point=point, size=size) + elif self._cov_type == 'tau': + tau, = draw_values([self.tau], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau) else: - (chol,) = draw_values([self.chol_cov], point=point, size=size) + chol, = draw_values([self.chol_cov], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol) samples = dist.random(point, size) @@ -428,12 +428,10 @@ def logp(self, value): quaddist, logdet, ok = self._quaddist(value) k = floatX(value.shape[-1]) - norm = ( - gammaln((self.nu + k) / 2.0) - - gammaln(self.nu / 2.0) - - 0.5 * k * floatX(np.log(self.nu * np.pi)) - ) - inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu) + norm = (gammaln((self.nu + k) / 2.) + - gammaln(self.nu / 2.) + - 0.5 * k * floatX(np.log(self.nu * np.pi))) + inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu) return bound(norm + inner - logdet, ok) def _distr_parameters_for_repr(self): @@ -464,19 +462,20 @@ class Dirichlet(Continuous): Concentration parameters (a > 0). """ - def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs): + def __init__(self, a, transform=transforms.stick_breaking, + *args, **kwargs): - if kwargs.get("shape") is None: + if kwargs.get('shape') is None: warnings.warn( ( "Shape not explicitly set. " "Please, set the value using the `shape` keyword argument. " "Using the test value to infer the shape." ), - DeprecationWarning, + DeprecationWarning ) try: - kwargs["shape"] = np.shape(get_test_value(a)) + kwargs['shape'] = np.shape(get_test_value(a)) except AttributeError: pass @@ -486,13 +485,15 @@ def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs): self.a = a = tt.as_tensor_variable(a) self.mean = a / tt.sum(a) - self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan) + self.mode = tt.switch(tt.all(a > 1), + (a - 1) / tt.sum(a - 1), + np.nan) def _random(self, a, size=None): gen = stats.dirichlet.rvs shape = tuple(np.atleast_1d(self.shape)) - if size[-len(shape) :] == shape: - real_size = size[: -len(shape)] + if size[-len(shape):] == shape: + real_size = size[:-len(shape)] else: real_size = size if self.size_prefix: @@ -527,7 +528,10 @@ def random(self, point=None, size=None): array """ a = draw_values([self.a], point=point, size=size)[0] - samples = generate_samples(self._random, a=a, dist_shape=self.shape, size=size) + samples = generate_samples(self._random, + a=a, + dist_shape=self.shape, + size=size) return samples def logp(self, value): @@ -547,14 +551,11 @@ def logp(self, value): a = self.a # only defined for sum(value) == 1 - return bound( - tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)), - tt.all(value >= 0), - tt.all(value <= 1), - np.logical_not(a.broadcastable), - tt.all(a > 0), - broadcast_conditions=False, - ) + return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + + gammaln(tt.sum(a, axis=-1)), + tt.all(value >= 0), tt.all(value <= 1), + np.logical_not(a.broadcastable), tt.all(a > 0), + broadcast_conditions=False) def _distr_parameters_for_repr(self): return ["a"] @@ -597,7 +598,7 @@ def __init__(self, n, p, *args, **kwargs): super().__init__(*args, **kwargs) p = p / tt.sum(p, axis=-1, keepdims=True) - n = np.squeeze(n) # works also if n is a tensor + n = np.squeeze(n) # works also if n is a tensor if len(self.shape) > 1: self.n = tt.shape_padright(n) @@ -611,16 +612,17 @@ def __init__(self, n, p, *args, **kwargs): self.p = tt.as_tensor_variable(p) self.mean = self.n * self.p - mode = tt.cast(tt.round(self.mean), "int32") + mode = tt.cast(tt.round(self.mean), 'int32') diff = self.n - tt.sum(mode, axis=-1, keepdims=True) inc_bool_arr = tt.abs_(diff) > 0 - mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) + mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], + diff[inc_bool_arr.nonzero()]) self.mode = mode def _random(self, n, p, size=None, raw_size=None): original_dtype = p.dtype # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317) - p = p.astype("float64") + p = p.astype('float64') # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals p /= np.sum(p, axis=-1, keepdims=True) @@ -633,19 +635,23 @@ def _random(self, n, p, size=None, raw_size=None): # np.random.multinomial needs `n` to be a scalar int and `p` a # sequence so we semi flatten them and iterate over them size_ = to_tuple(raw_size) - if p.ndim > len(size_) and p.shape[: len(size_)] == size_: + if p.ndim > len(size_) and p.shape[:len(size_)] == size_: # p and n have the size_ prepend so we don't need it in np.random n_ = n.reshape([-1]) p_ = p.reshape([-1, p.shape[-1]]) - samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)]) + samples = np.array([ + np.random.multinomial(nn, pp) + for nn, pp in zip(n_, p_) + ]) samples = samples.reshape(p.shape) else: # p and n don't have the size prepend n_ = n.reshape([-1]) p_ = p.reshape([-1, p.shape[-1]]) - samples = np.array( - [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)] - ) + samples = np.array([ + np.random.multinomial(nn, pp, size=size_) + for nn, pp in zip(n_, p_) + ]) samples = np.moveaxis(samples, 0, -1) samples = samples.reshape(size + p.shape) # We cast back to the original dtype @@ -669,14 +675,10 @@ def random(self, point=None, size=None): array """ n, p = draw_values([self.n, self.p], point=point, size=size) - samples = generate_samples( - self._random, - n, - p, - dist_shape=self.shape, - not_broadcast_kwargs={"raw_size": size}, - size=size, - ) + samples = generate_samples(self._random, n, p, + dist_shape=self.shape, + not_broadcast_kwargs={'raw_size': size}, + size=size) return samples def logp(self, x): @@ -703,7 +705,7 @@ def logp(self, x): tt.all(p <= 1), tt.all(tt.eq(tt.sum(p, axis=-1), 1)), tt.all(tt.ge(n, 0)), - broadcast_conditions=False, + broadcast_conditions=False ) @@ -729,7 +731,7 @@ class PosDefMatrix(theano.Op): def make_node(self, x): x = tt.as_tensor_variable(x) assert x.ndim == 2 - o = tt.TensorType(dtype="int8", broadcastable=[])() + o = tt.TensorType(dtype='int8', broadcastable=[])() return theano.Apply(self, [x], [o]) # Python implementation: @@ -738,22 +740,21 @@ def perform(self, node, inputs, outputs): (x,) = inputs (z,) = outputs try: - z[0] = np.array(posdef(x), dtype="int8") + z[0] = np.array(posdef(x), dtype='int8') except Exception: - pm._log.exception("Failed to check if %s positive definite", x) + pm._log.exception('Failed to check if %s positive definite', x) raise def infer_shape(self, node, shapes): return [[]] def grad(self, inp, grads): - (x,) = inp + x, = inp return [x.zeros_like(theano.config.floatX)] def __str__(self): return "MatrixIsPositiveDefinite" - matrix_pos_def = PosDefMatrix() @@ -796,20 +797,20 @@ class Wishart(Continuous): def __init__(self, nu, V, *args, **kwargs): super().__init__(*args, **kwargs) - warnings.warn( - "The Wishart distribution can currently not be used " - "for MCMC sampling. The probability of sampling a " - "symmetric matrix is basically zero. Instead, please " - "use LKJCholeskyCov or LKJCorr. For more information " - "on the issues surrounding the Wishart see here: " - "https://github.com/pymc-devs/pymc3/issues/538.", - UserWarning, - ) + warnings.warn('The Wishart distribution can currently not be used ' + 'for MCMC sampling. The probability of sampling a ' + 'symmetric matrix is basically zero. Instead, please ' + 'use LKJCholeskyCov or LKJCorr. For more information ' + 'on the issues surrounding the Wishart see here: ' + 'https://github.com/pymc-devs/pymc3/issues/538.', + UserWarning) self.nu = nu = tt.as_tensor_variable(nu) self.p = p = tt.as_tensor_variable(V.shape[0]) self.V = V = tt.as_tensor_variable(V) self.mean = nu * V - self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan) + self.mode = tt.switch(tt.ge(nu, p + 1), + (nu - p - 1) * V, + np.nan) def random(self, point=None, size=None): """ @@ -829,8 +830,9 @@ def random(self, point=None, size=None): array """ nu, V = draw_values([self.nu, self.V], point=point, size=size) - size = 1 if size is None else size - return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, broadcast_shape=(size,)) + size= 1 if size is None else size + return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, + broadcast_shape=(size,)) def logp(self, X): """ @@ -853,19 +855,14 @@ def logp(self, X): IVI = det(V) IXI = det(X) - return bound( - ( - (nu - p - 1) * tt.log(IXI) - - trace(matrix_inverse(V).dot(X)) - - nu * p * tt.log(2) - - nu * tt.log(IVI) - - 2 * multigammaln(nu / 2.0, p) - ) - / 2, - matrix_pos_def(X), - tt.eq(X, X.T), - nu > (p - 1), - broadcast_conditions=False, + return bound(((nu - p - 1) * tt.log(IXI) + - trace(matrix_inverse(V).dot(X)) + - nu * p * tt.log(2) - nu * tt.log(IVI) + - 2 * multigammaln(nu / 2., p)) / 2, + matrix_pos_def(X), + tt.eq(X, X.T), + nu > (p - 1), + broadcast_conditions=False ) @@ -927,18 +924,17 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv # Inverse transform testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T)) testval = linalg.cholesky(testval, lower=True) - diag_testval = testval[diag_idx] ** 2 + diag_testval = testval[diag_idx]**2 tril_testval = testval[tril_idx] else: diag_testval = None tril_testval = None - c = tt.sqrt( - ChiSquared("%s_c" % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval) - ) - pm._log.info("Added new variable %s_c to model diagonal of Wishart." % name) - z = Normal("%s_z" % name, 0.0, 1.0, shape=n_tril, testval=tril_testval) - pm._log.info("Added new variable %s_z to model off-diagonals of Wishart." % name) + c = tt.sqrt(ChiSquared('%s_c' % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, + testval=diag_testval)) + pm._log.info('Added new variable %s_c to model diagonal of Wishart.' % name) + z = Normal('%s_z' % name, 0., 1., shape=n_tril, testval=tril_testval) + pm._log.info('Added new variable %s_z to model off-diagonals of Wishart.' % name) # Construct A matrix A = tt.zeros(S.shape, dtype=np.float32) A = tt.set_subtensor(A[diag_idx], c) @@ -953,24 +949,20 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv def _lkj_normalizing_constant(eta, n): if eta == 1: - result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum() + result = gammaln(2. * tt.arange(1, int((n - 1) / 2) + 1)).sum() if n % 2 == 1: - result += ( - 0.25 * (n ** 2 - 1) * tt.log(np.pi) - - 0.25 * (n - 1) ** 2 * tt.log(2.0) - - (n - 1) * gammaln(int((n + 1) / 2)) - ) + result += (0.25 * (n ** 2 - 1) * tt.log(np.pi) + - 0.25 * (n - 1) ** 2 * tt.log(2.) + - (n - 1) * gammaln(int((n + 1) / 2))) else: - result += ( - 0.25 * n * (n - 2) * tt.log(np.pi) - + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0) - + n * gammaln(n / 2) - - (n - 1) * gammaln(n) - ) + result += (0.25 * n * (n - 2) * tt.log(np.pi) + + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.) + + n * gammaln(n / 2) - (n - 1) * gammaln(n)) else: result = -(n - 1) * gammaln(eta + 0.5 * (n - 1)) k = tt.arange(1, n) - result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum() + result += (0.5 * k * tt.log(np.pi) + + gammaln(eta + 0.5 * (n - 1 - k))).sum() return result @@ -978,25 +970,24 @@ class _LKJCholeskyCov(Continuous): R"""Underlying class for covariance matrix with LKJ distributed correlations. See docs for LKJCholeskyCov function for more details on how to use it in models. """ - def __init__(self, eta, n, sd_dist, *args, **kwargs): self.n = n self.eta = eta - if "transform" in kwargs and kwargs["transform"] is not None: - raise ValueError("Invalid parameter: transform.") - if "shape" in kwargs: - raise ValueError("Invalid parameter: shape.") + if 'transform' in kwargs and kwargs['transform'] is not None: + raise ValueError('Invalid parameter: transform.') + if 'shape' in kwargs: + raise ValueError('Invalid parameter: shape.') shape = n * (n + 1) // 2 if sd_dist.shape.ndim not in [0, 1]: - raise ValueError("Invalid shape for sd_dist.") + raise ValueError('Invalid shape for sd_dist.') transform = transforms.CholeskyCovPacked(n) - kwargs["shape"] = shape - kwargs["transform"] = transform + kwargs['shape'] = shape + kwargs['transform'] = transform super().__init__(*args, **kwargs) self.sd_dist = sd_dist @@ -1026,7 +1017,9 @@ def logp(self, x): cumsum = tt.cumsum(x ** 2) variance = tt.zeros(n) variance = tt.inc_subtensor(variance[0], x[0] ** 2) - variance = tt.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) + variance = tt.inc_subtensor( + variance[1:], + cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) sd_vals = tt.sqrt(variance) logp_sd = self.sd_dist.logp(sd_vals).sum() @@ -1050,40 +1043,40 @@ def _random(self, n, eta, size=1): P = np.eye(n) * np.ones(eta_sample_shape + (n, n)) # original implementation in R see: # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r - beta = eta - 1.0 + n / 2.0 - r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.0 + beta = eta - 1. + n/2. + r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1. P[..., 0, 1] = r12 - P[..., 1, 1] = np.sqrt(1.0 - r12 ** 2) + P[..., 1, 1] = np.sqrt(1. - r12**2) for mp1 in range(2, n): beta -= 0.5 - y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=eta_sample_shape) + y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape) z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,)) - z = z / np.sqrt(np.einsum("ij,ij->j", z, z)) + z = z / np.sqrt(np.einsum('ij,ij->j', z, z)) P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z - P[..., mp1, mp1] = np.sqrt(1.0 - y) - C = np.einsum("...ji,...jk->...ik", P, P) + P[..., mp1, mp1] = np.sqrt(1. - y) + C = np.einsum('...ji,...jk->...ik', P, P) D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2])) if D.shape in [tuple(), (1,)]: D = self.sd_dist.random(size=P.shape[:-1]) elif D.ndim < C.ndim - 1: - D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)] + D = [D] + [self.sd_dist.random(size=P.shape[:-2]) + for _ in range(n - 1)] D = np.moveaxis(np.array(D), 0, C.ndim - 2) elif D.ndim == C.ndim - 1: if D.shape[-1] == 1: - D = [D] + [self.sd_dist.random(size=P.shape[:-2]) for _ in range(n - 1)] + D = [D] + [self.sd_dist.random(size=P.shape[:-2]) + for _ in range(n - 1)] D = np.concatenate(D, axis=-1) elif D.shape[-1] != n: - raise ValueError( - "The size of the samples drawn from the " - "supplied sd_dist.random have the wrong " - "size. Expected {} but got {} instead.".format(n, D.shape[-1]) - ) + raise ValueError('The size of the samples drawn from the ' + 'supplied sd_dist.random have the wrong ' + 'size. Expected {} but got {} instead.'. + format(n, D.shape[-1])) else: - raise ValueError( - "Supplied sd_dist.random generates samples with " - "too many dimensions. It must yield samples " - "with 0 or 1 dimensions. Got {} instead".format(D.ndim - C.ndim - 2) - ) + raise ValueError('Supplied sd_dist.random generates samples with ' + 'too many dimensions. It must yield samples ' + 'with 0 or 1 dimensions. Got {} instead'. + format(D.ndim - C.ndim - 2)) C *= D[..., :, np.newaxis] * D[..., np.newaxis, :] tril_idx = np.tril_indices(n, k=0) return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]] @@ -1112,7 +1105,7 @@ def random(self, point=None, size=None): # We can only handle cov matrices with a constant n per random call n = np.unique(n) if len(n) > 1: - raise RuntimeError("Varying n is not supported for LKJCholeskyCov") + raise RuntimeError('Varying n is not supported for LKJCholeskyCov') n = int(n[0]) dist_shape = ((n * (n + 1)) // 2,) # We make sure that eta and the drawn n get their shapes broadcasted @@ -1129,10 +1122,10 @@ def random(self, point=None, size=None): size = None elif size == broadcast_shape: size = None - elif size[-len(sample_shape) :] == sample_shape: - size = size[: len(size) - len(sample_shape)] - elif size[-len(broadcast_shape) :] == broadcast_shape: - size = size[: len(size) - len(broadcast_shape)] + elif size[-len(sample_shape):] == sample_shape: + size = size[:len(size) - len(sample_shape)] + elif size[-len(broadcast_shape):] == broadcast_shape: + size = size[:len(size) - len(broadcast_shape)] # We will always provide _random with an integer size and then reshape # the output to get the correct size if size is not None: @@ -1147,7 +1140,9 @@ def random(self, point=None, size=None): return samples -def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs): +def LKJCholeskyCov( + name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs +): R"""Wrapper function for covariance matrix with LKJ distributed correlations. This defines a distribution over Cholesky decomposed covariance @@ -1342,14 +1337,12 @@ class LKJCorr(Continuous): 100(9), pp.1989-2001. """ - def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwargs): + def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs): if (p is not None) and (n is not None) and (eta is None): - warnings.warn( - "Parameters to LKJCorr have changed: shape parameter n -> eta " - "dimension parameter p -> n. Please update your code. " - "Automatically re-assigning parameters for backwards compatibility.", - DeprecationWarning, - ) + warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta ' + 'dimension parameter p -> n. Please update your code. ' + 'Automatically re-assigning parameters for backwards compatibility.', + DeprecationWarning) self.n = p self.eta = n eta = self.eta @@ -1358,24 +1351,20 @@ def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwar self.n = n self.eta = eta else: - raise ValueError( - "Invalid parameter: please use eta as the shape parameter and " - "n as the dimension parameter." - ) + raise ValueError('Invalid parameter: please use eta as the shape parameter and ' + 'n as the dimension parameter.') shape = n * (n - 1) // 2 self.mean = floatX(np.zeros(shape)) - if transform == "interval": + if transform == 'interval': transform = transforms.interval(-1, 1) super().__init__(shape=shape, transform=transform, *args, **kwargs) - warnings.warn( - "Parameters in LKJCorr have been rename: shape parameter n -> eta " - "dimension parameter p -> n. Please double check your initialization.", - DeprecationWarning, - ) - self.tri_index = np.zeros([n, n], dtype="int32") + warnings.warn('Parameters in LKJCorr have been rename: shape parameter n -> eta ' + 'dimension parameter p -> n. Please double check your initialization.', + DeprecationWarning) + self.tri_index = np.zeros([n, n], dtype='int32') self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape) self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape) @@ -1383,19 +1372,19 @@ def _random(self, n, eta, size=None): size = size if isinstance(size, tuple) else (size,) # original implementation in R see: # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r - beta = eta - 1.0 + n / 2.0 - r12 = 2.0 * stats.beta.rvs(a=beta, b=beta, size=size) - 1.0 + beta = eta - 1. + n/2. + r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=size) - 1. P = np.eye(n)[:, :, np.newaxis] * np.ones(size) P[0, 1] = r12 - P[1, 1] = np.sqrt(1.0 - r12 ** 2) + P[1, 1] = np.sqrt(1. - r12**2) for mp1 in range(2, n): beta -= 0.5 - y = stats.beta.rvs(a=mp1 / 2.0, b=beta, size=size) - z = stats.norm.rvs(loc=0, scale=1, size=(mp1,) + size) - z = z / np.sqrt(np.einsum("ij,ij->j", z, z)) + y = stats.beta.rvs(a=mp1 / 2., b=beta, size=size) + z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + size) + z = z / np.sqrt(np.einsum('ij,ij->j', z, z)) P[0:mp1, mp1] = np.sqrt(y) * z - P[mp1, mp1] = np.sqrt(1.0 - y) - C = np.einsum("ji...,jk...->...ik", P, P) + P[mp1, mp1] = np.sqrt(1. - y) + C = np.einsum('ji...,jk...->...ik', P, P) triu_idx = np.triu_indices(n, k=1) return C[..., triu_idx[0], triu_idx[1]] @@ -1417,8 +1406,9 @@ def random(self, point=None, size=None): array """ n, eta = draw_values([self.n, self.eta], point=point, size=size) - size = 1 if size is None else size - samples = generate_samples(self._random, n, eta, broadcast_shape=(size,)) + size= 1 if size is None else size + samples = generate_samples(self._random, n, eta, + broadcast_shape=(size,)) return samples def logp(self, x): @@ -1442,14 +1432,12 @@ def logp(self, x): X = tt.fill_diagonal(X, 1) result = _lkj_normalizing_constant(eta, n) - result += (eta - 1.0) * tt.log(det(X)) - return bound( - result, - tt.all(X <= 1), - tt.all(X >= -1), - matrix_pos_def(X), - eta > 0, - broadcast_conditions=False, + result += (eta - 1.) * tt.log(det(X)) + return bound(result, + tt.all(X <= 1), tt.all(X >= -1), + matrix_pos_def(X), + eta > 0, + broadcast_conditions=False ) @@ -1542,22 +1530,12 @@ class MatrixNormal(Continuous): observed=data, shape=(m, n)) """ - def __init__( - self, - mu=0, - rowcov=None, - rowchol=None, - rowtau=None, - colcov=None, - colchol=None, - coltau=None, - shape=None, - *args, - **kwargs, - ): + def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None, + colcov=None, colchol=None, coltau=None, shape=None, *args, + **kwargs): self._setup_matrices(colcov, colchol, coltau, rowcov, rowchol, rowtau) if shape is None: - raise TypeError("shape is a required argument") + raise TypeError('shape is a required argument') assert len(shape) == 2, "shape must have length 2: mxn" self.shape = shape super().__init__(shape=shape, *args, **kwargs) @@ -1567,68 +1545,64 @@ def __init__( self.solve_upper = tt.slinalg.solve_upper_triangular def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): - cholesky = Cholesky(lower=True, on_error="raise") + cholesky = Cholesky(lower=True, on_error='raise') # Among-row matrices if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1: - raise ValueError( - "Incompatible parameterization. " - "Specify exactly one of rowtau, rowcov, " - "or rowchol." - ) + raise ValueError('Incompatible parameterization. ' + 'Specify exactly one of rowtau, rowcov, ' + 'or rowchol.') if rowcov is not None: self.m = rowcov.shape[0] - self._rowcov_type = "cov" + self._rowcov_type = 'cov' rowcov = tt.as_tensor_variable(rowcov) if rowcov.ndim != 2: - raise ValueError("rowcov must be two dimensional.") + raise ValueError('rowcov must be two dimensional.') self.rowchol_cov = cholesky(rowcov) self.rowcov = rowcov elif rowtau is not None: - raise ValueError("rowtau not supported at this time") + raise ValueError('rowtau not supported at this time') self.m = rowtau.shape[0] - self._rowcov_type = "tau" + self._rowcov_type = 'tau' rowtau = tt.as_tensor_variable(rowtau) if rowtau.ndim != 2: - raise ValueError("rowtau must be two dimensional.") + raise ValueError('rowtau must be two dimensional.') self.rowchol_tau = cholesky(rowtau) self.rowtau = rowtau else: self.m = rowchol.shape[0] - self._rowcov_type = "chol" + self._rowcov_type = 'chol' if rowchol.ndim != 2: - raise ValueError("rowchol must be two dimensional.") + raise ValueError('rowchol must be two dimensional.') self.rowchol_cov = tt.as_tensor_variable(rowchol) # Among-column matrices if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1: - raise ValueError( - "Incompatible parameterization. " - "Specify exactly one of coltau, colcov, " - "or colchol." - ) + raise ValueError('Incompatible parameterization. ' + 'Specify exactly one of coltau, colcov, ' + 'or colchol.') if colcov is not None: self.n = colcov.shape[0] - self._colcov_type = "cov" + self._colcov_type = 'cov' colcov = tt.as_tensor_variable(colcov) if colcov.ndim != 2: - raise ValueError("colcov must be two dimensional.") + raise ValueError('colcov must be two dimensional.') self.colchol_cov = cholesky(colcov) self.colcov = colcov elif coltau is not None: - raise ValueError("coltau not supported at this time") + raise ValueError('coltau not supported at this time') self.n = coltau.shape[0] - self._colcov_type = "tau" + self._colcov_type = 'tau' coltau = tt.as_tensor_variable(coltau) if coltau.ndim != 2: - raise ValueError("coltau must be two dimensional.") + raise ValueError('coltau must be two dimensional.') self.colchol_tau = cholesky(coltau) self.coltau = coltau else: self.n = colchol.shape[0] - self._colcov_type = "chol" + self._colcov_type = 'chol' if colchol.ndim != 2: - raise ValueError("colchol must be two dimensional.") + raise ValueError('colchol must be two dimensional.') self.colchol_cov = tt.as_tensor_variable(colchol) def random(self, point=None, size=None): @@ -1649,8 +1623,9 @@ def random(self, point=None, size=None): array """ mu, colchol, rowchol = draw_values( - [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size - ) + [self.mu, self.colchol_cov, self.rowchol_cov], + point=point, + size=size) if size is None: size = () if size in (None, ()): @@ -1665,15 +1640,14 @@ def random(self, point=None, size=None): samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))) else: for j in range(np.prod(size)): - standard_normal = np.random.standard_normal( - (self.shape[0], colchol[j].shape[-1]) - ) - samples.append( - mu[j] + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)) - ) + standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1])) + samples.append(mu[j] + + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T))) samples = np.array(samples).reshape(size + tuple(self.shape)) return samples + + def _trquaddist(self, value): """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and the logdet of colcov and rowcov.""" @@ -1712,8 +1686,8 @@ def logp(self, value): trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value) m = self.m n = self.n - norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi)) - return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet + norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi)) + return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet class KroneckerNormal(Continuous): @@ -1805,23 +1779,24 @@ class KroneckerNormal(Continuous): .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models" """ - def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs): + def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, + *args, **kwargs): self._setup(covs, chols, evds, sigma) super().__init__(*args, **kwargs) self.mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu def _setup(self, covs, chols, evds, sigma): - self.cholesky = Cholesky(lower=True, on_error="raise") + self.cholesky = Cholesky(lower=True, on_error='raise') if len([i for i in [covs, chols, evds] if i is not None]) != 1: - raise ValueError( - "Incompatible parameterization. " "Specify exactly one of covs, chols, " "or evds." - ) + raise ValueError('Incompatible parameterization. ' + 'Specify exactly one of covs, chols, ' + 'or evds.') self._isEVD = False self.sigma = sigma self.is_noisy = self.sigma is not None and self.sigma != 0 if covs is not None: - self._cov_type = "cov" + self._cov_type = 'cov' self.covs = covs if self.is_noisy: # Noise requires eigendecomposition @@ -1831,10 +1806,11 @@ def _setup(self, covs, chols, evds, sigma): # Otherwise use cholesky as usual self.chols = list(map(self.cholesky, self.covs)) self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) - self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols]) + self.sizes = tt.as_tensor_variable( + [chol.shape[0] for chol in self.chols]) self.N = tt.prod(self.sizes) elif chols is not None: - self._cov_type = "chol" + self._cov_type = 'chol' if self.is_noisy: # A strange case... # Noise requires eigendecomposition covs = [tt.dot(chol, chol.T) for chol in chols] @@ -1843,10 +1819,11 @@ def _setup(self, covs, chols, evds, sigma): else: self.chols = chols self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) - self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols]) + self.sizes = tt.as_tensor_variable( + [chol.shape[0] for chol in self.chols]) self.N = tt.prod(self.sizes) else: - self._cov_type = "evd" + self._cov_type = 'evd' self._setup_evd(evds) def _setup_evd(self, eigh_iterable): @@ -1858,18 +1835,18 @@ def _setup_evd(self, eigh_iterable): self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep)) self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs if self.is_noisy: - self.eigs += self.sigma ** 2 + self.eigs += self.sigma**2 self.N = self.eigs.shape[0] def _setup_random(self): - if not hasattr(self, "mv_params"): - self.mv_params = {"mu": self.mu} - if self._cov_type == "cov": + if not hasattr(self, 'mv_params'): + self.mv_params = {'mu': self.mu} + if self._cov_type == 'cov': cov = kronecker(*self.covs) if self.is_noisy: - cov = cov + self.sigma ** 2 * tt.identity_like(cov) - self.mv_params["cov"] = cov - elif self._cov_type == "chol": + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['cov'] = cov + elif self._cov_type == 'chol': if self.is_noisy: covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): @@ -1877,19 +1854,19 @@ def _setup_random(self): covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma ** 2 * tt.identity_like(cov) - self.mv_params["chol"] = self.cholesky(cov) + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['chol'] = self.cholesky(cov) else: - self.mv_params["chol"] = kronecker(*self.chols) - elif self._cov_type == "evd": + self.mv_params['chol'] = kronecker(*self.chols) + elif self._cov_type == 'evd': covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma ** 2 * tt.identity_like(cov) - self.mv_params["cov"] = cov + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['cov'] = cov def random(self, point=None, size=None): """ @@ -1917,7 +1894,7 @@ def random(self, point=None, size=None): def _quaddist(self, value): """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))""" if value.ndim > 2 or value.ndim == 0: - raise ValueError("Invalid dimension for value: %s" % value.ndim) + raise ValueError('Invalid dimension for value: %s' % value.ndim) if value.ndim == 1: onedim = True value = value[None, :] @@ -1927,14 +1904,14 @@ def _quaddist(self, value): delta = value - self.mu if self._isEVD: sqrt_quad = kron_dot(self.QTs, delta.T) - sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None]) + sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None]) logdet = tt.sum(tt.log(self.eigs)) else: sqrt_quad = kron_solve_lower(self.chols, delta.T) logdet = 0 for chol_size, chol_diag in zip(self.sizes, self.chol_diags): - logchol = tt.log(chol_diag) * self.N / chol_size - logdet += tt.sum(2 * logchol) + logchol = tt.log(chol_diag) * self.N/chol_size + logdet += tt.sum(2*logchol) # Square each sample quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T) if onedim: @@ -1956,4 +1933,4 @@ def logp(self, value): TensorVariable """ quad, logdet = self._quaddist(value) - return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0 + return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0 From 66f4885cb922be078783b5336ae987a28bef8c6f Mon Sep 17 00:00:00 2001 From: Ali Septiandri Date: Mon, 28 Sep 2020 11:41:58 +0100 Subject: [PATCH 7/7] Fix error in quadpotential.py --- pymc3/step_methods/hmc/quadpotential.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 8750fd6484..c55f12a365 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -684,10 +684,3 @@ def random(self): def energy(self, x): """Compute kinetic energy at a position in parameter space.""" return 0.5 * x.T.dot(self.velocity(x)) - n = self.factor.solve_Lt(n) - n = self.factor.apply_Pt(n) - return n - - def energy(self, x): - """Compute kinetic energy at a position in parameter space.""" - return 0.5 * x.T.dot(self.velocity(x))