Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conditional autoregression distribution #4504

Merged
merged 12 commits into from
Mar 9, 2021
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

### New Features
+ `pm.math.cartesian` can now handle inputs that are themselves >1D (see [#4482](https://github.com/pymc-devs/pymc3/pull/4482)).
+ The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models.
+ ...

### Maintenance
Expand Down
1 change: 1 addition & 0 deletions docs/source/api/distributions/multivariate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Multivariate
Multinomial
Dirichlet
DirichletMultinomial
CAR

.. automodule:: pymc3.distributions.multivariate
:members:
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
)
from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture
from pymc3.distributions.multivariate import (
CAR,
Dirichlet,
DirichletMultinomial,
KroneckerNormal,
Expand Down Expand Up @@ -184,4 +185,5 @@
"Simulator",
"fast_sample_posterior_predictive",
"BART",
"CAR",
]
125 changes: 125 additions & 0 deletions pymc3/distributions/multivariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"LKJCholeskyCov",
"MatrixNormal",
"KroneckerNormal",
"CAR",
]


Expand Down Expand Up @@ -2089,3 +2090,127 @@ def logp(self, value):

def _distr_parameters_for_repr(self):
return ["mu"]


class CAR(Continuous):
R"""
Likelihood for a conditional autoregression. This is a special case of the
multivariate normal with an adjacency-structured covariance matrix.

.. math::

f(x \mid W, \alpha, \tau) =
\frac{|T|^{1/2}}{(2\pi)^{k/2}}
\exp\left\{ -\frac{1}{2} (x-\mu)^{\prime} T^{-1} (x-\mu) \right\}

where :math:`T = (\tau D(I-\alpha W))^{-1}` and :math:`D = diag(\sum_i W_{ij})`.

======== ==========================
Support :math:`x \in \mathbb{R}^k`
Mean :math:`\mu \in \mathbb{R}^k`
Variance :math:`(\tau D(I-\alpha W))^{-1}`
======== ==========================

Parameters
----------
mu: array
ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
Real-valued mean vector
W: Numpy matrix
Symmetric adjacency matrix of 1s and 0s indicating
adjacency between elements.
alpha: float or array
Autoregression parameter taking values between -1 and 1. Values closer to 0 indicate weaker
correlation and values closer to 1 indicate higher autocorrelation. For most use cases, the
support of alpha should be restricted to (0, 1)
tau: float or array
Positive precision variable controlling the scale of the underlying normal variates.
sparse: bool, default=False
Determines whether or not sparse computations are used

References
----------
.. Jin, X., Carlin, B., Banerjee, S.
"Generalized Hierarchical Multivariate CAR Models for Areal Data"
Biometrics, Vol. 61, No. 4 (Dec., 2005), pp. 950-961
"""

def __init__(self, mu, W, alpha, tau, sparse=False, *args, **kwargs):
ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
super().__init__(*args, **kwargs)

D = W.sum(axis=0)
d, _ = W.shape

self.d = d
self.median = self.mode = self.mean = self.mu = aet.as_tensor_variable(mu)
self.sparse = sparse

if not W.ndim == 2 or not np.allclose(W, W.T):
raise ValueError("W must be a symmetric adjacency matrix.")

if sparse:
W_sparse = scipy.sparse.csr_matrix(W)
self.W = aesara.sparse.as_sparse_variable(W_sparse)
else:
self.W = aet.as_tensor_variable(W)

# eigenvalues of D^−1/2 * W * D^−1/2
Dinv_sqrt = np.diag(1 / np.sqrt(D))
DWD = np.matmul(np.matmul(Dinv_sqrt, W), Dinv_sqrt)
self.lam = scipy.linalg.eigvalsh(DWD)
self.D = aet.as_tensor_variable(D)

tau = aet.as_tensor_variable(tau)
if tau.ndim > 0:
self.tau = tau[:, None]
else:
self.tau = tau

alpha = aet.as_tensor_variable(alpha)
if alpha.ndim > 0:
self.alpha = alpha[:, None]
else:
self.alpha = alpha

def logp(self, value):
ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
"""
Calculate log-probability of a CAR-distributed vector
at specified value. This log probability function differs from
the true CAR log density (AKA a multivariate normal with CAR-structured
covariance matrix) by an additive constant.

Parameters
----------
value: array
Value for which log-probability is calculated.

Returns
-------
TensorVariable
"""

if value.ndim == 1:
value = value[None, :]

logtau = self.d * aet.log(self.tau).sum()
logdet = aet.log(1 - self.alpha.T * self.lam[:, None]).sum()
delta = value - self.mu

if self.sparse:
Wdelta = aesara.sparse.dot(delta, self.W)
else:
Wdelta = aet.dot(delta, self.W)

tau_dot_delta = self.D[None, :] * delta - self.alpha * Wdelta
logquad = (self.tau * delta * tau_dot_delta).sum(axis=-1)
return bound(
0.5 * (logtau + logdet - logquad),
aet.all(self.alpha <= 1),
aet.all(self.alpha >= -1),
self.tau > 0,
)

def random(self, point=None, size=None):
raise NotImplementedError("Sampling from a CAR distribution is not supported.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we turn this into a feature request?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I've got a few leads on efficient methods for doing this. We can track that discussion at #4518.


def _distr_parameters_for_repr(self):
return ["mu", "W", "alpha", "tau"]
37 changes: 37 additions & 0 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from pymc3.blocking import DictToVarBijection
from pymc3.distributions import (
AR1,
CAR,
AsymmetricLaplace,
Bernoulli,
Beta,
Expand Down Expand Up @@ -2573,6 +2574,42 @@ def test_orderedlogistic_dimensions(shape):
assert np.allclose(ologp, expected)


@pytest.mark.parametrize("shape", [(4,), (4, 1), (4, 4)], ids=str)
def test_car_logp(shape):
"""
Tests the log probability function for the CAR distribution by checking
against Scipy's multivariate normal logpdf, up to an additive constant.
The formula used by the CAR logp implementation omits several additive terms.
"""
np.random.seed(1)

xs = np.random.randn(*shape)

# d x d adjacency matrix for a square (d=4) of rook-adjacent sites
W = np.array(
[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
)

tau = 2.0
alpha = 0.5
mu = np.zeros(4)

# Compute CAR covariance matrix and resulting MVN logp
D = W.sum(axis=0)
prec = tau * (np.diag(D) - alpha * W)
cov = np.linalg.inv(prec)
scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)

car_logp = CAR.dist(mu, W, alpha, tau, shape=shape).logp(xs).eval()

# Check to make sure that the CAR and MVN log PDFs are equivalent
# up to an additive constant which is independent of the CAR parameters
delta_logp = scipy_logp - car_logp

# Check to make sure all the delta values are identical.
assert np.allclose(delta_logp - delta_logp[0], 0.0)


ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
class TestBugfixes:
@pytest.mark.parametrize(
"dist_cls,kwargs", [(MvNormal, dict(mu=0)), (MvStudentT, dict(mu=0, nu=2))]
Expand Down