Skip to content

Commit

Permalink
Conditional autoregression distribution (pymc-devs#4504)
Browse files Browse the repository at this point in the history
* Add conditional autoregression distribution (CAR)
  • Loading branch information
ckrapu authored and aerubanov committed Mar 26, 2021
1 parent 985d3cd commit 7c46997
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 0 deletions.
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

### New Features
+ `pm.math.cartesian` can now handle inputs that are themselves >1D (see [#4482](https://github.com/pymc-devs/pymc3/pull/4482)).
+ The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models.
+ ...

### Maintenance
Expand Down
1 change: 1 addition & 0 deletions docs/source/api/distributions/multivariate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Multivariate
Multinomial
Dirichlet
DirichletMultinomial
CAR

.. automodule:: pymc3.distributions.multivariate
:members:
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,7 @@ def logpt_sum(*args, **kwargs):
)
from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture
from pymc3.distributions.multivariate import (
CAR,
Dirichlet,
DirichletMultinomial,
KroneckerNormal,
Expand Down Expand Up @@ -609,4 +610,5 @@ def logpt_sum(*args, **kwargs):
"Moyal",
"Simulator",
"BART",
"CAR",
]
125 changes: 125 additions & 0 deletions pymc3/distributions/multivariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"LKJCholeskyCov",
"MatrixNormal",
"KroneckerNormal",
"CAR",
]

solve_lower = Solve(A_structure="lower_triangular")
Expand Down Expand Up @@ -1937,3 +1938,127 @@ def logp(self, value):

def _distr_parameters_for_repr(self):
return ["mu"]


class CAR(Continuous):
R"""
Likelihood for a conditional autoregression. This is a special case of the
multivariate normal with an adjacency-structured covariance matrix.
.. math::
f(x \mid W, \alpha, \tau) =
\frac{|T|^{1/2}}{(2\pi)^{k/2}}
\exp\left\{ -\frac{1}{2} (x-\mu)^{\prime} T^{-1} (x-\mu) \right\}
where :math:`T = (\tau D(I-\alpha W))^{-1}` and :math:`D = diag(\sum_i W_{ij})`.
======== ==========================
Support :math:`x \in \mathbb{R}^k`
Mean :math:`\mu \in \mathbb{R}^k`
Variance :math:`(\tau D(I-\alpha W))^{-1}`
======== ==========================
Parameters
----------
mu: array
Real-valued mean vector
W: Numpy matrix
Symmetric adjacency matrix of 1s and 0s indicating
adjacency between elements.
alpha: float or array
Autoregression parameter taking values between -1 and 1. Values closer to 0 indicate weaker
correlation and values closer to 1 indicate higher autocorrelation. For most use cases, the
support of alpha should be restricted to (0, 1)
tau: float or array
Positive precision variable controlling the scale of the underlying normal variates.
sparse: bool, default=False
Determines whether or not sparse computations are used
References
----------
.. Jin, X., Carlin, B., Banerjee, S.
"Generalized Hierarchical Multivariate CAR Models for Areal Data"
Biometrics, Vol. 61, No. 4 (Dec., 2005), pp. 950-961
"""

def __init__(self, mu, W, alpha, tau, sparse=False, *args, **kwargs):
super().__init__(*args, **kwargs)

D = W.sum(axis=0)
d, _ = W.shape

self.d = d
self.median = self.mode = self.mean = self.mu = aet.as_tensor_variable(mu)
self.sparse = sparse

if not W.ndim == 2 or not np.allclose(W, W.T):
raise ValueError("W must be a symmetric adjacency matrix.")

if sparse:
W_sparse = scipy.sparse.csr_matrix(W)
self.W = aesara.sparse.as_sparse_variable(W_sparse)
else:
self.W = aet.as_tensor_variable(W)

# eigenvalues of D^−1/2 * W * D^−1/2
Dinv_sqrt = np.diag(1 / np.sqrt(D))
DWD = np.matmul(np.matmul(Dinv_sqrt, W), Dinv_sqrt)
self.lam = scipy.linalg.eigvalsh(DWD)
self.D = aet.as_tensor_variable(D)

tau = aet.as_tensor_variable(tau)
if tau.ndim > 0:
self.tau = tau[:, None]
else:
self.tau = tau

alpha = aet.as_tensor_variable(alpha)
if alpha.ndim > 0:
self.alpha = alpha[:, None]
else:
self.alpha = alpha

def logp(self, value):
"""
Calculate log-probability of a CAR-distributed vector
at specified value. This log probability function differs from
the true CAR log density (AKA a multivariate normal with CAR-structured
covariance matrix) by an additive constant.
Parameters
----------
value: array
Value for which log-probability is calculated.
Returns
-------
TensorVariable
"""

if value.ndim == 1:
value = value[None, :]

logtau = self.d * aet.log(self.tau).sum()
logdet = aet.log(1 - self.alpha.T * self.lam[:, None]).sum()
delta = value - self.mu

if self.sparse:
Wdelta = aesara.sparse.dot(delta, self.W)
else:
Wdelta = aet.dot(delta, self.W)

tau_dot_delta = self.D[None, :] * delta - self.alpha * Wdelta
logquad = (self.tau * delta * tau_dot_delta).sum(axis=-1)
return bound(
0.5 * (logtau + logdet - logquad),
aet.all(self.alpha <= 1),
aet.all(self.alpha >= -1),
self.tau > 0,
)

def random(self, point=None, size=None):
raise NotImplementedError("Sampling from a CAR distribution is not supported.")

def _distr_parameters_for_repr(self):
return ["mu", "W", "alpha", "tau"]
37 changes: 37 additions & 0 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from pymc3.aesaraf import floatX
from pymc3.distributions import (
AR1,
CAR,
AsymmetricLaplace,
Bernoulli,
Beta,
Expand Down Expand Up @@ -2768,6 +2769,42 @@ def test_orderedlogistic_dimensions(shape):
assert np.allclose(ologp, expected)


@pytest.mark.parametrize("shape", [(4,), (4, 1), (4, 4)], ids=str)
def test_car_logp(shape):
"""
Tests the log probability function for the CAR distribution by checking
against Scipy's multivariate normal logpdf, up to an additive constant.
The formula used by the CAR logp implementation omits several additive terms.
"""
np.random.seed(1)

xs = np.random.randn(*shape)

# d x d adjacency matrix for a square (d=4) of rook-adjacent sites
W = np.array(
[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
)

tau = 2.0
alpha = 0.5
mu = np.zeros(4)

# Compute CAR covariance matrix and resulting MVN logp
D = W.sum(axis=0)
prec = tau * (np.diag(D) - alpha * W)
cov = np.linalg.inv(prec)
scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)

car_logp = CAR.dist(mu, W, alpha, tau, shape=shape).logp(xs).eval()

# Check to make sure that the CAR and MVN log PDFs are equivalent
# up to an additive constant which is independent of the CAR parameters
delta_logp = scipy_logp - car_logp

# Check to make sure all the delta values are identical.
assert np.allclose(delta_logp - delta_logp[0], 0.0)


class TestBugfixes:
@pytest.mark.parametrize(
"dist_cls,kwargs", [(MvNormal, dict(mu=0)), (MvStudentT, dict(mu=0, nu=2))]
Expand Down

0 comments on commit 7c46997

Please sign in to comment.