Skip to content

Commit

Permalink
Emit FutureWarning when pm.Data(mutable=...) is not specified
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelosthege authored and twiecki committed Jan 4, 2022
1 parent 696d237 commit 600fe90
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 54 deletions.
2 changes: 1 addition & 1 deletion pymc/backends/arviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def constant_data_to_xarray(self):
"""Convert constant data to xarray."""
# For constant data, we are concerned only with deterministics and
# data. The constant data vars must be either pm.Data
# (TensorSharedVariable) or pm.Deterministic
# (TensorConstant/SharedVariable) or pm.Deterministic
constant_data_vars = {} # type: Dict[str, Var]

def is_data(name, var) -> bool:
Expand Down
23 changes: 21 additions & 2 deletions pymc/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import pkgutil
import urllib.request
import warnings

from copy import copy
from typing import Any, Dict, List, Optional, Sequence, Union
Expand All @@ -30,6 +31,7 @@
from aesara.graph.basic import Apply
from aesara.tensor.type import TensorType
from aesara.tensor.var import TensorConstant, TensorVariable
from packaging import version

import pymc as pm

Expand Down Expand Up @@ -555,7 +557,7 @@ def Data(
*,
dims: Optional[Sequence[str]] = None,
export_index_as_coords=False,
mutable: bool = True,
mutable: Optional[bool] = None,
**kwargs,
) -> Union[SharedVariable, TensorConstant]:
"""Data container that registers a data variable with the model.
Expand All @@ -570,6 +572,11 @@ def Data(
The name for this variable
value: {List, np.ndarray, pd.Series, pd.Dataframe}
A value to associate with this variable
mutable : bool, optional
Switches between creating a ``SharedVariable`` (``mutable=True``, default)
vs. creating a ``TensorConstant`` (``mutable=False``).
Consider using ``pm.ConstantData`` or ``pm.MutableData`` as less verbose
alternatives to ``pm.Data(..., mutable=...)``.
dims: {str, tuple of str}, optional, default=None
Dimension names of the random variables (as opposed to the shapes of these
random variables). Use this when `value` is a pandas Series or DataFrame. The
Expand All @@ -592,7 +599,7 @@ def Data(
>>> observed_data = [mu + np.random.randn(20) for mu in true_mu]
>>> with pm.Model() as model:
... data = pm.Data('data', observed_data[0])
... data = pm.MutableData('data', observed_data[0])
... mu = pm.Normal('mu', 0, 10)
... pm.Normal('y', mu=mu, sigma=1, observed=data)
Expand Down Expand Up @@ -626,6 +633,18 @@ def Data(
# `pandas_to_array` takes care of parameter `value` and
# transforms it to something digestible for Aesara.
arr = pandas_to_array(value)

if mutable is None:
current = version.Version(pm.__version__)
mutable = current.major == 4 and current.minor < 1
if mutable:
warnings.warn(
"The `mutable` kwarg was not specified. Currently it defaults to `pm.Data(mutable=True)`,"
" which is equivalent to using `pm.MutableData()`."
" In v4.1.0 the default will change to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
" Set `pm.Data(..., mutable=False/True)`, or use `pm.ConstantData`/`pm.MutableData`.",
FutureWarning,
)
if mutable:
x = aesara.shared(arr, name, **kwargs)
else:
Expand Down
11 changes: 6 additions & 5 deletions pymc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,7 @@ def set_data(
):
"""Changes the values of a data variable in the model.
In contrast to pm.Data().set_value, this method can also
In contrast to pm.MutableData().set_value, this method can also
update the corresponding coordinates.
Parameters
Expand All @@ -1131,7 +1131,8 @@ def set_data(
shared_object = self[name]
if not isinstance(shared_object, SharedVariable):
raise TypeError(
f"The variable `{name}` must be a `SharedVariable` (e.g. `pymc.Data`) to allow updating. "
f"The variable `{name}` must be a `SharedVariable`"
" (created through `pm.MutableData()` or `pm.Data(mutable=True)`) to allow updating. "
f"The current type is: {type(shared_object)}"
)

Expand All @@ -1156,7 +1157,7 @@ def set_data(
length_changed = new_length != old_length

# Reject resizing if we already know that it would create shape problems.
# NOTE: If there are multiple pm.Data containers sharing this dim, but the user only
# NOTE: If there are multiple pm.MutableData containers sharing this dim, but the user only
# changes the values for one of them, they will run into shape problems nonetheless.
length_belongs_to = length_tensor.owner.inputs[0].owner.inputs[0]
if not isinstance(length_belongs_to, SharedVariable) and length_changed:
Expand Down Expand Up @@ -1735,8 +1736,8 @@ def set_data(new_data, model=None):
>>> import pymc as pm
>>> with pm.Model() as model:
... x = pm.Data('x', [1., 2., 3.])
... y = pm.Data('y', [1., 2., 3.])
... x = pm.MutableData('x', [1., 2., 3.])
... y = pm.MutableData('y', [1., 2., 3.])
... beta = pm.Normal('beta', 0, 1)
... obs = pm.Normal('obs', x * beta, 1, observed=y)
... idata = pm.sample(1000, tune=1000)
Expand Down
8 changes: 6 additions & 2 deletions pymc/model_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,14 @@ def _make_node(self, var_name, graph, *, formatting: str = "plain"):
shape = "octagon"
style = "filled"
label = f"{var_name}\n~\nPotential"
elif isinstance(v, (SharedVariable, TensorConstant)):
elif isinstance(v, TensorConstant):
shape = "box"
style = "rounded, filled"
label = f"{var_name}\n~\nData"
label = f"{var_name}\n~\nConstantData"
elif isinstance(v, SharedVariable):
shape = "box"
style = "rounded, filled"
label = f"{var_name}\n~\nMutableData"
elif v.owner and isinstance(v.owner.op, RandomVariable):
shape = "ellipse"
if hasattr(v.tag, "observations"):
Expand Down
7 changes: 4 additions & 3 deletions pymc/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import xarray

from aesara.compile.mode import Mode
from aesara.graph.basic import Constant
from aesara.tensor.sharedvar import SharedVariable
from arviz import InferenceData
from fastprogress.fastprogress import progress_bar
Expand Down Expand Up @@ -1728,7 +1729,7 @@ def sample_posterior_predictive(
for rv in walk_model(vars_to_sample, walk_past_rvs=True)
if rv not in vars_to_sample
and rv in model.named_vars.values()
and not isinstance(rv, SharedVariable)
and not isinstance(rv, (Constant, SharedVariable))
]
if inputs_and_names:
inputs, input_names = zip(*inputs_and_names)
Expand All @@ -1739,7 +1740,7 @@ def sample_posterior_predictive(
input_names = [
n
for n in _trace.varnames
if n not in output_names and not isinstance(model[n], SharedVariable)
if n not in output_names and not isinstance(model[n], (Constant, SharedVariable))
]
inputs = [model[n] for n in input_names]

Expand Down Expand Up @@ -2067,7 +2068,7 @@ def sample_prior_predictive(
names.append(rv_var.name)
vars_to_sample.append(rv_var)

inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)]
inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, (Constant, SharedVariable))]

sampler_fn = compile_pymc(
inputs, vars_to_sample, allow_input_downcast=True, accept_inplace=True, mode=mode
Expand Down
52 changes: 32 additions & 20 deletions pymc/tests/test_data_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import pytest

from aesara import shared
from aesara.compile.sharedvalue import SharedVariable
from aesara.tensor.sharedvar import ScalarSharedVariable
from aesara.tensor.var import TensorVariable

Expand All @@ -32,7 +33,7 @@ class TestData(SeededTest):
def test_deterministic(self):
data_values = np.array([0.5, 0.4, 5, 2])
with pm.Model() as model:
X = pm.Data("X", data_values)
X = pm.MutableData("X", data_values)
pm.Normal("y", 0, 1, observed=X)
model.logp(model.recompute_initial_point())

Expand All @@ -43,7 +44,7 @@ def test_sample(self):
x_pred = np.linspace(-3, 3, 200, dtype="float32")

with pm.Model():
x_shared = pm.Data("x_shared", x)
x_shared = pm.MutableData("x_shared", x)
b = pm.Normal("b", 0.0, 10.0)
pm.Normal("obs", b * x_shared, np.sqrt(1e-2), observed=y)

Expand Down Expand Up @@ -95,8 +96,8 @@ def test_sample_posterior_predictive_after_set_data(self):

def test_sample_after_set_data(self):
with pm.Model() as model:
x = pm.Data("x", [1.0, 2.0, 3.0])
y = pm.Data("y", [1.0, 2.0, 3.0])
x = pm.MutableData("x", [1.0, 2.0, 3.0])
y = pm.MutableData("y", [1.0, 2.0, 3.0])
beta = pm.Normal("beta", 0, 10.0)
pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
pm.sample(
Expand Down Expand Up @@ -131,8 +132,8 @@ def test_shared_data_as_index(self):
See https://github.com/pymc-devs/pymc/issues/3813
"""
with pm.Model() as model:
index = pm.Data("index", [2, 0, 1, 0, 2])
y = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
index = pm.MutableData("index", [2, 0, 1, 0, 2])
y = pm.MutableData("y", [1.0, 2.0, 3.0, 2.0, 1.0])
alpha = pm.Normal("alpha", 0, 1.5, size=3)
pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

Expand Down Expand Up @@ -163,7 +164,7 @@ def test_shared_data_as_rv_input(self):
See https://github.com/pymc-devs/pymc/issues/3842
"""
with pm.Model() as m:
x = pm.Data("x", [1.0, 2.0, 3.0])
x = pm.MutableData("x", [1.0, 2.0, 3.0])
y = pm.Normal("y", mu=x, size=(2, 3))
assert y.eval().shape == (2, 3)
idata = pm.sample(
Expand Down Expand Up @@ -221,7 +222,7 @@ def test_shared_scalar_as_rv_input(self):

def test_creation_of_data_outside_model_context(self):
with pytest.raises((IndexError, TypeError)) as error:
pm.Data("data", [1.1, 2.2, 3.3])
pm.ConstantData("data", [1.1, 2.2, 3.3])
error.match("No model on context stack")

def test_set_data_to_non_data_container_variables(self):
Expand All @@ -244,8 +245,8 @@ def test_set_data_to_non_data_container_variables(self):
@pytest.mark.xfail(reason="Depends on ModelGraph")
def test_model_to_graphviz_for_model_with_data_container(self):
with pm.Model() as model:
x = pm.Data("x", [1.0, 2.0, 3.0])
y = pm.Data("y", [1.0, 2.0, 3.0])
x = pm.ConstantData("x", [1.0, 2.0, 3.0])
y = pm.MutableData("y", [1.0, 2.0, 3.0])
beta = pm.Normal("beta", 0, 10.0)
obs_sigma = floatX(np.sqrt(1e-2))
pm.Normal("obs", beta * x, obs_sigma, observed=y)
Expand All @@ -262,12 +263,14 @@ def test_model_to_graphviz_for_model_with_data_container(self):
pm.model_to_graphviz(model, formatting=formatting)

exp_without = [
'x [label="x\n~\nData" shape=box style="rounded, filled"]',
'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
'y [label="x\n~\nMutableData" shape=box style="rounded, filled"]',
'beta [label="beta\n~\nNormal"]',
'obs [label="obs\n~\nNormal" style=filled]',
]
exp_with = [
'x [label="x\n~\nData" shape=box style="rounded, filled"]',
'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
'y [label="x\n~\nMutableData" shape=box style="rounded, filled"]',
'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
]
Expand All @@ -290,7 +293,7 @@ def test_explicit_coords(self):
}
# pass coordinates explicitly, use numpy array in Data container
with pm.Model(coords=coords) as pmodel:
pm.Data("observations", data, dims=("rows", "columns"))
pm.MutableData("observations", data, dims=("rows", "columns"))

assert "rows" in pmodel.coords
assert pmodel.coords["rows"] == ("R1", "R2", "R3", "R4", "R5")
Expand All @@ -310,7 +313,7 @@ def test_symbolic_coords(self):
Their lengths are then automatically linked to the corresponding Tensor dimension.
"""
with pm.Model() as pmodel:
intensity = pm.Data("intensity", np.ones((2, 3)), dims=("row", "column"))
intensity = pm.MutableData("intensity", np.ones((2, 3)), dims=("row", "column"))
assert "row" in pmodel.dim_lengths
assert "column" in pmodel.dim_lengths
assert isinstance(pmodel.dim_lengths["row"], TensorVariable)
Expand All @@ -327,7 +330,7 @@ def test_no_resize_of_implied_dimensions(self):
# Imply a dimension through RV params
pm.Normal("n", mu=[1, 2, 3], dims="city")
# _Use_ the dimension for a data variable
inhabitants = pm.Data("inhabitants", [100, 200, 300], dims="city")
inhabitants = pm.MutableData("inhabitants", [100, 200, 300], dims="city")

# Attempting to re-size the dimension through the data variable would
# cause shape problems in InferenceData conversion, because the RV remains (3,).
Expand All @@ -343,7 +346,7 @@ def test_implicit_coords_series(self):
name="sales",
)
with pm.Model() as pmodel:
pm.Data("sales", ser_sales, dims="date", export_index_as_coords=True)
pm.ConstantData("sales", ser_sales, dims="date", export_index_as_coords=True)

assert "date" in pmodel.coords
assert len(pmodel.coords["date"]) == 22
Expand All @@ -360,7 +363,9 @@ def test_implicit_coords_dataframe(self):

# infer coordinates from index and columns of the DataFrame
with pm.Model() as pmodel:
pm.Data("observations", df_data, dims=("rows", "columns"), export_index_as_coords=True)
pm.ConstantData(
"observations", df_data, dims=("rows", "columns"), export_index_as_coords=True
)

assert "rows" in pmodel.coords
assert "columns" in pmodel.coords
Expand All @@ -370,23 +375,30 @@ def test_data_kwargs(self):
strict_value = True
allow_downcast_value = False
with pm.Model():
data = pm.Data(
"data",
data = pm.MutableData(
"mdata",
value=[[1.0], [2.0], [3.0]],
strict=strict_value,
allow_downcast=allow_downcast_value,
)
assert data.container.strict is strict_value
assert data.container.allow_downcast is allow_downcast_value

def test_data_mutable_default_warning(self):
with pm.Model():
with pytest.warns(FutureWarning, match="`mutable` kwarg was not specified"):
data = pm.Data("x", [1, 2, 3])
assert isinstance(data, SharedVariable)
pass


def test_data_naming():
"""
This is a test for issue #3793 -- `Data` objects in named models are
not given model-relative names.
"""
with pm.Model("named_model") as model:
x = pm.Data("x", [1.0, 2.0, 3.0])
x = pm.ConstantData("x", [1.0, 2.0, 3.0])
y = pm.Normal("y")
assert y.name == "named_model_y"
assert x.name == "named_model_x"
Loading

0 comments on commit 600fe90

Please sign in to comment.