diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index fbc2772655..f9d46986ad 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -113,9 +113,13 @@ This includes API changes we did not warn about since at least `3.11.0` (2021-01
 - Added partial dependence plots and individual conditional expectation plots [5091](https://github.com/pymc-devs/pymc3/pull/5091).
 - Modify how particle weights are computed. This improves accuracy of the modeled function (see [5177](https://github.com/pymc-devs/pymc3/pull/5177)).
 - Improve sampling, increase default number of particles [5229](https://github.com/pymc-devs/pymc3/pull/5229).
-- `pm.Data` now passes additional kwargs to `aesara.shared`. [#5098](https://github.com/pymc-devs/pymc/pull/5098)
 - The new `pm.find_constrained_prior` function can be used to find optimized prior parameters of a distribution under some constraints (e.g lower and upper bound). See [#5231](https://github.com/pymc-devs/pymc/pull/5231).
+- New features for `pm.Data` containers:
+  - With `pm.Data(..., mutable=True/False)`, or the `pm.MutableData` and `pm.ConstantData` aliases, one can now choose between `SharedVariable` and `TensorConstant` data variables. `TensorConstant`s can be more performant and compatible in situations where a variable doesn't need to be changed via `pm.set_data()`. See [#5295](https://github.com/pymc-devs/pymc/pull/5295).
+  - New named dimensions can be introduced to the model via `pm.Data(..., dims=...)`. For mutable data variables (see above), the lengths of these dimensions are symbolic, so they can be re-sized via `pm.set_data()`.
+  - `pm.Data` now passes additional kwargs to `aesara.shared`/`at.as_tensor`. [#5098](https://github.com/pymc-devs/pymc/pull/5098).
+- ...

 ### Internal changes
diff --git a/pymc/backends/arviz.py b/pymc/backends/arviz.py
index 4f94636d0f..be37b296c9 100644
--- a/pymc/backends/arviz.py
+++ b/pymc/backends/arviz.py
@@ -454,7 +454,7 @@ def constant_data_to_xarray(self):
         """Convert constant data to xarray."""
         # For constant data, we are concerned only with deterministics and
         # data. The constant data vars must be either pm.Data
-        # (TensorSharedVariable) or pm.Deterministic
+        # (TensorConstant/SharedVariable) or pm.Deterministic
         constant_data_vars = {}  # type: Dict[str, Var]

         def is_data(name, var) -> bool:
diff --git a/pymc/data.py b/pymc/data.py
index 804595831b..472cd5343a 100644
--- a/pymc/data.py
+++ b/pymc/data.py
@@ -17,18 +17,21 @@
 import os
 import pkgutil
 import urllib.request
+import warnings

 from copy import copy
-from typing import Any, Dict, List, Sequence
+from typing import Any, Dict, List, Optional, Sequence, Union

 import aesara
 import aesara.tensor as at
 import numpy as np
 import pandas as pd

+from aesara.compile.sharedvalue import SharedVariable
 from aesara.graph.basic import Apply
 from aesara.tensor.type import TensorType
-from aesara.tensor.var import TensorVariable
+from aesara.tensor.var import TensorConstant, TensorVariable
+from packaging import version

 import pymc as pm

@@ -40,6 +43,8 @@
     "Minibatch",
     "align_minibatches",
     "Data",
+    "ConstantData",
+    "MutableData",
 ]

 BASE_URL = "https://raw.githubusercontent.com/pymc-devs/pymc-examples/main/examples/data/{filename}"

@@ -463,9 +468,103 @@ def align_minibatches(batches=None):
             rng.seed()


-class Data:
-    """Data container class that wraps :func:`aesara.shared` and lets
-    the model be aware of its inputs and outputs.
+def determine_coords(model, value, dims: Optional[Sequence[str]] = None) -> Dict[str, Sequence]:
+    """Determines coordinate values from data or the model (via ``dims``)."""
+    coords = {}
+
+    # If value is a df or a series, we interpret the index as coords:
+    if isinstance(value, (pd.Series, pd.DataFrame)):
+        dim_name = None
+        if dims is not None:
+            dim_name = dims[0]
+        if dim_name is None and value.index.name is not None:
+            dim_name = value.index.name
+        if dim_name is not None:
+            coords[dim_name] = value.index
+
+    # If value is a df, we also interpret the columns as coords:
+    if isinstance(value, pd.DataFrame):
+        dim_name = None
+        if dims is not None:
+            dim_name = dims[1]
+        if dim_name is None and value.columns.name is not None:
+            dim_name = value.columns.name
+        if dim_name is not None:
+            coords[dim_name] = value.columns
+
+    if isinstance(value, np.ndarray) and dims is not None:
+        if len(dims) != value.ndim:
+            raise pm.exceptions.ShapeError(
+                "Invalid data shape. The rank of the dataset must match the length of `dims`.",
+                actual=value.shape,
+                expected=value.ndim,
+            )
+        for size, dim in zip(value.shape, dims):
+            coord = model.coords.get(dim, None)
+            if coord is None:
+                coords[dim] = pd.RangeIndex(size, name=dim)
+
+    return coords
+
+
+def ConstantData(
+    name: str,
+    value,
+    *,
+    dims: Optional[Sequence[str]] = None,
+    export_index_as_coords=False,
+    **kwargs,
+) -> TensorConstant:
+    """Alias for ``pm.Data(..., mutable=False)``.
+
+    Registers the ``value`` as a ``TensorConstant`` with the model.
+    """
+    return Data(
+        name,
+        value,
+        dims=dims,
+        export_index_as_coords=export_index_as_coords,
+        mutable=False,
+        **kwargs,
+    )
+
+
+def MutableData(
+    name: str,
+    value,
+    *,
+    dims: Optional[Sequence[str]] = None,
+    export_index_as_coords=False,
+    **kwargs,
+) -> SharedVariable:
+    """Alias for ``pm.Data(..., mutable=True)``.
+
+    Registers the ``value`` as a ``SharedVariable`` with the model.
+    """
+    return Data(
+        name,
+        value,
+        dims=dims,
+        export_index_as_coords=export_index_as_coords,
+        mutable=True,
+        **kwargs,
+    )
+
+
+def Data(
+    name: str,
+    value,
+    *,
+    dims: Optional[Sequence[str]] = None,
+    export_index_as_coords=False,
+    mutable: Optional[bool] = None,
+    **kwargs,
+) -> Union[SharedVariable, TensorConstant]:
+    """Data container that registers a data variable with the model.
+
+    Depending on the ``mutable`` setting (default: True), the variable
+    is registered as a ``SharedVariable``, enabling it to be altered
+    in value and shape, but NOT in dimensionality, using ``pm.set_data()``.

     Parameters
     ----------
@@ -473,6 +572,11 @@ class Data:
         The name for this variable
     value: {List, np.ndarray, pd.Series, pd.Dataframe}
         A value to associate with this variable
+    mutable : bool, optional
+        Switches between creating a ``SharedVariable`` (``mutable=True``, default)
+        vs. creating a ``TensorConstant`` (``mutable=False``).
+        Consider using ``pm.ConstantData`` or ``pm.MutableData`` as less verbose
+        alternatives to ``pm.Data(..., mutable=...)``.
     dims: {str, tuple of str}, optional, default=None
         Dimension names of the random variables (as opposed to the shapes of these
         random variables). Use this when `value` is a pandas Series or DataFrame. The
@@ -495,7 +599,7 @@ class Data:
     >>> observed_data = [mu + np.random.randn(20) for mu in true_mu]

     >>> with pm.Model() as model:
-    ...     data = pm.Data('data', observed_data[0])
+    ...     data = pm.MutableData('data', observed_data[0])
     ...     mu = pm.Normal('mu', 0, 10)
     ...     pm.Normal('y', mu=mu, sigma=1, observed=data)
@@ -513,104 +617,58 @@ class Data:
     For more information, take a look at this example notebook
     https://docs.pymc.io/notebooks/data_container.html
     """
+    if isinstance(value, list):
+        value = np.array(value)

-    def __new__(
-        self,
-        name,
-        value,
-        *,
-        dims=None,
-        export_index_as_coords=False,
-        **kwargs,
-    ):
-        if isinstance(value, list):
-            value = np.array(value)
-
-        # Add data container to the named variables of the model.
-        try:
-            model = pm.Model.get_context()
-        except TypeError:
-            raise TypeError(
-                "No model on context stack, which is needed to instantiate a data container. "
-                "Add variable inside a 'with model:' block."
-            )
-        name = model.name_for(name)
-
-        # `pandas_to_array` takes care of parameter `value` and
-        # transforms it to something digestible for pymc
-        shared_object = aesara.shared(pandas_to_array(value), name, **kwargs)
-
-        if isinstance(dims, str):
-            dims = (dims,)
-        if not (dims is None or len(dims) == shared_object.ndim):
-            raise pm.exceptions.ShapeError(
-                "Length of `dims` must match the dimensions of the dataset.",
-                actual=len(dims),
-                expected=shared_object.ndim,
+    # Add data container to the named variables of the model.
+    try:
+        model = pm.Model.get_context()
+    except TypeError:
+        raise TypeError(
+            "No model on context stack, which is needed to instantiate a data container. "
+            "Add variable inside a 'with model:' block."
+        )
+    name = model.name_for(name)
+
+    # `pandas_to_array` takes care of parameter `value` and
+    # transforms it to something digestible for Aesara.
+    arr = pandas_to_array(value)
+
+    if mutable is None:
+        current = version.Version(pm.__version__)
+        mutable = current.major == 4 and current.minor < 1
+        if mutable:
+            warnings.warn(
+                "The `mutable` kwarg was not specified. Currently it defaults to `pm.Data(mutable=True)`,"
+                " which is equivalent to using `pm.MutableData()`."
+                " In v4.1.0 the default will change to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
+                " Set `pm.Data(..., mutable=False/True)`, or use `pm.ConstantData`/`pm.MutableData`.",
+                FutureWarning,
             )
-
-        coords = self.set_coords(model, value, dims)
-
-        if export_index_as_coords:
-            model.add_coords(coords)
-        elif dims:
-            # Register new dimension lengths
-            for d, dname in enumerate(dims):
-                if not dname in model.dim_lengths:
-                    model.add_coord(dname, values=None, length=shared_object.shape[d])
-
-        # To draw the node for this variable in the graphviz Digraph we need
-        # its shape.
-        # XXX: This needs to be refactored
-        # shared_object.dshape = tuple(shared_object.shape.eval())
-        # if dims is not None:
-        #     shape_dims = model.shape_from_dims(dims)
-        #     if shared_object.dshape != shape_dims:
-        #         raise pm.exceptions.ShapeError(
-        #             "Data shape does not match with specified `dims`.",
-        #             actual=shared_object.dshape,
-        #             expected=shape_dims,
-        #         )
-
-        model.add_random_variable(shared_object, dims=dims)
-
-        return shared_object
-
-    @staticmethod
-    def set_coords(model, value, dims=None) -> Dict[str, Sequence]:
-        coords = {}
-
-        # If value is a df or a series, we interpret the index as coords:
-        if isinstance(value, (pd.Series, pd.DataFrame)):
-            dim_name = None
-            if dims is not None:
-                dim_name = dims[0]
-            if dim_name is None and value.index.name is not None:
-                dim_name = value.index.name
-            if dim_name is not None:
-                coords[dim_name] = value.index
-
-        # If value is a df, we also interpret the columns as coords:
-        if isinstance(value, pd.DataFrame):
-            dim_name = None
-            if dims is not None:
-                dim_name = dims[1]
-            if dim_name is None and value.columns.name is not None:
-                dim_name = value.columns.name
-            if dim_name is not None:
-                coords[dim_name] = value.columns
-
-        if isinstance(value, np.ndarray) and dims is not None:
-            if len(dims) != value.ndim:
-                raise pm.exceptions.ShapeError(
-                    "Invalid data shape. The rank of the dataset must match the "
-                    "length of `dims`.",
-                    actual=value.shape,
-                    expected=value.ndim,
-                )
-            for size, dim in zip(value.shape, dims):
-                coord = model.coords.get(dim, None)
-                if coord is None:
-                    coords[dim] = pd.RangeIndex(size, name=dim)
-
-        return coords
+    if mutable:
+        x = aesara.shared(arr, name, **kwargs)
+    else:
+        x = at.as_tensor_variable(arr, name, **kwargs)
+
+    if isinstance(dims, str):
+        dims = (dims,)
+    if not (dims is None or len(dims) == x.ndim):
+        raise pm.exceptions.ShapeError(
+            "Length of `dims` must match the dimensions of the dataset.",
+            actual=len(dims),
+            expected=x.ndim,
+        )
+
+    coords = determine_coords(model, value, dims)
+
+    if export_index_as_coords:
+        model.add_coords(coords)
+    elif dims:
+        # Register new dimension lengths
+        for d, dname in enumerate(dims):
+            if dname not in model.dim_lengths:
+                model.add_coord(dname, values=None, length=x.shape[d])
+
+    model.add_random_variable(x, dims=dims)
+
+    return x
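Usage sketch for the API introduced above (illustrative only, not part of the patch; assumes this branch is installed, and the `c`/`m`/`d` names are arbitrary):

# The three ways to register a data variable under the new pm.Data API.
import numpy as np
import pymc as pm
from aesara.compile.sharedvalue import SharedVariable
from aesara.tensor.var import TensorConstant

with pm.Model():
    c = pm.ConstantData("c", np.array([1.0, 2.0]))  # registered as a TensorConstant
    m = pm.MutableData("m", np.array([1.0, 2.0]))   # registered as a SharedVariable
    d = pm.Data("d", np.array([1.0, 2.0]))          # emits FutureWarning; currently mutable=True

assert isinstance(c, TensorConstant)
assert isinstance(m, SharedVariable)
assert isinstance(d, SharedVariable)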
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
index 0b928f229d..f0f031ff02 100644
--- a/pymc/distributions/multivariate.py
+++ b/pymc/distributions/multivariate.py
@@ -1938,6 +1938,13 @@ def dist(cls, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs)
         # mean = median = mode = mu
         return super().dist([mu, sigma, *covs], **kwargs)

+    def get_moment(rv, size, mu, covs, chols, evds):
+        mean = mu
+        if not rv_size_is_none(size):
+            moment_size = at.concatenate([size, mu.shape])
+            mean = at.full(moment_size, mu)
+        return mean
+
     def logp(value, mu, sigma, *covs):
         """
         Calculate log-probability of Multivariate Normal distribution
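What the new `get_moment` enables, as a sketch (illustrative; mirrors the test added to test_distributions_moments.py below, and assumes moments are used as default initial points on this branch):

# KroneckerNormal now has a moment: mu broadcast along the requested size.
import numpy as np
import pymc as pm

with pm.Model() as m:
    pm.KroneckerNormal("x", mu=np.ones(6), covs=[np.eye(2), np.eye(3)], size=5)

# The initial point should be mu of shape (6,) broadcast to (5, 6).
ip = m.recompute_initial_point()
assert ip["x"].shape == (5, 6)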
diff --git a/pymc/model.py b/pymc/model.py
index 8ec776d8b8..fabf608f47 100644
--- a/pymc/model.py
+++ b/pymc/model.py
@@ -1114,7 +1114,7 @@ def set_data(
     ):
         """Changes the values of a data variable in the model.

-        In contrast to pm.Data().set_value, this method can also
+        In contrast to pm.MutableData().set_value, this method can also
         update the corresponding coordinates.

         Parameters
         ----------
@@ -1131,7 +1131,8 @@ def set_data(
         shared_object = self[name]
         if not isinstance(shared_object, SharedVariable):
             raise TypeError(
-                f"The variable `{name}` must be a `SharedVariable` (e.g. `pymc.Data`) to allow updating. "
+                f"The variable `{name}` must be a `SharedVariable`"
+                " (created through `pm.MutableData()` or `pm.Data(mutable=True)`) to allow updating. "
                 f"The current type is: {type(shared_object)}"
             )

@@ -1156,7 +1157,7 @@ def set_data(
         length_changed = new_length != old_length

         # Reject resizing if we already know that it would create shape problems.
-        # NOTE: If there are multiple pm.Data containers sharing this dim, but the user only
+        # NOTE: If there are multiple pm.MutableData containers sharing this dim, but the user only
         # changes the values for one of them, they will run into shape problems nonetheless.
         length_belongs_to = length_tensor.owner.inputs[0].owner.inputs[0]
         if not isinstance(length_belongs_to, SharedVariable) and length_changed:
@@ -1735,8 +1736,8 @@ def set_data(new_data, model=None):

     >>> import pymc as pm
     >>> with pm.Model() as model:
-    ...     x = pm.Data('x', [1., 2., 3.])
-    ...     y = pm.Data('y', [1., 2., 3.])
+    ...     x = pm.MutableData('x', [1., 2., 3.])
+    ...     y = pm.MutableData('y', [1., 2., 3.])
     ...     beta = pm.Normal('beta', 0, 1)
     ...     obs = pm.Normal('obs', x * beta, 1, observed=y)
     ...     idata = pm.sample(1000, tune=1000)
diff --git a/pymc/model_graph.py b/pymc/model_graph.py
index a0fd44a9e3..bfb9134c11 100644
--- a/pymc/model_graph.py
+++ b/pymc/model_graph.py
@@ -20,7 +20,7 @@
 from aesara.compile.sharedvalue import SharedVariable
 from aesara.graph.basic import walk
 from aesara.tensor.random.op import RandomVariable
-from aesara.tensor.var import TensorVariable
+from aesara.tensor.var import TensorConstant, TensorVariable

 import pymc as pm

@@ -133,10 +133,14 @@ def _make_node(self, var_name, graph, *, formatting: str = "plain"):
             shape = "octagon"
             style = "filled"
             label = f"{var_name}\n~\nPotential"
+        elif isinstance(v, TensorConstant):
+            shape = "box"
+            style = "rounded, filled"
+            label = f"{var_name}\n~\nConstantData"
         elif isinstance(v, SharedVariable):
             shape = "box"
             style = "rounded, filled"
-            label = f"{var_name}\n~\nData"
+            label = f"{var_name}\n~\nMutableData"
         elif v.owner and isinstance(v.owner.op, RandomVariable):
             shape = "ellipse"
             if hasattr(v.tag, "observations"):
diff --git a/pymc/sampling.py b/pymc/sampling.py
index f3c269bdce..6a01e9ed7c 100644
--- a/pymc/sampling.py
+++ b/pymc/sampling.py
@@ -42,6 +42,7 @@
 import xarray

 from aesara.compile.mode import Mode
+from aesara.graph.basic import Constant
 from aesara.tensor.sharedvar import SharedVariable
 from arviz import InferenceData
 from fastprogress.fastprogress import progress_bar
@@ -1728,7 +1729,7 @@ def sample_posterior_predictive(
         for rv in walk_model(vars_to_sample, walk_past_rvs=True)
         if rv not in vars_to_sample
         and rv in model.named_vars.values()
-        and not isinstance(rv, SharedVariable)
+        and not isinstance(rv, (Constant, SharedVariable))
     ]
     if inputs_and_names:
         inputs, input_names = zip(*inputs_and_names)
@@ -1739,7 +1740,7 @@ def sample_posterior_predictive(
         input_names = [
             n
             for n in _trace.varnames
-            if n not in output_names and not isinstance(model[n], SharedVariable)
+            if n not in output_names and not isinstance(model[n], (Constant, SharedVariable))
         ]
         inputs = [model[n] for n in input_names]

@@ -2067,7 +2068,7 @@ def sample_prior_predictive(
         names.append(rv_var.name)
         vars_to_sample.append(rv_var)

-    inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)]
+    inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, (Constant, SharedVariable))]

     sampler_fn = compile_pymc(
         inputs, vars_to_sample, allow_input_downcast=True, accept_inplace=True, mode=mode
diff --git a/pymc/tests/test_data_container.py b/pymc/tests/test_data_container.py
index 8a4f2668b8..e8776a04d7 100644
--- a/pymc/tests/test_data_container.py
+++ b/pymc/tests/test_data_container.py
@@ -17,6 +17,7 @@
 import pytest

 from aesara import shared
+from aesara.compile.sharedvalue import SharedVariable
 from aesara.tensor.sharedvar import ScalarSharedVariable
 from aesara.tensor.var import TensorVariable

@@ -32,7 +33,7 @@ class TestData(SeededTest):
     def test_deterministic(self):
         data_values = np.array([0.5, 0.4, 5, 2])
         with pm.Model() as model:
-            X = pm.Data("X", data_values)
+            X = pm.MutableData("X", data_values)
             pm.Normal("y", 0, 1, observed=X)
             model.logp(model.recompute_initial_point())

@@ -43,7 +44,7 @@ def test_sample(self):
         x_pred = np.linspace(-3, 3, 200, dtype="float32")

         with pm.Model():
-            x_shared = pm.Data("x_shared", x)
+            x_shared = pm.MutableData("x_shared", x)
             b = pm.Normal("b", 0.0, 10.0)
             pm.Normal("obs", b * x_shared, np.sqrt(1e-2), observed=y)

@@ -71,8 +72,8 @@ def test_sample(self):

     def test_sample_posterior_predictive_after_set_data(self):
         with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.MutableData("x", [1.0, 2.0, 3.0])
+            y = pm.ConstantData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 10.0)
             pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
             trace = pm.sample(
@@ -95,8 +96,8 @@ def test_sample_posterior_predictive_after_set_data(self):

     def test_sample_after_set_data(self):
         with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.MutableData("x", [1.0, 2.0, 3.0])
+            y = pm.MutableData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 10.0)
             pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
             pm.sample(
@@ -131,8 +132,8 @@ def test_shared_data_as_index(self):
         See https://github.com/pymc-devs/pymc/issues/3813
         """
         with pm.Model() as model:
-            index = pm.Data("index", [2, 0, 1, 0, 2])
-            y = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
+            index = pm.MutableData("index", [2, 0, 1, 0, 2])
+            y = pm.MutableData("y", [1.0, 2.0, 3.0, 2.0, 1.0])
             alpha = pm.Normal("alpha", 0, 1.5, size=3)
             pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

@@ -163,7 +164,7 @@ def test_shared_data_as_rv_input(self):
         See https://github.com/pymc-devs/pymc/issues/3842
         """
         with pm.Model() as m:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
+            x = pm.MutableData("x", [1.0, 2.0, 3.0])
             y = pm.Normal("y", mu=x, size=(2, 3))
             assert y.eval().shape == (2, 3)
             idata = pm.sample(
@@ -221,7 +222,7 @@ def test_shared_scalar_as_rv_input(self):

     def test_creation_of_data_outside_model_context(self):
         with pytest.raises((IndexError, TypeError)) as error:
-            pm.Data("data", [1.1, 2.2, 3.3])
+            pm.ConstantData("data", [1.1, 2.2, 3.3])
         error.match("No model on context stack")

     def test_set_data_to_non_data_container_variables(self):
@@ -244,8 +245,8 @@ def test_set_data_to_non_data_container_variables(self):
     @pytest.mark.xfail(reason="Depends on ModelGraph")
     def test_model_to_graphviz_for_model_with_data_container(self):
         with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
+            y = pm.MutableData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 10.0)
             obs_sigma = floatX(np.sqrt(1e-2))
             pm.Normal("obs", beta * x, obs_sigma, observed=y)
@@ -262,12 +263,14 @@ def test_model_to_graphviz_for_model_with_data_container(self):
             pm.model_to_graphviz(model, formatting=formatting)

         exp_without = [
-            'x [label="x\n~\nData" shape=box style="rounded, filled"]',
+            'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
+            'y [label="y\n~\nMutableData" shape=box style="rounded, filled"]',
             'beta [label="beta\n~\nNormal"]',
             'obs [label="obs\n~\nNormal" style=filled]',
         ]
         exp_with = [
-            'x [label="x\n~\nData" shape=box style="rounded, filled"]',
+            'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
+            'y [label="y\n~\nMutableData" shape=box style="rounded, filled"]',
             'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
             f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
         ]
@@ -290,7 +293,7 @@ def test_explicit_coords(self):
         }
         # pass coordinates explicitly, use numpy array in Data container
         with pm.Model(coords=coords) as pmodel:
-            pm.Data("observations", data, dims=("rows", "columns"))
+            pm.MutableData("observations", data, dims=("rows", "columns"))

         assert "rows" in pmodel.coords
         assert pmodel.coords["rows"] == ("R1", "R2", "R3", "R4", "R5")
@@ -310,7 +313,7 @@ def test_symbolic_coords(self):
         Their lengths are then automatically linked to the corresponding Tensor dimension.
         """
         with pm.Model() as pmodel:
-            intensity = pm.Data("intensity", np.ones((2, 3)), dims=("row", "column"))
+            intensity = pm.MutableData("intensity", np.ones((2, 3)), dims=("row", "column"))
             assert "row" in pmodel.dim_lengths
             assert "column" in pmodel.dim_lengths
             assert isinstance(pmodel.dim_lengths["row"], TensorVariable)
@@ -327,7 +330,7 @@ def test_no_resize_of_implied_dimensions(self):
             # Imply a dimension through RV params
             pm.Normal("n", mu=[1, 2, 3], dims="city")
             # _Use_ the dimension for a data variable
-            inhabitants = pm.Data("inhabitants", [100, 200, 300], dims="city")
+            inhabitants = pm.MutableData("inhabitants", [100, 200, 300], dims="city")

             # Attempting to re-size the dimension through the data variable would
             # cause shape problems in InferenceData conversion, because the RV remains (3,).
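The two tests above capture the core of the symbolic-dims feature; in user code the pattern looks like this (illustrative sketch, not part of the patch):

# A MutableData container with named dims can be resized via pm.set_data,
# because the lengths of "row" and "column" stay symbolic.
import numpy as np
import pymc as pm

with pm.Model() as pmodel:
    intensity = pm.MutableData("intensity", np.ones((2, 3)), dims=("row", "column"))
    assert intensity.get_value().shape == (2, 3)

with pmodel:
    pm.set_data({"intensity": np.ones((4, 5))})
    assert intensity.get_value().shape == (4, 5)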
@@ -343,7 +346,7 @@ def test_implicit_coords_series(self):
             name="sales",
         )
         with pm.Model() as pmodel:
-            pm.Data("sales", ser_sales, dims="date", export_index_as_coords=True)
+            pm.ConstantData("sales", ser_sales, dims="date", export_index_as_coords=True)

         assert "date" in pmodel.coords
         assert len(pmodel.coords["date"]) == 22
@@ -360,7 +363,9 @@ def test_implicit_coords_dataframe(self):

         # infer coordinates from index and columns of the DataFrame
         with pm.Model() as pmodel:
-            pm.Data("observations", df_data, dims=("rows", "columns"), export_index_as_coords=True)
+            pm.ConstantData(
+                "observations", df_data, dims=("rows", "columns"), export_index_as_coords=True
+            )

         assert "rows" in pmodel.coords
         assert "columns" in pmodel.coords
@@ -370,8 +375,8 @@ def test_data_kwargs(self):
         strict_value = True
         allow_downcast_value = False
         with pm.Model():
-            data = pm.Data(
-                "data",
+            data = pm.MutableData(
+                "mdata",
                 value=[[1.0], [2.0], [3.0]],
                 strict=strict_value,
                 allow_downcast=allow_downcast_value,
@@ -379,6 +384,12 @@ def test_data_kwargs(self):
         assert data.container.strict is strict_value
         assert data.container.allow_downcast is allow_downcast_value

+    def test_data_mutable_default_warning(self):
+        with pm.Model():
+            with pytest.warns(FutureWarning, match="`mutable` kwarg was not specified"):
+                data = pm.Data("x", [1, 2, 3])
+            assert isinstance(data, SharedVariable)
+

 def test_data_naming():
     """
@@ -386,7 +398,7 @@ def test_data_naming():
     not given model-relative names.
     """
     with pm.Model("named_model") as model:
-        x = pm.Data("x", [1.0, 2.0, 3.0])
+        x = pm.ConstantData("x", [1.0, 2.0, 3.0])
         y = pm.Normal("y")
         assert y.name == "named_model_y"
         assert x.name == "named_model_x"
diff --git a/pymc/tests/test_distributions_moments.py b/pymc/tests/test_distributions_moments.py
index 2188a931c4..4a6361504e 100644
--- a/pymc/tests/test_distributions_moments.py
+++ b/pymc/tests/test_distributions_moments.py
@@ -34,6 +34,7 @@
     HyperGeometric,
     Interpolated,
     InverseGamma,
+    KroneckerNormal,
     Kumaraswamy,
     Laplace,
     Logistic,
@@ -110,7 +111,6 @@ def test_all_distributions_have_moments():
         dist_module.discrete.DiscreteWeibull,
         dist_module.multivariate.CAR,
         dist_module.multivariate.DirichletMultinomial,
-        dist_module.multivariate.KroneckerNormal,
         dist_module.multivariate.Wishart,
     }

@@ -1316,3 +1316,32 @@ def normal_sim(rng, mu, sigma, size):
     cutoff = st.norm().ppf(1 - (alpha / 2))

     assert np.all(np.abs((result - expected_sample_mean) / expected_sample_mean_std) < cutoff)
+
+
+@pytest.mark.parametrize(
+    "mu, covs, size, expected",
+    [
+        (np.ones(1), [np.identity(1), np.identity(1)], None, np.ones(1)),
+        (np.ones(6), [np.identity(2), np.identity(3)], 5, np.ones((5, 6))),
+        (np.zeros(6), [np.identity(2), np.identity(3)], 6, np.zeros((6, 6))),
+        (np.zeros(3), [np.identity(3), np.identity(1)], 6, np.zeros((6, 3))),
+        (
+            np.array([1, 2, 3, 4]),
+            [
+                np.array([[1.0, 0.5], [0.5, 2]]),
+                np.array([[1.0, 0.4], [0.4, 2]]),
+            ],
+            2,
+            np.array(
+                [
+                    [1, 2, 3, 4],
+                    [1, 2, 3, 4],
+                ]
+            ),
+        ),
+    ],
+)
+def test_kronecker_normal_moments(mu, covs, size, expected):
+    with Model() as model:
+        KroneckerNormal("x", mu=mu, covs=covs, size=size)
+    assert_moment_is_expected(model, expected)
diff --git a/pymc/tests/test_idata_conversion.py b/pymc/tests/test_idata_conversion.py
index 2cefe424c3..5dac5412ba 100644
--- a/pymc/tests/test_idata_conversion.py
+++ b/pymc/tests/test_idata_conversion.py
@@ -255,7 +255,7 @@ def test_autodetect_coords_from_model(self, use_context):
            )
            data_dims = ("date", "city")
-            data = pm.Data("data", df_data, dims=data_dims)
+            data = pm.ConstantData("data", df_data, dims=data_dims)
             _ = pm.Normal("likelihood", mu=city_temperature, sd=0.5, observed=data, dims=data_dims)

             trace = pm.sample(
@@ -281,14 +281,14 @@ def test_autodetect_coords_from_model(self, use_context):
         np.testing.assert_array_equal(idata.observed_data.coords["city"], coords["city"])

     def test_ovewrite_model_coords_dims(self):
-        """Check coords and dims from model object can be partially overwrited."""
+        """Check coords and dims from model object can be partially overwritten."""
         dim1 = ["a", "b"]
         new_dim1 = ["c", "d"]
         coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
         x_data = np.arange(4).reshape((2, 2))
         y = x_data + np.random.normal(size=(2, 2))
         with pm.Model(coords=coords):
-            x = pm.Data("x", x_data, dims=("dim1", "dim2"))
+            x = pm.ConstantData("x", x_data, dims=("dim1", "dim2"))
             beta = pm.Normal("beta", 0, 1, dims="dim1")
             _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2"))
             trace = pm.sample(100, tune=100, return_inferencedata=False)
@@ -466,8 +466,8 @@ def test_potential(self):

     def test_constant_data(self, use_context):
         """Test constant_data group behaviour."""
         with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
+            y = pm.MutableData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 1)
             obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
             trace = pm.sample(100, chains=2, tune=100, return_inferencedata=False)
@@ -483,8 +483,8 @@ def test_constant_data(self, use_context):

     def test_predictions_constant_data(self):
         with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
+            y = pm.MutableData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 1)
             obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
             trace = pm.sample(100, tune=100, return_inferencedata=False)
@@ -495,8 +495,8 @@ def test_predictions_constant_data(self):
         assert not fails

         with pm.Model():
-            x = pm.Data("x", [1.0, 2.0])
-            y = pm.Data("y", [1.0, 2.0])
+            x = pm.MutableData("x", [1.0, 2.0])
+            y = pm.ConstantData("y", [1.0, 2.0])
             beta = pm.Normal("beta", 0, 1)
             obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
             predictive_trace = pm.sample_posterior_predictive(
@@ -519,8 +519,8 @@ def test_predictions_constant_data(self):

     def test_no_trace(self):
         with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
+            y = pm.MutableData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 1)
             obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
             idata = pm.sample(100, tune=100)
@@ -553,8 +553,8 @@ def test_no_trace(self):
     def test_priors_separation(self, use_context):
         """Test model is enough to get prior, prior predictive and observed_data."""
         with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
+            x = pm.MutableData("x", [1.0, 2.0, 3.0])
+            y = pm.ConstantData("y", [1.0, 2.0, 3.0])
             beta = pm.Normal("beta", 0, 1)
             obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
             prior = pm.sample_prior_predictive(return_inferencedata=False)
diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py
index eb86644efc..9fa5dbc827 100644
--- a/pymc/tests/test_model.py
+++ b/pymc/tests/test_model.py
@@ -549,7 +549,7 @@ def test_eval_rv_shapes(self):
                 "city": ["Sydney", "Las Vegas", "Düsseldorf"],
"Las Vegas", "Düsseldorf"], } ) as pmodel: - pm.Data("budget", [1, 2, 3, 4], dims="year") + pm.MutableData("budget", [1, 2, 3, 4], dims="year") pm.Normal("untransformed", size=(1, 2)) pm.Uniform("transformed", size=(7,)) obs = pm.Uniform("observed", size=(3,), observed=[0.1, 0.2, 0.3]) diff --git a/pymc/tests/test_model_graph.py b/pymc/tests/test_model_graph.py index 67d2e9af68..404f32647e 100644 --- a/pymc/tests/test_model_graph.py +++ b/pymc/tests/test_model_graph.py @@ -44,9 +44,9 @@ def radon_model(): # Anonymous SharedVariables don't show up floor_measure = aesara.shared(floor_measure) - floor_measure_offset = pm.Data("floor_measure_offset", 1) + floor_measure_offset = pm.MutableData("floor_measure_offset", 1) y_hat = a + b * floor_measure + floor_measure_offset - log_radon = pm.Data("log_radon", np.random.normal(1, 1, size=n_homes)) + log_radon = pm.MutableData("log_radon", np.random.normal(1, 1, size=n_homes)) y_like = pm.Normal("y_like", mu=y_hat, sigma=sigma_y, observed=log_radon) compute_graph = { @@ -104,13 +104,13 @@ def model_with_dims(): population = pm.HalfNormal("population", sd=5, dims=("city")) - time = pm.Data("year", [2014, 2015, 2016], dims="year") + time = pm.ConstantData("year", [2014, 2015, 2016], dims="year") n = pm.Deterministic( "tax revenue", economics * population[None, :] * time[:, None], dims=("year", "city") ) - yobs = pm.Data("observed", np.ones((3, 4))) + yobs = pm.MutableData("observed", np.ones((3, 4))) L = pm.Normal("L", n, observed=yobs) compute_graph = { diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py index 7238a41357..123d5c4a77 100644 --- a/pymc/tests/test_sampling.py +++ b/pymc/tests/test_sampling.py @@ -955,7 +955,7 @@ def test_ignores_observed(self): observed = np.random.normal(10, 1, size=200) with pm.Model(): # Use a prior that's way off to show we're ignoring the observed variables - observed_data = pm.Data("observed_data", observed) + observed_data = pm.MutableData("observed_data", observed) mu = pm.Normal("mu", mu=-100, sigma=1) positive_mu = pm.Deterministic("positive_mu", np.abs(mu)) z = -1 - positive_mu diff --git a/pymc/tests/test_shape_handling.py b/pymc/tests/test_shape_handling.py index 7403d99243..45ef7c594e 100644 --- a/pymc/tests/test_shape_handling.py +++ b/pymc/tests/test_shape_handling.py @@ -315,7 +315,7 @@ def test_define_dims_on_the_fly(self): @pytest.mark.xfail(reason="Simultaneous use of size and dims is not implemented") def test_data_defined_size_dimension_can_register_dimname(self): with pm.Model() as pmodel: - x = pm.Data("x", [[1, 2, 3, 4]], dims=("first", "second")) + x = pm.ConstantData("x", [[1, 2, 3, 4]], dims=("first", "second")) assert "first" in pmodel.dim_lengths assert "second" in pmodel.dim_lengths # two dimensions are implied; a "third" dimension is created @@ -325,7 +325,7 @@ def test_data_defined_size_dimension_can_register_dimname(self): def test_can_resize_data_defined_size(self): with pm.Model() as pmodel: - x = pm.Data("x", [[1, 2, 3, 4]], dims=("first", "second")) + x = pm.MutableData("x", [[1, 2, 3, 4]], dims=("first", "second")) y = pm.Normal("y", mu=0, dims=("first", "second")) z = pm.Normal("z", mu=y, observed=np.ones((1, 4))) assert x.eval().shape == (1, 4)