From 4463f43736cc586b552107b18393f091d6be6f3a Mon Sep 17 00:00:00 2001
From: Uri Granta <uri.granta@secondmind.ai>
Date: Mon, 27 Nov 2023 13:13:25 +0000
Subject: [PATCH] Define SupportsPredictY protocol

---
 docs/notebooks/code_overview.pct.py           |  11 +-
 .../test_multifidelity_models.py              |   3 +-
 .../unit/acquisition/function/test_entropy.py |   6 +-
 tests/unit/models/test_interfaces.py          |  20 +---
 tests/util/models/gpflow/models.py            |  11 +-
 trieste/acquisition/function/entropy.py       |  16 ++-
 trieste/acquisition/function/greedy_batch.py  |  24 +++-
 trieste/acquisition/sampler.py                |   6 +-
 trieste/models/gpflow/interface.py            |   2 +
 trieste/models/gpflow/models.py               |   7 +-
 trieste/models/gpflux/interface.py            |   4 +-
 trieste/models/interfaces.py                  | 105 ++++++++++++------
 12 files changed, 136 insertions(+), 79 deletions(-)

diff --git a/docs/notebooks/code_overview.pct.py b/docs/notebooks/code_overview.pct.py
index 85216ce818..3e0ff2a7b0 100644
--- a/docs/notebooks/code_overview.pct.py
+++ b/docs/notebooks/code_overview.pct.py
@@ -198,12 +198,13 @@ class HasGizmoReparamSamplerAndObservationNoise(
     AcquisitionFunction,
     SingleModelAcquisitionBuilder,
 )
+from trieste.models.interfaces import SupportsPredictY
 from trieste.data import Dataset
 
 
-class ProbabilityOfValidity(SingleModelAcquisitionBuilder[ProbabilisticModel]):
+class ProbabilityOfValidity(SingleModelAcquisitionBuilder[SupportsPredictY]):
     def prepare_acquisition_function(
-        self, model: ProbabilisticModel, dataset: Optional[Dataset] = None
+        self, model: SupportsPredictY, dataset: Optional[Dataset] = None
     ) -> AcquisitionFunction:
         def acquisition(at: TensorType) -> TensorType:
             mean, _ = model.predict_y(tf.squeeze(at, -2))
@@ -217,9 +218,9 @@ def acquisition(at: TensorType) -> TensorType:
 
 
 # %%
-class ProbabilityOfValidity2(SingleModelAcquisitionBuilder[ProbabilisticModel]):
+class ProbabilityOfValidity2(SingleModelAcquisitionBuilder[SupportsPredictY]):
     def prepare_acquisition_function(
-        self, model: ProbabilisticModel, dataset: Optional[Dataset] = None
+        self, model: SupportsPredictY, dataset: Optional[Dataset] = None
     ) -> AcquisitionFunction:
         @tf.function
         def acquisition(at: TensorType) -> TensorType:
@@ -231,7 +232,7 @@ def acquisition(at: TensorType) -> TensorType:
     def update_acquisition_function(
         self,
         function: AcquisitionFunction,
-        model: ProbabilisticModel,
+        model: SupportsPredictY,
         dataset: Optional[Dataset] = None,
     ) -> AcquisitionFunction:
         return function  # no need to update anything
diff --git a/tests/integration/models/multifidelity/test_multifidelity_models.py b/tests/integration/models/multifidelity/test_multifidelity_models.py
index 37c011b5b0..8832d82080 100644
--- a/tests/integration/models/multifidelity/test_multifidelity_models.py
+++ b/tests/integration/models/multifidelity/test_multifidelity_models.py
@@ -11,6 +11,7 @@
     check_and_extract_fidelity_query_points,
     split_dataset_by_fidelity,
 )
+from trieste.models import TrainableProbabilisticModel
 from trieste.models.gpflow import GaussianProcessRegression
 from trieste.models.gpflow.builders import (
     build_gpr,
@@ -119,7 +120,7 @@ def test_multifidelity_nonlinear_autoregressive_results_better_than_linear() ->
     observer = mk_observer(noisy_nonlinear_multifidelity)
     initial_data = observer(initial_sample)
 
-    nonlinear_model = MultifidelityNonlinearAutoregressive(
+    nonlinear_model: TrainableProbabilisticModel = MultifidelityNonlinearAutoregressive(
         build_multifidelity_nonlinear_autoregressive_models(
             initial_data, n_fidelities, input_search_space
         )
diff --git a/tests/unit/acquisition/function/test_entropy.py b/tests/unit/acquisition/function/test_entropy.py
index ba860a2211..f922d51252 100644
--- a/tests/unit/acquisition/function/test_entropy.py
+++ b/tests/unit/acquisition/function/test_entropy.py
@@ -36,6 +36,7 @@
     MinValueEntropySearch,
     MUMBOModelType,
     SupportsCovarianceObservationNoiseTrajectory,
+    SupportsCovarianceWithTopFidelityPredictY,
     gibbon_quality_term,
     gibbon_repulsion_term,
     min_value_entropy_search,
@@ -48,7 +49,6 @@
     ThompsonSamplerFromTrajectory,
 )
 from trieste.data import Dataset, add_fidelity_column
-from trieste.models import SupportsCovarianceWithTopFidelity
 from trieste.objectives import Branin
 from trieste.space import Box
 from trieste.types import TensorType
@@ -612,7 +612,7 @@ def test_mumbo_raises_when_use_trajectory_sampler_and_model_without_trajectories
 )
 def test_mumbo_builder_builds_min_value_samples(
     mocked_mves: MagicMock,
-    min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelity],
+    min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelityPredictY],
 ) -> None:
     dataset = Dataset(tf.zeros([3, 2], dtype=tf.float64), tf.ones([3, 2], dtype=tf.float64))
     search_space = Box([0, 0], [1, 1])
@@ -638,7 +638,7 @@ def test_mumbo_builder_builds_min_value_samples(
     [ExactThompsonSampler(sample_min_value=True), GumbelSampler(sample_min_value=True)],
 )
 def test_mumbo_builder_updates_acquisition_function(
-    min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelity],
+    min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelityPredictY],
 ) -> None:
     search_space = Box([0.0, 0.0], [1.0, 1.0])
     model = MultiFidelityQuadraticMeanAndRBFKernel(
diff --git a/tests/unit/models/test_interfaces.py b/tests/unit/models/test_interfaces.py
index f7b084d8d8..6839b208ab 100644
--- a/tests/unit/models/test_interfaces.py
+++ b/tests/unit/models/test_interfaces.py
@@ -36,6 +36,7 @@
 from trieste.models import TrainableModelStack, TrainableProbabilisticModel
 from trieste.models.interfaces import (
     TrainablePredictJointReparamModelStack,
+    TrainablePredictYModelStack,
     TrainableSupportsPredictJoint,
     TrainableSupportsPredictJointHasReparamSampler,
 )
@@ -114,28 +115,11 @@ def test_model_stack_predict_joint() -> None:
     npt.assert_allclose(cov[..., 3:, :, :], cov3)
 
 
-def test_model_missing_predict_y() -> None:
-    model = _QuadraticModel([-1.0], [0.1])
-    x_predict = tf.constant([[0]], gpflow.default_float())
-    with pytest.raises(NotImplementedError):
-        model.predict_y(x_predict)
-
-
-def test_model_stack_missing_predict_y() -> None:
-    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
-    model1 = gpr_model(x, fnc_3x_plus_10(x))
-    model2 = _QuadraticModel([1.0], [2.0])
-    stack = TrainableModelStack((model1, 1), (model2, 1))
-    x_predict = tf.constant([[0]], gpflow.default_float())
-    with pytest.raises(NotImplementedError):
-        stack.predict_y(x_predict)
-
-
 def test_model_stack_predict_y() -> None:
     x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
     model1 = gpr_model(x, fnc_3x_plus_10(x))
     model2 = sgpr_model(x, fnc_2sin_x_over_3(x))
-    stack = TrainableModelStack((model1, 1), (model2, 1))
+    stack = TrainablePredictYModelStack((model1, 1), (model2, 1))
     mean, variance = stack.predict_y(x)
     npt.assert_allclose(mean[:, 0:1], model1.predict_y(x)[0])
     npt.assert_allclose(mean[:, 1:2], model2.predict_y(x)[0])
diff --git a/tests/util/models/gpflow/models.py b/tests/util/models/gpflow/models.py
index a2deae664a..1d25fdcdfe 100644
--- a/tests/util/models/gpflow/models.py
+++ b/tests/util/models/gpflow/models.py
@@ -47,6 +47,7 @@
     SupportsGetKernel,
     SupportsGetObservationNoise,
     SupportsPredictJoint,
+    SupportsPredictY,
 )
 from trieste.models.optimizer import Optimizer
 from trieste.types import TensorType
@@ -259,7 +260,7 @@ def optimize(self, dataset: Dataset) -> None:
 
 
 class MultiFidelityQuadraticMeanAndRBFKernel(
-    QuadraticMeanAndRBFKernel, SupportsCovarianceWithTopFidelity
+    QuadraticMeanAndRBFKernel, SupportsPredictY, SupportsCovarianceWithTopFidelity
 ):
     r"""
     A Gaussian process with scalar quadratic mean, an RBF kernel and
@@ -293,7 +294,7 @@ def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
 
 
 class MultiFidelityQuadraticMeanAndRBFKernelWithSamplers(
-    QuadraticMeanAndRBFKernelWithSamplers, SupportsCovarianceWithTopFidelity
+    QuadraticMeanAndRBFKernelWithSamplers, SupportsPredictY, SupportsCovarianceWithTopFidelity
 ):
     r"""
     A Gaussian process with scalar quadratic mean, an RBF kernel and
@@ -323,6 +324,12 @@ def covariance_with_top_fidelity(self, x: TensorType) -> TensorType:
         mean, _ = self.predict(x)
         return tf.ones_like(mean, dtype=mean.dtype)  # dummy covariances of correct shape
 
+    @inherit_check_shapes
+    def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
+        fmean, fvar = self.predict(query_points)
+        yvar = fvar + tf.constant(1.0, dtype=fmean.dtype)  # dummy noise variance
+        return fmean, yvar
+
 
 class QuadraticMeanAndRBFKernelWithBatchSamplers(
     QuadraticMeanAndRBFKernel, HasTrajectorySampler, HasReparamSampler
diff --git a/trieste/acquisition/function/entropy.py b/trieste/acquisition/function/entropy.py
index 7f04af0a76..f786417efd 100644
--- a/trieste/acquisition/function/entropy.py
+++ b/trieste/acquisition/function/entropy.py
@@ -29,6 +29,7 @@
     HasTrajectorySampler,
     SupportsCovarianceWithTopFidelity,
     SupportsGetObservationNoise,
+    SupportsPredictY,
 )
 from ...space import SearchSpace
 from ...types import TensorType
@@ -623,10 +624,19 @@ def __call__(self, x: TensorType) -> TensorType:
         return repulsion_weight * repulsion
 
 
+@runtime_checkable
+class SupportsCovarianceWithTopFidelityPredictY(
+    SupportsCovarianceWithTopFidelity, SupportsPredictY, Protocol
+):
+    """A model that is both multifidelity and supports predict_y."""
+
+    pass
+
+
 MUMBOModelType = TypeVar(
-    "MUMBOModelType", bound=SupportsCovarianceWithTopFidelity, contravariant=True
+    "MUMBOModelType", bound=SupportsCovarianceWithTopFidelityPredictY, contravariant=True
 )
-""" Type variable bound to :class:`~trieste.models.SupportsCovarianceWithTopFidelity`. """
+""" Type variable bound to :class:`SupportsCovarianceWithTopFidelityPredictY`. """
 
 
 class MUMBO(MinValueEntropySearch[MUMBOModelType]):
@@ -645,7 +655,7 @@ class MUMBO(MinValueEntropySearch[MUMBOModelType]):
 
     @overload
     def __init__(
-        self: "MUMBO[SupportsCovarianceWithTopFidelity]",
+        self: "MUMBO[SupportsCovarianceWithTopFidelityPredictY]",
         search_space: SearchSpace,
         num_samples: int = 5,
         grid_size: int = 1000,
diff --git a/trieste/acquisition/function/greedy_batch.py b/trieste/acquisition/function/greedy_batch.py
index 6cd222bd13..55898ed954 100644
--- a/trieste/acquisition/function/greedy_batch.py
+++ b/trieste/acquisition/function/greedy_batch.py
@@ -28,9 +28,12 @@
 from ...models import FastUpdateModel, ModelStack, ProbabilisticModel
 from ...models.interfaces import (
     PredictJointModelStack,
+    PredictJointPredictYModelStack,
+    PredictYModelStack,
     SupportsGetKernel,
     SupportsGetObservationNoise,
     SupportsPredictJoint,
+    SupportsPredictY,
 )
 from ...observer import OBJECTIVE
 from ...space import SearchSpace
@@ -385,17 +388,24 @@ def __call__(self, x: TensorType) -> TensorType:
 
 @runtime_checkable
 class FantasizerModelType(
-    FastUpdateModel, SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise, Protocol
+    FastUpdateModel,
+    SupportsPredictJoint,
+    SupportsPredictY,
+    SupportsGetKernel,
+    SupportsGetObservationNoise,
+    Protocol,
 ):
     """The model requirements for the Fantasizer acquisition function."""
 
     pass
 
 
-class FantasizerModelStack(PredictJointModelStack, ModelStack[FantasizerModelType]):
+class FantasizerModelStack(
+    PredictJointModelStack, PredictYModelStack, ModelStack[FantasizerModelType]
+):
     """
     A stack of models :class:`FantasizerModelType` models. Note that this delegates predict_joint
-    but none of the other methods.
+    and predict_y but none of the other methods.
     """
 
     pass
@@ -605,7 +615,7 @@ def _generate_fantasized_data(
 
 def _generate_fantasized_model(
     model: FantasizerModelOrStack, fantasized_data: Dataset
-) -> _fantasized_model | PredictJointModelStack:
+) -> _fantasized_model | PredictJointPredictYModelStack:
     if isinstance(model, ModelStack):
         observations = tf.split(fantasized_data.observations, model._event_sizes, axis=-1)
         fmods = []
@@ -616,12 +626,14 @@ def _generate_fantasized_model(
                     event_size,
                 )
             )
-        return PredictJointModelStack(*fmods)
+        return PredictJointPredictYModelStack(*fmods)
     else:
         return _fantasized_model(model, fantasized_data)
 
 
-class _fantasized_model(SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise):
+class _fantasized_model(
+    SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise, SupportsPredictY
+):
     """
     Creates a new model from an existing one and additional data.
     This new model posterior is conditioned on both current model data and the additional one.
diff --git a/trieste/acquisition/sampler.py b/trieste/acquisition/sampler.py
index fb75e472a2..d45bbf07ab 100644
--- a/trieste/acquisition/sampler.py
+++ b/trieste/acquisition/sampler.py
@@ -26,7 +26,7 @@
 from scipy.optimize import bisect
 
 from ..models import ProbabilisticModel
-from ..models.interfaces import HasTrajectorySampler, ProbabilisticModelType
+from ..models.interfaces import HasTrajectorySampler, ProbabilisticModelType, SupportsPredictY
 from ..types import TensorType
 from .utils import select_nth_output
 
@@ -174,9 +174,9 @@ def sample(
         tf.debugging.assert_positive(sample_size)
         tf.debugging.assert_shapes([(at, ["N", None])])
 
-        try:
+        if isinstance(model, SupportsPredictY):
             fmean, fvar = model.predict_y(at)
-        except NotImplementedError:
+        else:
             fmean, fvar = model.predict(at)
 
         fsd = tf.math.sqrt(fvar)
diff --git a/trieste/models/gpflow/interface.py b/trieste/models/gpflow/interface.py
index c02e7dc21e..5fad89f679 100644
--- a/trieste/models/gpflow/interface.py
+++ b/trieste/models/gpflow/interface.py
@@ -33,6 +33,7 @@
     SupportsGetKernel,
     SupportsGetObservationNoise,
     SupportsPredictJoint,
+    SupportsPredictY,
     TrainableProbabilisticModel,
 )
 from ..optimizer import Optimizer
@@ -48,6 +49,7 @@ class GPflowPredictor(
     SupportsPredictJoint,
     SupportsGetKernel,
     SupportsGetObservationNoise,
+    SupportsPredictY,
     HasReparamSampler,
     TrainableProbabilisticModel,
     ABC,
diff --git a/trieste/models/gpflow/models.py b/trieste/models/gpflow/models.py
index 5c7a7318b2..fedc993c40 100644
--- a/trieste/models/gpflow/models.py
+++ b/trieste/models/gpflow/models.py
@@ -46,6 +46,7 @@
     SupportsCovarianceWithTopFidelity,
     SupportsGetInducingVariables,
     SupportsGetInternalData,
+    SupportsPredictY,
     TrainableProbabilisticModel,
     TrajectorySampler,
 )
@@ -1369,7 +1370,9 @@ def covariance_between_points(
         )
 
 
-class MultifidelityAutoregressive(TrainableProbabilisticModel, SupportsCovarianceWithTopFidelity):
+class MultifidelityAutoregressive(
+    TrainableProbabilisticModel, SupportsPredictY, SupportsCovarianceWithTopFidelity
+):
     r"""
     A :class:`TrainableProbabilisticModel` implementation of the model
     from :cite:`Kennedy2000`. This is a multi-fidelity model that works with an
@@ -1658,7 +1661,7 @@ def covariance_with_top_fidelity(self, query_points: TensorType) -> TensorType:
 
 
 class MultifidelityNonlinearAutoregressive(
-    TrainableProbabilisticModel, SupportsCovarianceWithTopFidelity
+    TrainableProbabilisticModel, SupportsPredictY, SupportsCovarianceWithTopFidelity
 ):
     r"""
     A :class:`TrainableProbabilisticModel` implementation of the model from
diff --git a/trieste/models/gpflux/interface.py b/trieste/models/gpflux/interface.py
index 1c2b5297c4..cd4d1b1b0e 100644
--- a/trieste/models/gpflux/interface.py
+++ b/trieste/models/gpflux/interface.py
@@ -21,11 +21,11 @@
 from gpflow.base import Module
 
 from ...types import TensorType
-from ..interfaces import SupportsGetObservationNoise
+from ..interfaces import SupportsGetObservationNoise, SupportsPredictY
 from ..optimizer import KerasOptimizer
 
 
-class GPfluxPredictor(SupportsGetObservationNoise, ABC):
+class GPfluxPredictor(SupportsGetObservationNoise, SupportsPredictY, ABC):
     """
     A trainable wrapper for a GPflux deep Gaussian process model. The code assumes subclasses
     will use the Keras `fit` method for training, and so they should provide access to both a
diff --git a/trieste/models/interfaces.py b/trieste/models/interfaces.py
index cbcd1dabdd..000d3c77b8 100644
--- a/trieste/models/interfaces.py
+++ b/trieste/models/interfaces.py
@@ -85,28 +85,6 @@ def sample(self, query_points: TensorType, num_samples: int) -> TensorType:
         """
         raise NotImplementedError
 
-    @check_shapes(
-        "query_points: [broadcast batch..., D]",
-        "return[0]: [batch..., E...]",
-        "return[1]: [batch..., E...]",
-    )
-    def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
-        """
-        Return the mean and variance of the independent marginal distributions at each point in
-        ``query_points`` for the observations, including noise contributions.
-
-        Note that this is not supported by all models.
-
-        :param query_points: The points at which to make predictions, of shape [..., D].
-        :return: The mean and variance of the independent marginal distributions at each point in
-            ``query_points``. For a predictive distribution with event shape E, the mean and
-            variance will both have shape [...] + E.
-        """
-        pass  # (required so that mypy doesn't think this method is abstract)
-        raise NotImplementedError(
-            f"Model {self!r} does not support predicting observations, just the latent function"
-        )
-
     def log(self, dataset: Optional[Dataset] = None) -> None:
         """
         Log model-specific information at a given optimization step.
@@ -190,6 +168,29 @@ def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorTyp
         raise NotImplementedError
 
 
+@runtime_checkable
+class SupportsPredictY(ProbabilisticModel, Protocol):
+    """A probabilistic model that supports predict_y."""
+
+    @abstractmethod
+    @check_shapes(
+        "query_points: [broadcast batch..., D]",
+        "return[0]: [batch..., E...]",
+        "return[1]: [batch..., E...]",
+    )
+    def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
+        """
+        Return the mean and variance of the independent marginal distributions at each point in
+        ``query_points`` for the observations, including noise contributions.
+
+        :param query_points: The points at which to make predictions, of shape [..., D].
+        :return: The mean and variance of the independent marginal distributions at each point in
+            ``query_points``. For a predictive distribution with event shape E, the mean and
+            variance will both have shape [...] + E.
+        """
+        raise NotImplementedError
+
+
 @runtime_checkable
 class SupportsGetKernel(ProbabilisticModel, Protocol):
     """A probabilistic model that supports get_kernel."""
@@ -421,18 +422,6 @@ def sample(self, query_points: TensorType, num_samples: int) -> TensorType:
         samples = [model.sample(query_points, num_samples) for model in self._models]
         return tf.concat(samples, axis=-1)
 
-    def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
-        r"""
-        :param query_points: The points at which to make predictions, of shape [..., D].
-        :return: The predictions from all the wrapped models, concatenated along the event axis in
-            the same order as they appear in :meth:`__init__`. If the wrapped models have predictive
-            distributions with event shapes [:math:`E_i`], the mean and variance will both have
-            shape [..., :math:`\sum_i E_i`].
-        :raise NotImplementedError: If any of the models don't implement predict_y.
-        """
-        means, vars_ = zip(*[model.predict_y(query_points) for model in self._models])
-        return tf.concat(means, axis=-1), tf.concat(vars_, axis=-1)
-
     def log(self, dataset: Optional[Dataset] = None) -> None:
         """
         Log model-specific information at a given optimization step.
@@ -542,6 +531,26 @@ def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorTyp
         return tf.concat(means, axis=-1), tf.concat(covs, axis=-3)
 
 
+class PredictYModelStack(ModelStack[SupportsPredictY], SupportsPredictY):
+    r"""
+    A :class:`PredictYModelStack` is a wrapper around a number of
+    :class:`SupportsPredictY`\ s.
+    It delegates :meth:`predict_y` to each model.
+    """
+
+    def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
+        r"""
+        :param query_points: The points at which to make predictions, of shape [..., D].
+        :return: The predictions from all the wrapped models, concatenated along the event axis in
+            the same order as they appear in :meth:`__init__`. If the wrapped models have predictive
+            distributions with event shapes [:math:`E_i`], the mean and variance will both have
+            shape [..., :math:`\sum_i E_i`].
+        :raise NotImplementedError: If any of the models don't implement predict_y.
+        """
+        means, vars_ = zip(*[model.predict_y(query_points) for model in self._models])
+        return tf.concat(means, axis=-1), tf.concat(vars_, axis=-1)
+
+
 # It's useful, though a bit ugly, to define the stack constructors for some model type combinations
 class TrainableSupportsPredictJoint(TrainableProbabilisticModel, SupportsPredictJoint, Protocol):
     """A model that is both trainable and supports predict_joint."""
@@ -557,6 +566,34 @@ class TrainablePredictJointModelStack(
     pass
 
 
+class TrainableSupportsPredictY(TrainableProbabilisticModel, SupportsPredictY, Protocol):
+    """A model that is both trainable and supports predict_y."""
+
+    pass
+
+
+class TrainablePredictYModelStack(
+    TrainableModelStack, PredictYModelStack, ModelStack[TrainableSupportsPredictY]
+):
+    """A stack of models that are both trainable and support predict_y."""
+
+    pass
+
+
+class SupportsPredictJointPredictY(SupportsPredictJoint, SupportsPredictY, Protocol):
+    """A model that supports both predict_joint and predict_y."""
+
+    pass
+
+
+class PredictJointPredictYModelStack(
+    PredictJointModelStack, PredictYModelStack, ModelStack[SupportsPredictJointPredictY]
+):
+    """A stack of models that support both predict_joint and predict_y."""
+
+    pass
+
+
 class TrainableSupportsPredictJointHasReparamSampler(
     TrainableSupportsPredictJoint, HasReparamSampler, Protocol
 ):