From 4463f43736cc586b552107b18393f091d6be6f3a Mon Sep 17 00:00:00 2001 From: Uri Granta Date: Mon, 27 Nov 2023 13:13:25 +0000 Subject: [PATCH] Define SupportsPredictY protocol --- docs/notebooks/code_overview.pct.py | 11 +- .../test_multifidelity_models.py | 3 +- .../unit/acquisition/function/test_entropy.py | 6 +- tests/unit/models/test_interfaces.py | 20 +--- tests/util/models/gpflow/models.py | 11 +- trieste/acquisition/function/entropy.py | 16 ++- trieste/acquisition/function/greedy_batch.py | 24 +++- trieste/acquisition/sampler.py | 6 +- trieste/models/gpflow/interface.py | 2 + trieste/models/gpflow/models.py | 7 +- trieste/models/gpflux/interface.py | 4 +- trieste/models/interfaces.py | 105 ++++++++++++------ 12 files changed, 136 insertions(+), 79 deletions(-) diff --git a/docs/notebooks/code_overview.pct.py b/docs/notebooks/code_overview.pct.py index 85216ce818..3e0ff2a7b0 100644 --- a/docs/notebooks/code_overview.pct.py +++ b/docs/notebooks/code_overview.pct.py @@ -198,12 +198,13 @@ class HasGizmoReparamSamplerAndObservationNoise( AcquisitionFunction, SingleModelAcquisitionBuilder, ) +from trieste.models.interfaces import SupportsPredictY from trieste.data import Dataset -class ProbabilityOfValidity(SingleModelAcquisitionBuilder[ProbabilisticModel]): +class ProbabilityOfValidity(SingleModelAcquisitionBuilder[SupportsPredictY]): def prepare_acquisition_function( - self, model: ProbabilisticModel, dataset: Optional[Dataset] = None + self, model: SupportsPredictY, dataset: Optional[Dataset] = None ) -> AcquisitionFunction: def acquisition(at: TensorType) -> TensorType: mean, _ = model.predict_y(tf.squeeze(at, -2)) @@ -217,9 +218,9 @@ def acquisition(at: TensorType) -> TensorType: # %% -class ProbabilityOfValidity2(SingleModelAcquisitionBuilder[ProbabilisticModel]): +class ProbabilityOfValidity2(SingleModelAcquisitionBuilder[SupportsPredictY]): def prepare_acquisition_function( - self, model: ProbabilisticModel, dataset: Optional[Dataset] = None + self, model: 
SupportsPredictY, dataset: Optional[Dataset] = None ) -> AcquisitionFunction: @tf.function def acquisition(at: TensorType) -> TensorType: @@ -231,7 +232,7 @@ def acquisition(at: TensorType) -> TensorType: def update_acquisition_function( self, function: AcquisitionFunction, - model: ProbabilisticModel, + model: SupportsPredictY, dataset: Optional[Dataset] = None, ) -> AcquisitionFunction: return function # no need to update anything diff --git a/tests/integration/models/multifidelity/test_multifidelity_models.py b/tests/integration/models/multifidelity/test_multifidelity_models.py index 37c011b5b0..8832d82080 100644 --- a/tests/integration/models/multifidelity/test_multifidelity_models.py +++ b/tests/integration/models/multifidelity/test_multifidelity_models.py @@ -11,6 +11,7 @@ check_and_extract_fidelity_query_points, split_dataset_by_fidelity, ) +from trieste.models import TrainableProbabilisticModel from trieste.models.gpflow import GaussianProcessRegression from trieste.models.gpflow.builders import ( build_gpr, @@ -119,7 +120,7 @@ def test_multifidelity_nonlinear_autoregressive_results_better_than_linear() -> observer = mk_observer(noisy_nonlinear_multifidelity) initial_data = observer(initial_sample) - nonlinear_model = MultifidelityNonlinearAutoregressive( + nonlinear_model: TrainableProbabilisticModel = MultifidelityNonlinearAutoregressive( build_multifidelity_nonlinear_autoregressive_models( initial_data, n_fidelities, input_search_space ) diff --git a/tests/unit/acquisition/function/test_entropy.py b/tests/unit/acquisition/function/test_entropy.py index ba860a2211..f922d51252 100644 --- a/tests/unit/acquisition/function/test_entropy.py +++ b/tests/unit/acquisition/function/test_entropy.py @@ -36,6 +36,7 @@ MinValueEntropySearch, MUMBOModelType, SupportsCovarianceObservationNoiseTrajectory, + SupportsCovarianceWithTopFidelityPredictY, gibbon_quality_term, gibbon_repulsion_term, min_value_entropy_search, @@ -48,7 +49,6 @@ ThompsonSamplerFromTrajectory, ) 
from trieste.data import Dataset, add_fidelity_column -from trieste.models import SupportsCovarianceWithTopFidelity from trieste.objectives import Branin from trieste.space import Box from trieste.types import TensorType @@ -612,7 +612,7 @@ def test_mumbo_raises_when_use_trajectory_sampler_and_model_without_trajectories ) def test_mumbo_builder_builds_min_value_samples( mocked_mves: MagicMock, - min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelity], + min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelityPredictY], ) -> None: dataset = Dataset(tf.zeros([3, 2], dtype=tf.float64), tf.ones([3, 2], dtype=tf.float64)) search_space = Box([0, 0], [1, 1]) @@ -638,7 +638,7 @@ def test_mumbo_builder_builds_min_value_samples( [ExactThompsonSampler(sample_min_value=True), GumbelSampler(sample_min_value=True)], ) def test_mumbo_builder_updates_acquisition_function( - min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelity], + min_value_sampler: ThompsonSampler[SupportsCovarianceWithTopFidelityPredictY], ) -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) model = MultiFidelityQuadraticMeanAndRBFKernel( diff --git a/tests/unit/models/test_interfaces.py b/tests/unit/models/test_interfaces.py index f7b084d8d8..6839b208ab 100644 --- a/tests/unit/models/test_interfaces.py +++ b/tests/unit/models/test_interfaces.py @@ -36,6 +36,7 @@ from trieste.models import TrainableModelStack, TrainableProbabilisticModel from trieste.models.interfaces import ( TrainablePredictJointReparamModelStack, + TrainablePredictYModelStack, TrainableSupportsPredictJoint, TrainableSupportsPredictJointHasReparamSampler, ) @@ -114,28 +115,11 @@ def test_model_stack_predict_joint() -> None: npt.assert_allclose(cov[..., 3:, :, :], cov3) -def test_model_missing_predict_y() -> None: - model = _QuadraticModel([-1.0], [0.1]) - x_predict = tf.constant([[0]], gpflow.default_float()) - with pytest.raises(NotImplementedError): - model.predict_y(x_predict) - - -def 
test_model_stack_missing_predict_y() -> None: - x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float()) - model1 = gpr_model(x, fnc_3x_plus_10(x)) - model2 = _QuadraticModel([1.0], [2.0]) - stack = TrainableModelStack((model1, 1), (model2, 1)) - x_predict = tf.constant([[0]], gpflow.default_float()) - with pytest.raises(NotImplementedError): - stack.predict_y(x_predict) - - def test_model_stack_predict_y() -> None: x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float()) model1 = gpr_model(x, fnc_3x_plus_10(x)) model2 = sgpr_model(x, fnc_2sin_x_over_3(x)) - stack = TrainableModelStack((model1, 1), (model2, 1)) + stack = TrainablePredictYModelStack((model1, 1), (model2, 1)) mean, variance = stack.predict_y(x) npt.assert_allclose(mean[:, 0:1], model1.predict_y(x)[0]) npt.assert_allclose(mean[:, 1:2], model2.predict_y(x)[0]) diff --git a/tests/util/models/gpflow/models.py b/tests/util/models/gpflow/models.py index a2deae664a..1d25fdcdfe 100644 --- a/tests/util/models/gpflow/models.py +++ b/tests/util/models/gpflow/models.py @@ -47,6 +47,7 @@ SupportsGetKernel, SupportsGetObservationNoise, SupportsPredictJoint, + SupportsPredictY, ) from trieste.models.optimizer import Optimizer from trieste.types import TensorType @@ -259,7 +260,7 @@ def optimize(self, dataset: Dataset) -> None: class MultiFidelityQuadraticMeanAndRBFKernel( - QuadraticMeanAndRBFKernel, SupportsCovarianceWithTopFidelity + QuadraticMeanAndRBFKernel, SupportsPredictY, SupportsCovarianceWithTopFidelity ): r""" A Gaussian process with scalar quadratic mean, an RBF kernel and @@ -293,7 +294,7 @@ def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: class MultiFidelityQuadraticMeanAndRBFKernelWithSamplers( - QuadraticMeanAndRBFKernelWithSamplers, SupportsCovarianceWithTopFidelity + QuadraticMeanAndRBFKernelWithSamplers, SupportsPredictY, SupportsCovarianceWithTopFidelity ): r""" A Gaussian process with scalar quadratic mean, an RBF kernel and @@ 
-323,6 +324,12 @@ def covariance_with_top_fidelity(self, x: TensorType) -> TensorType: mean, _ = self.predict(x) return tf.ones_like(mean, dtype=mean.dtype) # dummy covariances of correct shape + @inherit_check_shapes + def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + fmean, fvar = self.predict(query_points) + yvar = fvar + tf.constant(1.0, dtype=fmean.dtype) # dummy noise variance + return fmean, yvar + class QuadraticMeanAndRBFKernelWithBatchSamplers( QuadraticMeanAndRBFKernel, HasTrajectorySampler, HasReparamSampler diff --git a/trieste/acquisition/function/entropy.py b/trieste/acquisition/function/entropy.py index 7f04af0a76..f786417efd 100644 --- a/trieste/acquisition/function/entropy.py +++ b/trieste/acquisition/function/entropy.py @@ -29,6 +29,7 @@ HasTrajectorySampler, SupportsCovarianceWithTopFidelity, SupportsGetObservationNoise, + SupportsPredictY, ) from ...space import SearchSpace from ...types import TensorType @@ -623,10 +624,19 @@ def __call__(self, x: TensorType) -> TensorType: return repulsion_weight * repulsion +@runtime_checkable +class SupportsCovarianceWithTopFidelityPredictY( + SupportsCovarianceWithTopFidelity, SupportsPredictY, Protocol +): + """A model that is both multifidelity and supports predict_y.""" + + pass + + MUMBOModelType = TypeVar( - "MUMBOModelType", bound=SupportsCovarianceWithTopFidelity, contravariant=True + "MUMBOModelType", bound=SupportsCovarianceWithTopFidelityPredictY, contravariant=True ) -""" Type variable bound to :class:`~trieste.models.SupportsCovarianceWithTopFidelity`. """ +""" Type variable bound to :class:`~trieste.models.SupportsCovarianceWithTopFidelityPredictY`. 
""" class MUMBO(MinValueEntropySearch[MUMBOModelType]): @@ -645,7 +655,7 @@ class MUMBO(MinValueEntropySearch[MUMBOModelType]): @overload def __init__( - self: "MUMBO[SupportsCovarianceWithTopFidelity]", + self: "MUMBO[SupportsCovarianceWithTopFidelityPredictY]", search_space: SearchSpace, num_samples: int = 5, grid_size: int = 1000, diff --git a/trieste/acquisition/function/greedy_batch.py b/trieste/acquisition/function/greedy_batch.py index 6cd222bd13..55898ed954 100644 --- a/trieste/acquisition/function/greedy_batch.py +++ b/trieste/acquisition/function/greedy_batch.py @@ -28,9 +28,12 @@ from ...models import FastUpdateModel, ModelStack, ProbabilisticModel from ...models.interfaces import ( PredictJointModelStack, + PredictJointPredictYModelStack, + PredictYModelStack, SupportsGetKernel, SupportsGetObservationNoise, SupportsPredictJoint, + SupportsPredictY, ) from ...observer import OBJECTIVE from ...space import SearchSpace @@ -385,17 +388,24 @@ def __call__(self, x: TensorType) -> TensorType: @runtime_checkable class FantasizerModelType( - FastUpdateModel, SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise, Protocol + FastUpdateModel, + SupportsPredictJoint, + SupportsPredictY, + SupportsGetKernel, + SupportsGetObservationNoise, + Protocol, ): """The model requirements for the Fantasizer acquisition function.""" pass -class FantasizerModelStack(PredictJointModelStack, ModelStack[FantasizerModelType]): +class FantasizerModelStack( + PredictJointModelStack, PredictYModelStack, ModelStack[FantasizerModelType] +): """ A stack of models :class:`FantasizerModelType` models. Note that this delegates predict_joint - but none of the other methods. + and predict_y but none of the other methods. 
""" pass @@ -605,7 +615,7 @@ def _generate_fantasized_data( def _generate_fantasized_model( model: FantasizerModelOrStack, fantasized_data: Dataset -) -> _fantasized_model | PredictJointModelStack: +) -> _fantasized_model | PredictJointPredictYModelStack: if isinstance(model, ModelStack): observations = tf.split(fantasized_data.observations, model._event_sizes, axis=-1) fmods = [] @@ -616,12 +626,14 @@ def _generate_fantasized_model( event_size, ) ) - return PredictJointModelStack(*fmods) + return PredictJointPredictYModelStack(*fmods) else: return _fantasized_model(model, fantasized_data) -class _fantasized_model(SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise): +class _fantasized_model( + SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise, SupportsPredictY +): """ Creates a new model from an existing one and additional data. This new model posterior is conditioned on both current model data and the additional one. diff --git a/trieste/acquisition/sampler.py b/trieste/acquisition/sampler.py index fb75e472a2..d45bbf07ab 100644 --- a/trieste/acquisition/sampler.py +++ b/trieste/acquisition/sampler.py @@ -26,7 +26,7 @@ from scipy.optimize import bisect from ..models import ProbabilisticModel -from ..models.interfaces import HasTrajectorySampler, ProbabilisticModelType +from ..models.interfaces import HasTrajectorySampler, ProbabilisticModelType, SupportsPredictY from ..types import TensorType from .utils import select_nth_output @@ -174,9 +174,9 @@ def sample( tf.debugging.assert_positive(sample_size) tf.debugging.assert_shapes([(at, ["N", None])]) - try: + if isinstance(model, SupportsPredictY): fmean, fvar = model.predict_y(at) - except NotImplementedError: + else: fmean, fvar = model.predict(at) fsd = tf.math.sqrt(fvar) diff --git a/trieste/models/gpflow/interface.py b/trieste/models/gpflow/interface.py index c02e7dc21e..5fad89f679 100644 --- a/trieste/models/gpflow/interface.py +++ b/trieste/models/gpflow/interface.py @@ 
-33,6 +33,7 @@ SupportsGetKernel, SupportsGetObservationNoise, SupportsPredictJoint, + SupportsPredictY, TrainableProbabilisticModel, ) from ..optimizer import Optimizer @@ -48,6 +49,7 @@ class GPflowPredictor( SupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise, + SupportsPredictY, HasReparamSampler, TrainableProbabilisticModel, ABC, diff --git a/trieste/models/gpflow/models.py b/trieste/models/gpflow/models.py index 5c7a7318b2..fedc993c40 100644 --- a/trieste/models/gpflow/models.py +++ b/trieste/models/gpflow/models.py @@ -46,6 +46,7 @@ SupportsCovarianceWithTopFidelity, SupportsGetInducingVariables, SupportsGetInternalData, + SupportsPredictY, TrainableProbabilisticModel, TrajectorySampler, ) @@ -1369,7 +1370,9 @@ def covariance_between_points( ) -class MultifidelityAutoregressive(TrainableProbabilisticModel, SupportsCovarianceWithTopFidelity): +class MultifidelityAutoregressive( + TrainableProbabilisticModel, SupportsPredictY, SupportsCovarianceWithTopFidelity +): r""" A :class:`TrainableProbabilisticModel` implementation of the model from :cite:`Kennedy2000`. 
This is a multi-fidelity model that works with an @@ -1658,7 +1661,7 @@ def covariance_with_top_fidelity(self, query_points: TensorType) -> TensorType: class MultifidelityNonlinearAutoregressive( - TrainableProbabilisticModel, SupportsCovarianceWithTopFidelity + TrainableProbabilisticModel, SupportsPredictY, SupportsCovarianceWithTopFidelity ): r""" A :class:`TrainableProbabilisticModel` implementation of the model from diff --git a/trieste/models/gpflux/interface.py b/trieste/models/gpflux/interface.py index 1c2b5297c4..cd4d1b1b0e 100644 --- a/trieste/models/gpflux/interface.py +++ b/trieste/models/gpflux/interface.py @@ -21,11 +21,11 @@ from gpflow.base import Module from ...types import TensorType -from ..interfaces import SupportsGetObservationNoise +from ..interfaces import SupportsGetObservationNoise, SupportsPredictY from ..optimizer import KerasOptimizer -class GPfluxPredictor(SupportsGetObservationNoise, ABC): +class GPfluxPredictor(SupportsGetObservationNoise, SupportsPredictY, ABC): """ A trainable wrapper for a GPflux deep Gaussian process model. The code assumes subclasses will use the Keras `fit` method for training, and so they should provide access to both a diff --git a/trieste/models/interfaces.py b/trieste/models/interfaces.py index cbcd1dabdd..000d3c77b8 100644 --- a/trieste/models/interfaces.py +++ b/trieste/models/interfaces.py @@ -85,28 +85,6 @@ def sample(self, query_points: TensorType, num_samples: int) -> TensorType: """ raise NotImplementedError - @check_shapes( - "query_points: [broadcast batch..., D]", - "return[0]: [batch..., E...]", - "return[1]: [batch..., E...]", - ) - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: - """ - Return the mean and variance of the independent marginal distributions at each point in - ``query_points`` for the observations, including noise contributions. - - Note that this is not supported by all models. 
- - :param query_points: The points at which to make predictions, of shape [..., D]. - :return: The mean and variance of the independent marginal distributions at each point in - ``query_points``. For a predictive distribution with event shape E, the mean and - variance will both have shape [...] + E. - """ - pass # (required so that mypy doesn't think this method is abstract) - raise NotImplementedError( - f"Model {self!r} does not support predicting observations, just the latent function" - ) - def log(self, dataset: Optional[Dataset] = None) -> None: """ Log model-specific information at a given optimization step. @@ -190,6 +168,29 @@ def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorTyp raise NotImplementedError +@runtime_checkable +class SupportsPredictY(ProbabilisticModel, Protocol): + """A probabilistic model that supports predict_y.""" + + @abstractmethod + @check_shapes( + "query_points: [broadcast batch..., D]", + "return[0]: [batch..., E...]", + "return[1]: [batch..., E...]", + ) + def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + """ + Return the mean and variance of the independent marginal distributions at each point in + ``query_points`` for the observations, including noise contributions. + + :param query_points: The points at which to make predictions, of shape [..., D]. + :return: The mean and variance of the independent marginal distributions at each point in + ``query_points``. For a predictive distribution with event shape E, the mean and + variance will both have shape [...] + E. 
+ """ + raise NotImplementedError + + +@runtime_checkable class SupportsGetKernel(ProbabilisticModel, Protocol): """A probabilistic model that supports get_kernel.""" @@ -421,18 +422,6 @@ def sample(self, query_points: TensorType, num_samples: int) -> TensorType: samples = [model.sample(query_points, num_samples) for model in self._models] return tf.concat(samples, axis=-1) - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: - r""" - :param query_points: The points at which to make predictions, of shape [..., D]. - :return: The predictions from all the wrapped models, concatenated along the event axis in - the same order as they appear in :meth:`__init__`. If the wrapped models have predictive - distributions with event shapes [:math:`E_i`], the mean and variance will both have - shape [..., :math:`\sum_i E_i`]. - :raise NotImplementedError: If any of the models don't implement predict_y. - """ - means, vars_ = zip(*[model.predict_y(query_points) for model in self._models]) - return tf.concat(means, axis=-1), tf.concat(vars_, axis=-1) - def log(self, dataset: Optional[Dataset] = None) -> None: """ Log model-specific information at a given optimization step. @@ -542,6 +531,26 @@ def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorTyp return tf.concat(means, axis=-1), tf.concat(covs, axis=-3) +class PredictYModelStack(ModelStack[SupportsPredictY], SupportsPredictY): + r""" + A :class:`PredictYModelStack` is a wrapper around a number of + :class:`SupportsPredictY`\ s. + It delegates :meth:`predict_y` to each model. + """ + + def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + r""" + :param query_points: The points at which to make predictions, of shape [..., D]. + :return: The predictions from all the wrapped models, concatenated along the event axis in + the same order as they appear in :meth:`__init__`. 
If the wrapped models have predictive + distributions with event shapes [:math:`E_i`], the mean and variance will both have + shape [..., :math:`\sum_i E_i`]. + :raise NotImplementedError: If any of the models don't implement predict_y. + """ + means, vars_ = zip(*[model.predict_y(query_points) for model in self._models]) + return tf.concat(means, axis=-1), tf.concat(vars_, axis=-1) + + # It's useful, though a bit ugly, to define the stack constructors for some model type combinations class TrainableSupportsPredictJoint(TrainableProbabilisticModel, SupportsPredictJoint, Protocol): """A model that is both trainable and supports predict_joint.""" @@ -557,6 +566,34 @@ class TrainablePredictJointModelStack( pass +class TrainableSupportsPredictY(TrainableProbabilisticModel, SupportsPredictY, Protocol): + """A model that is both trainable and supports predict_y.""" + + pass + + +class TrainablePredictYModelStack( + TrainableModelStack, PredictYModelStack, ModelStack[TrainableSupportsPredictY] +): + """A stack of models that are both trainable and support predict_y.""" + + pass + + +class SupportsPredictJointPredictY(SupportsPredictJoint, SupportsPredictY, Protocol): + """A model that supports both predict_joint and predict_y.""" + + pass + + +class PredictJointPredictYModelStack( + PredictJointModelStack, PredictYModelStack, ModelStack[SupportsPredictJointPredictY] +): + """A stack of models that support both predict_joint and predict_y.""" + + pass + + class TrainableSupportsPredictJointHasReparamSampler( TrainableSupportsPredictJoint, HasReparamSampler, Protocol ):