From dfd2f8a05d19ac9a430fba56b90876514f89c68d Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 14 Jan 2022 18:59:09 -0500 Subject: [PATCH 1/2] chore(python): update release.sh to use keystore (#954) Source-Link: https://github.com/googleapis/synthtool/commit/69fda12e2994f0b595a397e8bb6e3e9f380524eb Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index ff5126c188..eecb84c21b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:dfa9b663b32de8b5b327e32c1da665a80de48876558dd58091d8160c60ad7355 + digest: sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 62bdb892ff..7ca397271d 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") cd github/python-aiplatform python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 5293e75110..08012edcbd 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,8 +23,18 @@ env_vars: { value: "github/python-aiplatform/.kokoro/release.sh" } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google-cloud-pypi-token-keystore-1" + } + } +} + # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } From b8f5f82ae43edfb933305a074c315e2f3239b4b1 Mon Sep 17 00:00:00 2001 From: Morgan Du Date: Fri, 14 Jan 2022 18:30:11 -0800 Subject: [PATCH 2/2] feat: enable feature store online serving (#918) - [x] Added `_instantiate_featurestore_online_client`, and `read` method in `EntityType` class to enable feature store online serving - [x] Enabled construct Pandas DataFrame for online serving result - [x] Added unit tests - [x] Added integration tests --- .../aiplatform/featurestore/entity_type.py | 154 ++++- .../cloud/aiplatform/featurestore/feature.py | 1 - .../aiplatform/featurestore/featurestore.py | 1 - tests/system/aiplatform/test_featurestore.py | 18 + tests/unit/aiplatform/test_featurestores.py | 625 +++++++++++++++++- 5 files changed, 787 insertions(+), 12 deletions(-) diff --git a/google/cloud/aiplatform/featurestore/entity_type.py b/google/cloud/aiplatform/featurestore/entity_type.py index 9b2524e45c..6e993f26b5 100644 --- a/google/cloud/aiplatform/featurestore/entity_type.py +++ 
b/google/cloud/aiplatform/featurestore/entity_type.py @@ -24,13 +24,17 @@ from google.cloud.aiplatform import base from google.cloud.aiplatform.compat.types import ( entity_type as gca_entity_type, + feature_selector as gca_feature_selector, featurestore_service as gca_featurestore_service, + featurestore_online_service as gca_featurestore_online_service, io as gca_io, ) from google.cloud.aiplatform import featurestore +from google.cloud.aiplatform import initializer from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import featurestore_utils + _LOGGER = base.Logger(__name__) _ALL_FEATURE_IDS = "*" @@ -40,7 +44,6 @@ class EntityType(base.VertexAiResourceNounWithFutureManager): client_class = utils.FeaturestoreClientWithOverride - _is_client_prediction_client = False _resource_noun = "entityTypes" _getter_method = "get_entity_type" _list_method = "list_entity_types" @@ -114,6 +117,10 @@ def __init__( else featurestore_id, ) + self._featurestore_online_client = self._instantiate_featurestore_online_client( + location=self.location, credentials=credentials, + ) + @property def featurestore_name(self) -> str: """Full qualified resource name of the managed featurestore in which this EntityType is.""" @@ -157,7 +164,7 @@ def update( self, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, - request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + request_metadata: Sequence[Tuple[str, str]] = (), ) -> "EntityType": """Updates an existing managed entityType resource. @@ -189,7 +196,7 @@ def update( System reserved label keys are prefixed with "aiplatform.googleapis.com/" and are immutable. request_metadata (Sequence[Tuple[str, str]]): - Optional. Strings which should be sent along with the request as metadata. + Required. Strings which should be sent along with the request as metadata. Returns: EntityType - The updated entityType resource object. """ @@ -1138,3 +1145,144 @@ def ingest_from_gcs( import_feature_values_request=import_feature_values_request, request_metadata=request_metadata, ) + + @staticmethod + def _instantiate_featurestore_online_client( + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> utils.FeaturestoreOnlineServingClientWithOverride: + """Helper method to instantiates featurestore online client. + + Args: + location (str): The location of this featurestore. + credentials (google.auth.credentials.Credentials): + Optional custom credentials to use when interacting with + the featurestore online client. + Returns: + utils.FeaturestoreOnlineServingClientWithOverride: + Initialized featurestore online client with optional overrides. + """ + return initializer.global_config.create_client( + client_class=utils.FeaturestoreOnlineServingClientWithOverride, + credentials=credentials, + location_override=location, + ) + + def read( + self, + entity_ids: Union[str, List[str]], + feature_ids: Union[str, List[str]] = "*", + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + ) -> "pd.DataFrame": # noqa: F821 - skip check for undefined name 'pd' + """Reads feature values for given feature IDs of given entity IDs in this EntityType. + + Args: + entity_ids (Union[str, List[str]]): + Required. ID for a specific entity, or a list of IDs of entities + to read Feature values of. The maximum number of IDs is 100 if a list. + feature_ids (Union[str, List[str]]): + Required. ID for a specific feature, or a list of IDs of Features in the EntityType + for reading feature values. 
Default to "*", where value of all features will be read. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + + Returns: + pd.DataFrame: entities' feature values in DataFrame + """ + + if isinstance(feature_ids, str): + feature_ids = [feature_ids] + + feature_selector = gca_feature_selector.FeatureSelector( + id_matcher=gca_feature_selector.IdMatcher(ids=feature_ids) + ) + + if isinstance(entity_ids, str): + read_feature_values_request = gca_featurestore_online_service.ReadFeatureValuesRequest( + entity_type=self.resource_name, + entity_id=entity_ids, + feature_selector=feature_selector, + ) + read_feature_values_response = self._featurestore_online_client.read_feature_values( + request=read_feature_values_request, metadata=request_metadata + ) + header = read_feature_values_response.header + entity_views = [read_feature_values_response.entity_view] + elif isinstance(entity_ids, list): + streaming_read_feature_values_request = gca_featurestore_online_service.StreamingReadFeatureValuesRequest( + entity_type=self.resource_name, + entity_ids=entity_ids, + feature_selector=feature_selector, + ) + streaming_read_feature_values_responses = [ + response + for response in self._featurestore_online_client.streaming_read_feature_values( + request=streaming_read_feature_values_request, + metadata=request_metadata, + ) + ] + header = streaming_read_feature_values_responses[0].header + entity_views = [ + response.entity_view + for response in streaming_read_feature_values_responses[1:] + ] + + feature_ids = [ + feature_descriptor.id for feature_descriptor in header.feature_descriptors + ] + + return EntityType._construct_dataframe( + feature_ids=feature_ids, entity_views=entity_views, + ) + + @staticmethod + def _construct_dataframe( + feature_ids: List[str], + entity_views: List[ + gca_featurestore_online_service.ReadFeatureValuesResponse.EntityView + ], + ) -> "pd.DataFrame": # noqa: F821 - skip check for undefined name 'pd' + """Constructs a dataframe using the header and entity_views + + Args: + feature_ids (List[str]): + Required. A list of feature ids corresponding to the feature values for each entity in entity_views. + entity_views (List[gca_featurestore_online_service.ReadFeatureValuesResponse.EntityView]): + Required. A list of Entity views with Feature values. + For each Entity view, it may be + the entity in the Featurestore if values for all + Features were requested, or a projection of the + entity in the Featurestore if values for only + some Features were requested. + + Raises: + ImportError: If pandas is not installed when using this method. + + Returns: + pd.DataFrame - entities feature values in DataFrame + ) + """ + + try: + import pandas as pd + except ImportError: + raise ImportError( + f"Pandas is not installed. 
Please install pandas to use " + f"{EntityType._construct_dataframe.__name__}" + ) + + data = [] + for entity_view in entity_views: + entity_data = {"entity_id": entity_view.entity_id} + for feature_id, feature_data in zip(feature_ids, entity_view.data): + if feature_data._pb.HasField("value"): + value_type = feature_data.value._pb.WhichOneof("value") + feature_value = getattr(feature_data.value, value_type) + if hasattr(feature_value, "values"): + feature_value = feature_value.values + entity_data[feature_id] = feature_value + else: + entity_data[feature_id] = None + data.append(entity_data) + + return pd.DataFrame(data=data, columns=["entity_id"] + feature_ids) diff --git a/google/cloud/aiplatform/featurestore/feature.py b/google/cloud/aiplatform/featurestore/feature.py index d41344f086..08c91cdb1a 100644 --- a/google/cloud/aiplatform/featurestore/feature.py +++ b/google/cloud/aiplatform/featurestore/feature.py @@ -35,7 +35,6 @@ class Feature(base.VertexAiResourceNounWithFutureManager): client_class = utils.FeaturestoreClientWithOverride - _is_client_prediction_client = False _resource_noun = "features" _getter_method = "get_feature" _list_method = "list_features" diff --git a/google/cloud/aiplatform/featurestore/featurestore.py b/google/cloud/aiplatform/featurestore/featurestore.py index d799e22963..4b98ccfd7d 100644 --- a/google/cloud/aiplatform/featurestore/featurestore.py +++ b/google/cloud/aiplatform/featurestore/featurestore.py @@ -35,7 +35,6 @@ class Featurestore(base.VertexAiResourceNounWithFutureManager): client_class = utils.FeaturestoreClientWithOverride - _is_client_prediction_client = False _resource_noun = "featurestores" _getter_method = "get_featurestore" _list_method = "list_featurestores" diff --git a/tests/system/aiplatform/test_featurestore.py b/tests/system/aiplatform/test_featurestore.py index 65850f7d67..d22119ea22 100644 --- a/tests/system/aiplatform/test_featurestore.py +++ b/tests/system/aiplatform/test_featurestore.py @@ -20,6 +20,8 @@ from google.cloud import aiplatform from tests.system.aiplatform import e2e_base +import pandas as pd + _TEST_USERS_ENTITY_TYPE_GCS_SRC = ( "gs://cloud-samples-data-us-central1/vertex-ai/feature-store/datasets/users.avro" ) @@ -247,3 +249,19 @@ def test_search_features(self, shared_state): assert ( len(list_searched_features) - shared_state["base_list_searched_features"] ) == 6 + + def test_online_reads(self, shared_state): + assert shared_state["user_entity_type"] + assert shared_state["movie_entity_type"] + + user_entity_type = shared_state["user_entity_type"] + movie_entity_type = shared_state["movie_entity_type"] + + user_entity_views = user_entity_type.read(entity_ids="alice") + assert type(user_entity_views) == pd.DataFrame + + movie_entity_views = movie_entity_type.read( + entity_ids=["movie_01", "movie_04"], + feature_ids=[_TEST_MOVIE_TITLE_FEATURE_ID, _TEST_MOVIE_GENRES_FEATURE_ID], + ) + assert type(movie_entity_views) == pd.DataFrame diff --git a/tests/unit/aiplatform/test_featurestores.py b/tests/unit/aiplatform/test_featurestores.py index f76e6ecf22..a92043969e 100644 --- a/tests/unit/aiplatform/test_featurestores.py +++ b/tests/unit/aiplatform/test_featurestores.py @@ -15,8 +15,10 @@ # limitations under the License. 
# +import copy import pytest import datetime +import pandas as pd from unittest import mock from importlib import reload @@ -34,13 +36,19 @@ from google.cloud.aiplatform_v1.services.featurestore_service import ( client as featurestore_service_client, ) +from google.cloud.aiplatform_v1.services.featurestore_online_serving_service import ( + client as featurestore_online_serving_service_client, +) from google.cloud.aiplatform_v1.types import ( encryption_spec as gca_encryption_spec, entity_type as gca_entity_type, feature as gca_feature, + feature_selector as gca_feature_selector, featurestore as gca_featurestore, featurestore_service as gca_featurestore_service, + featurestore_online_service as gca_featurestore_online_service, io as gca_io, + types as gca_types, ) # project @@ -68,10 +76,76 @@ _TEST_FEATURE_ID = "feature_id" _TEST_FEATURE_NAME = f"{_TEST_ENTITY_TYPE_NAME}/features/{_TEST_FEATURE_ID}" _TEST_FEATURE_INVALID = f"{_TEST_ENTITY_TYPE_NAME}/feature/{_TEST_FEATURE_ID}" -_TEST_FEATURE_VALUE_TYPE = "INT64" +_TEST_FEATURE_VALUE_TYPE_STR = "INT64" +_TEST_FEATURE_VALUE = 99 _TEST_FEATURE_VALUE_TYPE_ENUM = 9 _TEST_FEATURE_ID_INVALID = "1feature_id" +_TEST_BOOL_TYPE = gca_feature.Feature.ValueType.BOOL +_TEST_BOOL_ARR_TYPE = gca_feature.Feature.ValueType.BOOL_ARRAY +_TEST_DOUBLE_TYPE = gca_feature.Feature.ValueType.DOUBLE +_TEST_DOUBLE_ARR_TYPE = gca_feature.Feature.ValueType.DOUBLE_ARRAY +_TEST_INT_TYPE = gca_feature.Feature.ValueType.INT64 +_TEST_INT_ARR_TYPE = gca_feature.Feature.ValueType.INT64_ARRAY +_TEST_STR_TYPE = gca_feature.Feature.ValueType.STRING +_TEST_STR_ARR_TYPE = gca_feature.Feature.ValueType.STRING_ARRAY +_TEST_BYTES_TYPE = gca_feature.Feature.ValueType.BYTES + +_FEATURE_VALUE_TYPE_KEYS = { + _TEST_BOOL_TYPE: "bool_value", + _TEST_BOOL_ARR_TYPE: "bool_array_value", + _TEST_DOUBLE_TYPE: "double_value", + _TEST_DOUBLE_ARR_TYPE: "double_array_value", + _TEST_INT_TYPE: "int64_value", + _TEST_INT_ARR_TYPE: "int64_array_value", + _TEST_STR_TYPE: "string_value", + _TEST_STR_ARR_TYPE: "string_array_value", + _TEST_BYTES_TYPE: "bytes_value", +} + +_TEST_FEATURE_VALUE_TYPE = _TEST_INT_TYPE + +_ARRAY_FEATURE_VALUE_TYPE_TO_GCA_TYPE_MAP = { + _TEST_BOOL_ARR_TYPE: gca_types.BoolArray, + _TEST_DOUBLE_ARR_TYPE: gca_types.DoubleArray, + _TEST_INT_ARR_TYPE: gca_types.Int64Array, + _TEST_STR_ARR_TYPE: gca_types.StringArray, +} + +_TEST_BOOL_COL = "bool_col" +_TEST_BOOL_ARR_COL = "bool_array_col" +_TEST_DOUBLE_COL = "double_col" +_TEST_DOUBLE_ARR_COL = "double_array_col" +_TEST_INT_COL = "int64_col" +_TEST_INT_ARR_COL = "int64_array_col" +_TEST_STR_COL = "string_col" +_TEST_STR_ARR_COL = "string_array_col" +_TEST_BYTES_COL = "bytes_col" + +_TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION = [ + _TEST_BOOL_COL, + _TEST_BOOL_ARR_COL, + _TEST_DOUBLE_COL, + _TEST_DOUBLE_ARR_COL, + _TEST_INT_COL, + _TEST_INT_ARR_COL, + _TEST_STR_COL, + _TEST_STR_ARR_COL, + _TEST_BYTES_COL, +] + +_TEST_FEATURE_VALUE_TYPES_FOR_DF_CONSTRUCTION = [ + _TEST_BOOL_TYPE, + _TEST_BOOL_ARR_TYPE, + _TEST_DOUBLE_TYPE, + _TEST_DOUBLE_ARR_TYPE, + _TEST_INT_TYPE, + _TEST_INT_ARR_TYPE, + _TEST_STR_TYPE, + _TEST_STR_ARR_TYPE, + _TEST_BYTES_TYPE, +] + # misc _TEST_DESCRIPTION = "my description" _TEST_LABELS = {"my_key": "my_value"} @@ -124,7 +198,7 @@ ] _TEST_FEATURE_CONFIGS = { - "my_feature_id_1": {"value_type": _TEST_FEATURE_VALUE_TYPE}, + "my_feature_id_1": {"value_type": _TEST_FEATURE_VALUE_TYPE_STR}, } _TEST_IMPORTING_FEATURE_IDS = ["my_feature_id_1"] @@ -155,6 +229,56 @@ gcs_source=gca_io.GcsSource(uris=_TEST_GCS_CSV_SOURCE_URIS) ) 
+_TEST_READ_ENTITY_ID = "entity_id_1" +_TEST_READ_ENTITY_IDS = ["entity_id_1"] + +_TEST_BASE_HEADER_PROTO = ( + gca_featurestore_online_service.ReadFeatureValuesResponse.Header() +) +_TEST_BASE_ENTITY_VIEW_PROTO = ( + gca_featurestore_online_service.ReadFeatureValuesResponse.EntityView() +) +_TEST_BASE_DATA_PROTO = ( + gca_featurestore_online_service.ReadFeatureValuesResponse.EntityView.Data() +) + + +def _get_header_proto(feature_ids): + header_proto = copy.deepcopy(_TEST_BASE_HEADER_PROTO) + header_proto.feature_descriptors = [ + gca_featurestore_online_service.ReadFeatureValuesResponse.FeatureDescriptor( + id=feature_id + ) + for feature_id in feature_ids + ] + return header_proto + + +def _get_data_proto(feature_value_type, feature_value): + data_proto = copy.deepcopy(_TEST_BASE_DATA_PROTO) + if feature_value is not None: + if feature_value_type in _ARRAY_FEATURE_VALUE_TYPE_TO_GCA_TYPE_MAP: + array_proto = _ARRAY_FEATURE_VALUE_TYPE_TO_GCA_TYPE_MAP[ + feature_value_type + ]() + array_proto.values = feature_value + feature_value = array_proto + data_proto.value = gca_featurestore_online_service.FeatureValue( + {_FEATURE_VALUE_TYPE_KEYS[feature_value_type]: feature_value} + ) + return data_proto + + +def _get_entity_view_proto(entity_id, feature_value_types, feature_values): + entity_view_proto = copy.deepcopy(_TEST_BASE_ENTITY_VIEW_PROTO) + entity_view_proto.entity_id = entity_id + entity_view_data = [] + for feature_value_type, feature_value in zip(feature_value_types, feature_values): + data = _get_data_proto(feature_value_type, feature_value) + entity_view_data.append(data) + entity_view_proto.data = entity_view_data + return entity_view_proto + # All Featurestore Mocks @pytest.fixture @@ -291,13 +415,53 @@ def import_feature_values_mock(): yield import_feature_values_mock +@pytest.fixture +def read_feature_values_mock(): + with patch.object( + featurestore_online_serving_service_client.FeaturestoreOnlineServingServiceClient, + "read_feature_values", + ) as read_feature_values_mock: + read_feature_values_mock.return_value = gca_featurestore_online_service.ReadFeatureValuesResponse( + header=_get_header_proto(feature_ids=[_TEST_FEATURE_ID]), + entity_view=_get_entity_view_proto( + entity_id=_TEST_READ_ENTITY_ID, + feature_value_types=[_TEST_FEATURE_VALUE_TYPE], + feature_values=[_TEST_FEATURE_VALUE], + ), + ) + yield read_feature_values_mock + + +@pytest.fixture +def streaming_read_feature_values_mock(): + with patch.object( + featurestore_online_serving_service_client.FeaturestoreOnlineServingServiceClient, + "streaming_read_feature_values", + ) as streaming_read_feature_values_mock: + streaming_read_feature_values_mock.return_value = [ + gca_featurestore_online_service.ReadFeatureValuesResponse( + header=_get_header_proto(feature_ids=[_TEST_FEATURE_ID]) + ), + gca_featurestore_online_service.ReadFeatureValuesResponse( + entity_view=_get_entity_view_proto( + entity_id=_TEST_READ_ENTITY_ID, + feature_value_types=[_TEST_FEATURE_VALUE_TYPE], + feature_values=[_TEST_FEATURE_VALUE], + ), + ), + ] + yield streaming_read_feature_values_mock + + # ALL Feature Mocks @pytest.fixture def get_feature_mock(): with patch.object( featurestore_service_client.FeaturestoreServiceClient, "get_feature" ) as get_feature_mock: - get_feature_mock.return_value = gca_feature.Feature(name=_TEST_FEATURE_NAME,) + get_feature_mock.return_value = gca_feature.Feature( + name=_TEST_FEATURE_NAME, value_type=_TEST_FEATURE_VALUE_TYPE + ) yield get_feature_mock @@ -442,7 +606,7 @@ def 
test_feature_config_return_create_feature_request(self): featureConfig = featurestore_utils._FeatureConfig( feature_id=_TEST_FEATURE_ID, - value_type=_TEST_FEATURE_VALUE_TYPE, + value_type=_TEST_FEATURE_VALUE_TYPE_STR, description=_TEST_DESCRIPTION, labels=_TEST_LABELS, ) @@ -462,7 +626,7 @@ def test_feature_config_return_create_feature_request(self): def test_feature_config_create_feature_request_raises_invalid_feature_id(self): featureConfig = featurestore_utils._FeatureConfig( feature_id=_TEST_FEATURE_ID_INVALID, - value_type=_TEST_FEATURE_VALUE_TYPE, + value_type=_TEST_FEATURE_VALUE_TYPE_STR, description=_TEST_DESCRIPTION, labels=_TEST_LABELS, ) @@ -840,7 +1004,7 @@ def test_create_feature(self, create_feature_mock, sync): my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) my_feature = my_entity_type.create_feature( feature_id=_TEST_FEATURE_ID, - value_type=_TEST_FEATURE_VALUE_TYPE, + value_type=_TEST_FEATURE_VALUE_TYPE_STR, description=_TEST_DESCRIPTION, labels=_TEST_LABELS, ) @@ -1054,6 +1218,453 @@ def test_ingest_from_gcs_with_invalid_gcs_source_type(self): gcs_source_type=_TEST_GCS_SOURCE_TYPE_INVALID, ) + @pytest.mark.usefixtures("get_entity_type_mock", "get_feature_mock") + def test_read_single_entity(self, read_feature_values_mock): + aiplatform.init(project=_TEST_PROJECT) + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + expected_read_feature_values_request = gca_featurestore_online_service.ReadFeatureValuesRequest( + entity_type=my_entity_type.resource_name, + entity_id=_TEST_READ_ENTITY_ID, + feature_selector=gca_feature_selector.FeatureSelector( + id_matcher=gca_feature_selector.IdMatcher(ids=["*"]) + ), + ) + result = my_entity_type.read(entity_ids=_TEST_READ_ENTITY_ID) + read_feature_values_mock.assert_called_once_with( + request=expected_read_feature_values_request, + metadata=_TEST_REQUEST_METADATA, + ) + assert type(result) == pd.DataFrame + assert len(result) == 1 + assert result.entity_id[0] == _TEST_READ_ENTITY_ID + assert result.get(_TEST_FEATURE_ID)[0] == _TEST_FEATURE_VALUE + + @pytest.mark.usefixtures("get_entity_type_mock", "get_feature_mock") + def test_read_multiple_entities(self, streaming_read_feature_values_mock): + aiplatform.init(project=_TEST_PROJECT) + my_entity_type = aiplatform.EntityType(entity_type_name=_TEST_ENTITY_TYPE_NAME) + expected_streaming_read_feature_values_request = gca_featurestore_online_service.StreamingReadFeatureValuesRequest( + entity_type=my_entity_type.resource_name, + entity_ids=_TEST_READ_ENTITY_IDS, + feature_selector=gca_feature_selector.FeatureSelector( + id_matcher=gca_feature_selector.IdMatcher(ids=[_TEST_FEATURE_ID]) + ), + ) + result = my_entity_type.read( + entity_ids=_TEST_READ_ENTITY_IDS, feature_ids=_TEST_FEATURE_ID + ) + streaming_read_feature_values_mock.assert_called_once_with( + request=expected_streaming_read_feature_values_request, + metadata=_TEST_REQUEST_METADATA, + ) + assert type(result) == pd.DataFrame + assert len(result) == 1 + assert result.entity_id[0] == _TEST_READ_ENTITY_ID + assert result.get(_TEST_FEATURE_ID)[0] == _TEST_FEATURE_VALUE + + @pytest.mark.parametrize( + "feature_ids, feature_value_types, entity_ids, feature_values, expected_df", + [ + ( + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + _TEST_FEATURE_VALUE_TYPES_FOR_DF_CONSTRUCTION, + ["entity_01", "entity_02"], + [ + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + True, + [True, True], + 2.2, + [2.2, 4.4], + 2, + 
[2, 3], + "test1", + ["test2", "test3"], + b"0", + ], + ], + pd.DataFrame( + data=[ + [ + "entity_01", + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_02", + True, + [True, True], + 2.2, + [2.2, 4.4], + 2, + [2, 3], + "test1", + ["test2", "test3"], + b"0", + ], + ], + columns=["entity_id"] + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + ), + ), + ( + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + _TEST_FEATURE_VALUE_TYPES_FOR_DF_CONSTRUCTION, + ["entity_01", "entity_02"], + [ + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [None, None, None, None, None, None, None, None, None], + ], + pd.DataFrame( + data=[ + [ + "entity_01", + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_02", + None, + None, + None, + None, + None, + None, + None, + None, + None, + ], + ], + columns=["entity_id"] + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + ), + ), + ( + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + _TEST_FEATURE_VALUE_TYPES_FOR_DF_CONSTRUCTION, + ["entity_01"], + [[None, None, None, None, None, None, None, None, None]], + pd.DataFrame( + data=[ + [ + "entity_01", + None, + None, + None, + None, + None, + None, + None, + None, + None, + ] + ], + columns=["entity_id"] + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + ), + ), + ( + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + _TEST_FEATURE_VALUE_TYPES_FOR_DF_CONSTRUCTION, + [ + "entity_01", + "entity_02", + "entity_03", + "entity_04", + "entity_05", + "entity_06", + "entity_07", + "entity_08", + "entity_09", + "entity_10", + ], + [ + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + None, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + False, + None, + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + False, + [True, False], + None, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + False, + [True, False], + 1.2, + None, + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + None, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + None, + "test", + ["test1", "test2"], + b"1", + ], + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + None, + ["test1", "test2"], + b"1", + ], + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + None, + b"1", + ], + [ + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + None, + ], + ], + pd.DataFrame( + data=[ + [ + "entity_01", + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_02", + None, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_03", + False, + None, + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_04", + False, + [True, False], + None, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_05", + False, + [True, False], + 1.2, + None, + 1, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_06", + False, + [True, False], + 1.2, + [1.2, 3.4], + None, + [1, 2], + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_07", + False, 
+ [True, False], + 1.2, + [1.2, 3.4], + 1, + None, + "test", + ["test1", "test2"], + b"1", + ], + [ + "entity_08", + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + None, + ["test1", "test2"], + b"1", + ], + [ + "entity_09", + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + None, + b"1", + ], + [ + "entity_10", + False, + [True, False], + 1.2, + [1.2, 3.4], + 1, + [1, 2], + "test", + ["test1", "test2"], + None, + ], + ], + columns=["entity_id"] + _TEST_FEATURE_IDS_FOR_DF_CONSTRUCTION, + ), + ), + ], + ) + def test_construct_dataframe( + self, feature_ids, feature_value_types, entity_ids, feature_values, expected_df, + ): + entity_views = [ + _get_entity_view_proto( + entity_id=entity_id, + feature_value_types=feature_value_types, + feature_values=entity_feature_values, + ) + for (entity_id, entity_feature_values) in zip(entity_ids, feature_values) + ] + df = aiplatform.EntityType._construct_dataframe( + feature_ids=feature_ids, entity_views=entity_views + ) + assert df.equals(expected_df) + class TestFeature: def setup_method(self): @@ -1180,7 +1791,7 @@ def test_create_feature(self, create_feature_mock, sync): my_feature = aiplatform.Feature.create( feature_id=_TEST_FEATURE_ID, - value_type=_TEST_FEATURE_VALUE_TYPE, + value_type=_TEST_FEATURE_VALUE_TYPE_STR, entity_type_name=_TEST_ENTITY_TYPE_ID, featurestore_id=_TEST_FEATURESTORE_ID, description=_TEST_DESCRIPTION,
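As a quick orientation for reviewers, the `read` method added to `EntityType` in this patch accepts either a single entity ID (served through a `ReadFeatureValuesRequest`) or a list of IDs (served through a `StreamingReadFeatureValuesRequest`), and in both cases `_construct_dataframe` assembles the response into a pandas DataFrame. Below is a minimal usage sketch modeled on the system test in this patch; the project, location, resource names, entity IDs, and feature IDs are placeholders rather than values taken from this PR.

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

# Look up an existing entity type by its full resource name (placeholder values).
user_entity_type = aiplatform.EntityType(
    entity_type_name=(
        "projects/my-project/locations/us-central1/"
        "featurestores/my_featurestore/entityTypes/users"
    )
)

# Single entity ID: one ReadFeatureValuesRequest; feature_ids defaults to "*",
# which selects every feature defined on the entity type.
single_df = user_entity_type.read(entity_ids="alice")

# List of entity IDs: one StreamingReadFeatureValuesRequest restricted to the
# listed feature IDs via a FeatureSelector.
multi_df = user_entity_type.read(
    entity_ids=["alice", "bob"],
    feature_ids=["age", "gender"],
)

# Each result is a DataFrame with an "entity_id" column followed by one column
# per requested feature; features with no stored value come back as None.
print(single_df.head())
print(multi_df.head())

Passing explicit `feature_ids` narrows the `FeatureSelector` sent with the request, while the default `"*"` reads every feature on the entity type, matching the behavior exercised by `test_read_single_entity` and `test_read_multiple_entities` above.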