From 75ecfa6e9b1149ce4a489f1342df6ded2b5a83d0 Mon Sep 17 00:00:00 2001
From: tanlocnguyen
Date: Thu, 4 Apr 2024 09:46:10 +0700
Subject: [PATCH] feat: Add Decimal data type to the Spark-to-Feast data type
 mapping

Signed-off-by: tanlocnguyen
---
 .github/workflows/pr_integration_tests.yml     |    2 +-
 .../workflows/pr_local_integration_tests.yml   |    7 +-
 .github/workflows/unit_tests.yml               |   13 +-
 Makefile                                       |   37 +-
 java/CONTRIBUTING.md                           |    4 +-
 java/pom.xml                                   |   65 +-
 java/serving/.gitignore                        |    5 +-
 java/serving/pom.xml                           |   27 +-
 .../serving/registry/AzureRegistryFile.java    |   57 +
 .../service/config/ApplicationProperties.java  |    9 +
 .../service/config/RegistryConfigModule.java   |   23 +-
 .../it/ServingRedisAzureRegistryIT.java        |  105 ++
 .../serving/it/ServingRedisGSRegistryIT.java   |   74 +-
 .../serving/it/ServingRedisS3RegistryIT.java   |   18 +-
 protos/feast/core/OnDemandFeatureView.proto    |   16 +-
 protos/feast/core/Registry.proto               |    2 -
 protos/feast/core/RequestFeatureView.proto     |   51 -
 protos/feast/core/StreamFeatureView.proto      |    7 +-
 protos/feast/core/Transformation.proto         |   32 +
 protos/feast/registry/RegistryServer.proto     |   22 -
 .../docs/source/feast.protos.feast.core.rst    |   16 -
 sdk/python/docs/source/feast.rst               |    8 -
 sdk/python/feast/__init__.py                   |    2 -
 sdk/python/feast/base_feature_view.py          |   15 +-
 sdk/python/feast/cli.py                        |    1 -
 sdk/python/feast/constants.py                  |    3 +
 sdk/python/feast/diff/registry_diff.py         |   27 +-
 sdk/python/feast/embedded_go/__init__.py       |    0
 .../embedded_go/online_features_service.py     |    5 +
 sdk/python/feast/feast_object.py               |    4 -
 sdk/python/feast/feature_server.py             |   65 +-
 sdk/python/feast/feature_store.py              |  215 +---
 sdk/python/feast/infra/contrib/__init__.py     |    0
 .../feast/infra/feature_servers/__init__.py    |    0
 .../feast/infra/materialization/__init__.py    |    0
 .../infra/materialization/contrib/__init__.py  |    0
 .../bytewax/bytewax_materialization_engine.py  |    2 +-
 .../contrib/ibis_offline_store/ibis.py         |  248 ++--
 .../infra/offline_stores/offline_store.py      |    8 +
 .../feast/infra/registry/base_registry.py      |   86 +-
 .../feast/infra/registry/caching_registry.py   |   29 -
 .../feast/infra/registry/contrib/__init__.py   |    0
 .../registry/contrib/postgres/__init__.py      |    0
 .../infra/registry/proto_registry_utils.py     |   24 -
 sdk/python/feast/infra/registry/registry.py    |   47 +-
 sdk/python/feast/infra/registry/remote.py      |   26 -
 sdk/python/feast/infra/registry/snowflake.py   |   48 +-
 sdk/python/feast/infra/registry/sql.py         |   50 +-
 .../feast/infra/utils/snowflake/__init__.py    |    0
 .../utils/snowflake/registry/__init__.py       |    0
 .../registry/snowflake_table_creation.sql      |    9 -
 .../registry/snowflake_table_deletion.sql      |    2 -
 sdk/python/feast/on_demand_feature_view.py     |  289 ++++-
 sdk/python/feast/registry_server.py            |   17 -
 sdk/python/feast/repo_contents.py              |    5 -
 sdk/python/feast/repo_operations.py            |    8 -
 sdk/python/feast/request_feature_view.py       |  137 ---
 sdk/python/feast/stream_feature_view.py        |   33 +-
 sdk/python/feast/transformation/__init__.py    |    0
 .../pandas_transformation.py}                  |   27 +-
 .../transformation/python_transformation.py   |   65 +
 .../substrait_transformation.py}               |   22 +-
 sdk/python/feast/transformation_server.py      |    5 +
 sdk/python/feast/type_map.py                   |    8 +-
 sdk/python/feast/ui/yarn.lock                  |   55 +-
 sdk/python/pytest.ini                          |    6 +-
 .../requirements/py3.10-ci-requirements.txt    |   44 +-
 .../requirements/py3.10-requirements.txt       |    6 +-
 .../requirements/py3.8-ci-requirements.txt     | 1053 -----------------
 .../requirements/py3.8-requirements.txt        |  214 ----
 .../requirements/py3.9-ci-requirements.txt     |   44 +-
 .../requirements/py3.9-requirements.txt        |    6 +-
 sdk/python/tests/conftest.py                   |    3 -
 .../feature_repos/universal/feature_views.py  |    6 +-
 .../integration/registration/test_registry.py |   60 +-
 .../tests/unit/diff/test_registry_diff.py     |    5 +-
 .../unit/infra/offline_stores/test_ibis.py    |  138 +++
 .../unit/infra/test_inference_unit_tests.py   |   69 +-
 sdk/python/tests/unit/test_feature_views.py   |  205 +---
 .../tests/unit/test_on_demand_feature_view.py |  161 ++-
 .../tests/unit/test_stream_feature_view.py    |  252 ++++
 ...on.py => test_substrait_transformation.py} |   50 +-
 setup.py                                      |    4 +-
 .../OnDemandFeatureViewOverviewTab.tsx        |    2 +-
 84 files changed, 1913 insertions(+), 2602 deletions(-)
 create mode 100644 java/serving/src/main/java/feast/serving/registry/AzureRegistryFile.java
 create mode 100644 java/serving/src/test/java/feast/serving/it/ServingRedisAzureRegistryIT.java
 delete mode 100644 protos/feast/core/RequestFeatureView.proto
 create mode 100644 protos/feast/core/Transformation.proto
 create mode 100644 sdk/python/feast/embedded_go/__init__.py
 create mode 100644 sdk/python/feast/infra/contrib/__init__.py
 create mode 100644 sdk/python/feast/infra/feature_servers/__init__.py
 create mode 100644 sdk/python/feast/infra/materialization/__init__.py
 create mode 100644 sdk/python/feast/infra/materialization/contrib/__init__.py
 create mode 100644 sdk/python/feast/infra/registry/contrib/__init__.py
 create mode 100644 sdk/python/feast/infra/registry/contrib/postgres/__init__.py
 create mode 100644 sdk/python/feast/infra/utils/snowflake/__init__.py
 create mode 100644 sdk/python/feast/infra/utils/snowflake/registry/__init__.py
 delete mode 100644 sdk/python/feast/request_feature_view.py
 create mode 100644 sdk/python/feast/transformation/__init__.py
 rename sdk/python/feast/{on_demand_pandas_transformation.py => transformation/pandas_transformation.py} (57%)
 create mode 100644 sdk/python/feast/transformation/python_transformation.py
 rename sdk/python/feast/{on_demand_substrait_transformation.py => transformation/substrait_transformation.py} (54%)
 delete mode 100644 sdk/python/requirements/py3.8-ci-requirements.txt
 delete mode 100644 sdk/python/requirements/py3.8-requirements.txt
 create mode 100644 sdk/python/tests/unit/infra/offline_stores/test_ibis.py
 create mode 100644 sdk/python/tests/unit/test_stream_feature_view.py
 rename sdk/python/tests/unit/{test_on_demand_substrait_transformation.py => test_substrait_transformation.py} (72%)

diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml
index b335c0f042f..5e7287351b6 100644
--- a/.github/workflows/pr_integration_tests.yml
+++ b/.github/workflows/pr_integration_tests.yml
@@ -167,4 +167,4 @@ jobs:
           SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }}
           SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }}
           SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }}
-        run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread
+        run: make test-python-integration
\ No newline at end of file
diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml
index 17ff54b1f84..266cdcc9b9f 100644
--- a/.github/workflows/pr_local_integration_tests.yml
+++ b/.github/workflows/pr_local_integration_tests.yml
@@ -61,9 +61,4 @@ jobs:
         run: make install-python-ci-dependencies
       - name: Test local integration tests
         if: ${{ always() }}  # this will guarantee that step won't be canceled and resources won't leak
-        env:
-          FEAST_USAGE: "False"
-          IS_TEST: "True"
-          FEAST_LOCAL_ONLINE_CONTAINER: "True"
-          FEAST_IS_LOCAL_TEST: "True"
-        run: pytest -n 8 --cov=./ --cov-report=xml --color=yes --integration -k "not gcs_registry and not s3_registry and not test_lambda_materialization and not test_snowflake_materialization" sdk/python/tests
+        run: make test-python-integration-local
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 7e2e3b577af..f3f91bb67f3 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -23,17 +23,6 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           architecture: x64
-      - name: Install mysql on macOS
-        if: startsWith(matrix.os, 'macOS')
-        run: |
-          brew install mysql
-          PATH=$PATH:/usr/local/mysql/bin
-      - name: Work around Homebrew MySQL being broken
-        # See https://github.com/Homebrew/homebrew-core/issues/130258 for more details.
-        if: startsWith(matrix.os, 'macOS')
-        run: |
-          brew install zlib
-          ln -sv $(brew --prefix zlib)/lib/libz.dylib $(brew --prefix)/lib/libzlib.dylib
       - name: Get pip cache dir
         id: pip-cache
         run: |
@@ -56,7 +45,7 @@ jobs:
       - name: Install dependencies
         run: make install-python-ci-dependencies
       - name: Test Python
-        run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests
+        run: make test-python-unit

   unit-test-ui:
diff --git a/Makefile b/Makefile
index 0eac7e03a24..d232d9c93fc 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ format: format-python format-java

 lint: lint-python lint-java

-test: test-python test-java
+test: test-python-unit test-java

 protos: compile-protos-python compile-protos-docs

@@ -63,32 +63,26 @@ benchmark-python:

 benchmark-python-local:
 	FEAST_USAGE=False IS_TEST=True FEAST_IS_LOCAL_TEST=True python -m pytest --integration --benchmark --benchmark-autosave --benchmark-save-data sdk/python/tests

-test-python:
-	FEAST_USAGE=False \
-	IS_TEST=True \
-	python -m pytest -n 8 sdk/python/tests \
+test-python-unit:
+	python -m pytest -n 8 --color=yes sdk/python/tests

 test-python-integration:
-	FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration sdk/python/tests
+	python -m pytest -n 8 --integration -k "not minio_registry" --color=yes --durations=5 --timeout=1200 --timeout_method=thread sdk/python/tests

 test-python-integration-local:
 	@(docker info > /dev/null 2>&1 && \
-		FEAST_USAGE=False \
-		IS_TEST=True \
 		FEAST_IS_LOCAL_TEST=True \
 		FEAST_LOCAL_ONLINE_CONTAINER=True \
-		python -m pytest -n 8 --integration \
+		python -m pytest -n 8 --color=yes --integration \
 		-k "not gcs_registry and \
 		not s3_registry and \
 		not test_lambda_materialization and \
-		not test_snowflake" \
+		not test_snowflake_materialization" \
 		sdk/python/tests \
 	) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";

 test-python-integration-container:
 	@(docker info > /dev/null 2>&1 && \
-		FEAST_USAGE=False \
-		IS_TEST=True \
 		FEAST_LOCAL_ONLINE_CONTAINER=True \
 		python -m pytest -n 8 --integration sdk/python/tests \
 	) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";

@@ -97,7 +91,6 @@ test-python-universal-spark:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.spark_repo_configuration \
 	PYTEST_PLUGINS=feast.infra.offline_stores.contrib.spark_offline_store.tests \
-	FEAST_USAGE=False IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_historical_retrieval_fails_on_validation and \
 	not test_historical_retrieval_with_validation and \
@@ -121,7 +114,6 @@ test-python-universal-trino:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.trino_repo_configuration \
 	PYTEST_PLUGINS=feast.infra.offline_stores.contrib.trino_offline_store.tests \
-	FEAST_USAGE=False IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_historical_retrieval_fails_on_validation and \
 	not test_historical_retrieval_with_validation and \
@@ -148,7 +140,6 @@ test-python-universal-mssql:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.mssql_repo_configuration \
 	PYTEST_PLUGINS=feast.infra.offline_stores.contrib.mssql_offline_store.tests \
-	FEAST_USAGE=False IS_TEST=True \
 	FEAST_LOCAL_ONLINE_CONTAINER=True \
 	python -m pytest -n 8 --integration \
 	-k "not gcs_registry and \
@@ -166,7 +157,6 @@ test-python-universal-athena:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.athena_repo_configuration \
 	PYTEST_PLUGINS=feast.infra.offline_stores.contrib.athena_offline_store.tests \
-	FEAST_USAGE=False IS_TEST=True \
 	ATHENA_REGION=ap-northeast-2 \
 	ATHENA_DATA_SOURCE=AwsDataCatalog \
 	ATHENA_DATABASE=default \
@@ -190,7 +180,6 @@ test-python-universal-athena:
 test-python-universal-duckdb:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.duckdb_repo_configuration \
-	FEAST_USAGE=False IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_nullable_online_store and \
 	not gcs_registry and \
@@ -204,8 +193,6 @@ test-python-universal-postgres-offline:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.postgres_repo_configuration \
 	PYTEST_PLUGINS=sdk.python.feast.infra.offline_stores.contrib.postgres_offline_store.tests \
-	FEAST_USAGE=False \
-	IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_historical_retrieval_with_validation and \
 	not test_historical_features_persisting and \
@@ -226,8 +213,6 @@ test-python-universal-postgres-online:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.online_stores.contrib.postgres_repo_configuration \
 	PYTEST_PLUGINS=sdk.python.feast.infra.offline_stores.contrib.postgres_offline_store.tests \
-	FEAST_USAGE=False \
-	IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_universal_cli and \
 	not test_go_feature_server and \
@@ -247,8 +232,6 @@ test-python-universal-postgres-online:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.online_stores.contrib.mysql_repo_configuration \
 	PYTEST_PLUGINS=sdk.python.tests.integration.feature_repos.universal.online_store.mysql \
-	FEAST_USAGE=False \
-	IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_universal_cli and \
 	not test_go_feature_server and \
@@ -268,8 +251,6 @@ test-python-universal-cassandra:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.online_stores.contrib.cassandra_repo_configuration \
 	PYTEST_PLUGINS=sdk.python.tests.integration.feature_repos.universal.online_store.cassandra \
-	FEAST_USAGE=False \
-	IS_TEST=True \
 	python -m pytest -x --integration \
 	sdk/python/tests

@@ -277,8 +258,6 @@ test-python-universal-hazelcast:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.online_stores.contrib.hazelcast_repo_configuration \
 	PYTEST_PLUGINS=sdk.python.tests.integration.feature_repos.universal.online_store.hazelcast \
-	FEAST_USAGE=False \
-	IS_TEST=True \
 	python -m pytest -n 8 --integration \
 	-k "not test_universal_cli and \
 	not test_go_feature_server and \
@@ -298,8 +277,6 @@ test-python-universal-cassandra-no-cloud-providers:
 	PYTHONPATH='.' \
 	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.online_stores.contrib.cassandra_repo_configuration \
 	PYTEST_PLUGINS=sdk.python.tests.integration.feature_repos.universal.online_store.cassandra \
-	FEAST_USAGE=False \
-	IS_TEST=True \
 	python -m pytest -x --integration \
 	-k "not test_lambda_materialization_consistency and \
 	not test_apply_entity_integration and \
@@ -314,7 +291,7 @@ test-python-universal-cassandra-no-cloud-providers:
 	sdk/python/tests

 test-python-universal:
-	FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration sdk/python/tests
+	python -m pytest -n 8 --integration sdk/python/tests

 format-python:
 	# Sort
diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md
index 65d43d0de51..6d53c7b5c24 100644
--- a/java/CONTRIBUTING.md
+++ b/java/CONTRIBUTING.md
@@ -50,7 +50,7 @@ Automatically format the code to conform the style guide by:

 ```sh
 # formats all code in the feast-java repository
-mvn spotless:apply
+make format-java
 ```

 > If you're using IntelliJ, you can import these [code style settings](https://github.com/google/styleguide/blob/gh-pages/intellij-java-google-style.xml)
@@ -66,7 +66,7 @@ Run all Unit tests:
 make test-java
 ```

-Run all Integration tests (note: this also runs GCS + S3 based tests which should fail):
+Run all Integration tests:
 ```
 make test-java-integration
 ```
diff --git a/java/pom.xml b/java/pom.xml
index 59c67337842..ccb33125962 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -68,6 +68,8 @@
     0.21.0
     1.6.6
     30.1-jre
+    <reactor.version>3.4.34</reactor.version>
+    <netty.version>4.1.101.Final</netty.version>

         <version>${javax.validation.version}</version>
+
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-core</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-databind</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-annotations</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-common</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-buffer</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-handler</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.netty</groupId>
+        <artifactId>netty-transport</artifactId>
+        <version>${netty.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>io.projectreactor</groupId>
+        <artifactId>reactor-core</artifactId>
+        <version>${reactor.version}</version>
+      </dependency>
+
       <dependency>
         <groupId>org.junit.platform</groupId>
         <artifactId>junit-platform-engine</artifactId>
@@ -246,7 +291,7 @@
-            ${license.content}
+            ${license.content}
             1.7
@@ -264,15 +309,15 @@
-          <executions>
-            <execution>
-              <id>spotless-check</id>
-              <phase>process-test-classes</phase>
-              <goals>
-                <goal>check</goal>
-              </goals>
-            </execution>
-          </executions>
+        <executions>
+          <execution>
+            <id>spotless-check</id>
+            <phase>process-test-classes</phase>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
diff --git a/java/serving/.gitignore b/java/serving/.gitignore
index 6c6b6d8d8f8..750b7f498bc 100644
--- a/java/serving/.gitignore
+++ b/java/serving/.gitignore
@@ -34,4 +34,7 @@ feast-serving.jar
 /.nb-gradle/

 ## Feast Temporary Files ##
-/temp/
\ No newline at end of file
+/temp/
+
+## Generated test data ##
+**/*.parquet
\ No newline at end of file
diff --git a/java/serving/pom.xml b/java/serving/pom.xml
index 19e54e1362b..6929d65d934 100644
--- a/java/serving/pom.xml
+++ b/java/serving/pom.xml
@@ -16,8 +16,8 @@
   ~ -->
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>
@@ -121,6 +121,19 @@
       <version>5.0.1</version>
     </dependency>

+    <dependency>
+      <groupId>com.azure</groupId>
+      <artifactId>azure-storage-blob</artifactId>
+      <version>12.25.2</version>
+    </dependency>
+    <dependency>
+      <groupId>com.azure</groupId>
+      <artifactId>azure-identity</artifactId>
+      <version>1.11.3</version>
+    </dependency>
+
     <dependency>
       <groupId>org.slf4j</groupId>
@@ -356,11 +369,11 @@
       <version>2.7.4</version>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>io.lettuce</groupId>
-      <artifactId>lettuce-core</artifactId>
-      <version>6.0.2.RELEASE</version>
-    </dependency>
+    <dependency>
+      <groupId>io.lettuce</groupId>
+      <artifactId>lettuce-core</artifactId>
+      <version>6.0.2.RELEASE</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
diff --git a/java/serving/src/main/java/feast/serving/registry/AzureRegistryFile.java b/java/serving/src/main/java/feast/serving/registry/AzureRegistryFile.java
new file mode 100644
index 00000000000..72f6d476d58
--- /dev/null
+++ b/java/serving/src/main/java/feast/serving/registry/AzureRegistryFile.java
@@ -0,0 +1,57 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright 2018-2021 The Feast Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package feast.serving.registry;
+
+import com.azure.storage.blob.BlobClient;
+import com.azure.storage.blob.BlobServiceClient;
+import com.google.protobuf.InvalidProtocolBufferException;
+import feast.proto.core.RegistryProto;
+import java.util.Objects;
+import java.util.Optional;
+
+public class AzureRegistryFile implements RegistryFile {
+  private final BlobClient blobClient;
+  private String lastKnownETag;
+
+  public AzureRegistryFile(BlobServiceClient blobServiceClient, String url) {
+    String[] split = url.replace("az://", "").split("/");
+    String objectPath = String.join("/", java.util.Arrays.copyOfRange(split, 1, split.length));
+    this.blobClient = blobServiceClient.getBlobContainerClient(split[0]).getBlobClient(objectPath);
+  }
+
+  @Override
+  public RegistryProto.Registry getContent() {
+    try {
+      return RegistryProto.Registry.parseFrom(blobClient.downloadContent().toBytes());
+    } catch (InvalidProtocolBufferException e) {
+      throw new RuntimeException(
+          String.format(
+              "Couldn't read remote registry: %s. Error: %s",
+              blobClient.getBlobUrl(), e.getMessage()));
+    }
+  }
+
+  @Override
+  public Optional<RegistryProto.Registry> getContentIfModified() {
+    String eTag = blobClient.getProperties().getETag();
+    if (Objects.equals(eTag, this.lastKnownETag)) {
+      return Optional.empty();
+    } else this.lastKnownETag = eTag;
+
+    return Optional.of(getContent());
+  }
+}
diff --git a/java/serving/src/main/java/feast/serving/service/config/ApplicationProperties.java b/java/serving/src/main/java/feast/serving/service/config/ApplicationProperties.java
index 7cef10e61a8..91c5440cb71 100644
--- a/java/serving/src/main/java/feast/serving/service/config/ApplicationProperties.java
+++ b/java/serving/src/main/java/feast/serving/service/config/ApplicationProperties.java
@@ -95,6 +95,7 @@ public static class FeastProperties {
     private String gcpProject;
     private String awsRegion;
     private String transformationServiceEndpoint;
+    private String azureStorageAccount;

     public String getRegistry() {
       return registry;
@@ -205,6 +206,14 @@ public String getTransformationServiceEndpoint() {
     public void setTransformationServiceEndpoint(String transformationServiceEndpoint) {
       this.transformationServiceEndpoint = transformationServiceEndpoint;
     }
+
+    public String getAzureStorageAccount() {
+      return azureStorageAccount;
+    }
+
+    public void setAzureStorageAccount(String azureStorageAccount) {
+      this.azureStorageAccount = azureStorageAccount;
+    }
   }

   /** Store configuration class for database that this Feast Serving uses. */
diff --git a/java/serving/src/main/java/feast/serving/service/config/RegistryConfigModule.java b/java/serving/src/main/java/feast/serving/service/config/RegistryConfigModule.java
index cfb4666f07a..5ab951c71cb 100644
--- a/java/serving/src/main/java/feast/serving/service/config/RegistryConfigModule.java
+++ b/java/serving/src/main/java/feast/serving/service/config/RegistryConfigModule.java
@@ -18,6 +18,9 @@

 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+import com.azure.storage.blob.BlobServiceClient;
+import com.azure.storage.blob.BlobServiceClientBuilder;
 import com.google.cloud.storage.Storage;
 import com.google.cloud.storage.StorageOptions;
 import com.google.inject.AbstractModule;
@@ -43,11 +46,27 @@ public AmazonS3 awsStorage(ApplicationProperties applicationProperties) {
         .build();
   }

+  @Provides
+  public BlobServiceClient azureStorage(ApplicationProperties applicationProperties) {
+
+    BlobServiceClient blobServiceClient =
+        new BlobServiceClientBuilder()
+            .endpoint(
+                String.format(
+                    "https://%s.blob.core.windows.net",
+                    applicationProperties.getFeast().getAzureStorageAccount()))
+            .credential(new DefaultAzureCredentialBuilder().build())
+            .buildClient();
+
+    return blobServiceClient;
+  }
+
   @Provides
   RegistryFile registryFile(
       ApplicationProperties applicationProperties,
       Provider<Storage> storageProvider,
-      Provider<AmazonS3> amazonS3Provider) {
+      Provider<AmazonS3> amazonS3Provider,
+      Provider<BlobServiceClient> azureProvider) {

     String registryPath = applicationProperties.getFeast().getRegistry();
     Optional<String> scheme = Optional.ofNullable(URI.create(registryPath).getScheme());
@@ -57,6 +76,8 @@ RegistryFile registryFile(
         return new GSRegistryFile(storageProvider.get(), registryPath);
       case "s3":
         return new S3RegistryFile(amazonS3Provider.get(), registryPath);
+      case "az":
+        return new AzureRegistryFile(azureProvider.get(), registryPath);
       case "":
       case "file":
         return new LocalRegistryFile(registryPath);
diff --git a/java/serving/src/test/java/feast/serving/it/ServingRedisAzureRegistryIT.java b/java/serving/src/test/java/feast/serving/it/ServingRedisAzureRegistryIT.java
new file mode 100644
index 00000000000..8ab658fc2a1
--- /dev/null
+++ b/java/serving/src/test/java/feast/serving/it/ServingRedisAzureRegistryIT.java
@@ -0,0 +1,105 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright 2018-2021 The Feast Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package feast.serving.it;
+
+import com.azure.storage.blob.BlobClient;
+import com.azure.storage.blob.BlobServiceClient;
+import com.azure.storage.blob.BlobServiceClientBuilder;
+import com.azure.storage.common.StorageSharedKeyCredential;
+import com.google.inject.AbstractModule;
+import com.google.inject.Provides;
+import feast.proto.core.RegistryProto;
+import feast.serving.service.config.ApplicationProperties;
+import java.io.ByteArrayInputStream;
+import org.junit.jupiter.api.BeforeAll;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Container;
+
+public class ServingRedisAzureRegistryIT extends ServingBaseTests {
+  private static final String TEST_ACCOUNT_NAME = "devstoreaccount1";
+  private static final String TEST_ACCOUNT_KEY =
+      "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
+  private static final int BLOB_STORAGE_PORT = 10000;
+  private static final String TEST_CONTAINER = "test-container";
+  private static final StorageSharedKeyCredential CREDENTIAL =
+      new StorageSharedKeyCredential(TEST_ACCOUNT_NAME, TEST_ACCOUNT_KEY);
+
+  @Container
+  static final GenericContainer<?> azureBlobMock =
+      new GenericContainer<>("mcr.microsoft.com/azure-storage/azurite:latest")
+          .waitingFor(Wait.forLogMessage("Azurite Blob service successfully listens on.*", 1))
+          .withExposedPorts(BLOB_STORAGE_PORT)
+          .withCommand("azurite-blob", "--blobHost", "0.0.0.0");
+
+  private static BlobServiceClient createClient() {
+    return new BlobServiceClientBuilder()
+        .endpoint(
+            String.format(
+                "http://localhost:%d/%s",
+                azureBlobMock.getMappedPort(BLOB_STORAGE_PORT), TEST_ACCOUNT_NAME))
+        .credential(CREDENTIAL)
+        .buildClient();
+  }
+
+  private static void putToStorage(RegistryProto.Registry registry) {
+    BlobServiceClient client = createClient();
+    BlobClient blobClient =
+        client.getBlobContainerClient(TEST_CONTAINER).getBlobClient("registry.db");
+
+    blobClient.upload(new ByteArrayInputStream(registry.toByteArray()));
+  }
+
+  @BeforeAll
+  static void setUp() {
+    BlobServiceClient client = createClient();
+    client.createBlobContainer(TEST_CONTAINER);
+
+    putToStorage(registryProto);
+  }
+
+  @Override
+  ApplicationProperties.FeastProperties createFeastProperties() {
+    final ApplicationProperties.FeastProperties feastProperties =
+        TestUtils.createBasicFeastProperties(
+            environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379));
+    feastProperties.setRegistry(String.format("az://%s/registry.db", TEST_CONTAINER));
+
+    return feastProperties;
+  }
+
+  @Override
+  void updateRegistryFile(RegistryProto.Registry registry) {
+    putToStorage(registry);
+  }
+
+  @Override
+  AbstractModule registryConfig() {
+    return new AbstractModule() {
+      @Provides
+      public BlobServiceClient azureStorage() {
+        return new BlobServiceClientBuilder()
+            .endpoint(
+                String.format(
+                    "http://localhost:%d/%s",
+                    azureBlobMock.getMappedPort(BLOB_STORAGE_PORT), TEST_ACCOUNT_NAME))
+            .credential(CREDENTIAL)
+            .buildClient();
+      }
+    };
+  }
+}
diff --git a/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java b/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java
index 925f1887d27..96aa2077c0f 100644
--- a/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java
+++ b/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java
@@ -16,47 +16,54 @@
  */
 package feast.serving.it;

-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-
+import com.google.auth.oauth2.AccessToken;
+import com.google.auth.oauth2.ServiceAccountCredentials;
 import com.google.cloud.storage.*;
-import com.google.cloud.storage.testing.RemoteStorageHelper;
+import com.google.inject.AbstractModule;
+import com.google.inject.Provides;
 import feast.proto.core.RegistryProto;
 import feast.serving.service.config.ApplicationProperties;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.TimeUnit;
-import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.junit.jupiter.Container;

 public class ServingRedisGSRegistryIT extends ServingBaseTests {
-  static Storage storage =
-      RemoteStorageHelper.create()
-          .getOptions()
-          .toBuilder()
-          .setProjectId(System.getProperty("GCP_PROJECT", "kf-feast"))
-          .build()
-          .getService();
-  static final String bucket = RemoteStorageHelper.generateBucketName();
+  private static final String TEST_PROJECT = "test-project";
+  private static final String TEST_BUCKET = "test-bucket";
+  private static final BlobId blobId = BlobId.of(TEST_BUCKET, "registry.db");
+  private static final int GCS_PORT = 4443;

-  static void putToStorage(BlobId blobId, RegistryProto.Registry registry) {
-    storage.create(BlobInfo.newBuilder(blobId).build(), registry.toByteArray());
+  @Container
+  static final GenericContainer<?> gcsMock =
+      new GenericContainer<>("fsouza/fake-gcs-server")
+          .withExposedPorts(GCS_PORT)
+          .withCreateContainerCmdModifier(
+              cmd -> cmd.withEntrypoint("/bin/fake-gcs-server", "-scheme", "http"));

-    assertArrayEquals(storage.get(blobId).getContent(), registry.toByteArray());
-  }
+  public static final AccessToken credential = new AccessToken("test-token", null);

-  static BlobId blobId;
+  static void putToStorage(RegistryProto.Registry registry) {
+    Storage gcsClient = createClient();
+
+    gcsClient.create(BlobInfo.newBuilder(blobId).build(), registry.toByteArray());
+  }

   @BeforeAll
   static void setUp() {
-    storage.create(BucketInfo.of(bucket));
-    blobId = BlobId.of(bucket, "registry.db");
+    Storage gcsClient = createClient();
+    gcsClient.create(BucketInfo.of(TEST_BUCKET));

-    putToStorage(blobId, registryProto);
+    putToStorage(registryProto);
   }

-  @AfterAll
-  static void tearDown() throws ExecutionException, InterruptedException {
-    RemoteStorageHelper.forceDelete(storage, bucket, 5, TimeUnit.SECONDS);
+  private static Storage createClient() {
+    return StorageOptions.newBuilder()
+        .setProjectId(TEST_PROJECT)
+        .setCredentials(ServiceAccountCredentials.create(credential))
+        .setHost("http://localhost:" + gcsMock.getMappedPort(GCS_PORT))
+        .build()
+        .getService();
   }

   @Override
@@ -71,6 +78,21 @@ ApplicationProperties.FeastProperties createFeastProperties() {

   @Override
   void updateRegistryFile(RegistryProto.Registry registry) {
-    putToStorage(blobId, registry);
+    putToStorage(registry);
+  }
+
+  @Override
+  AbstractModule registryConfig() {
+    return new AbstractModule() {
+      @Provides
+      Storage googleStorage(ApplicationProperties applicationProperties) {
+        return StorageOptions.newBuilder()
+            .setProjectId(TEST_PROJECT)
+            .setCredentials(ServiceAccountCredentials.create(credential))
+            .setHost("http://localhost:" + gcsMock.getMappedPort(GCS_PORT))
+            .build()
+            .getService();
+      }
+    };
   }
 }
diff --git a/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java b/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java
index 12315c9e484..52e1af90655 100644
--- a/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java
+++ b/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java
@@ -17,6 +17,8 @@
 package feast.serving.it;

 import com.adobe.testing.s3mock.testcontainers.S3MockContainer;
+import com.amazonaws.auth.AWSStaticCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
 import com.amazonaws.client.builder.AwsClientBuilder;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@@ -30,13 +32,18 @@
 import org.testcontainers.junit.jupiter.Container;

 public class ServingRedisS3RegistryIT extends ServingBaseTests {
+  private static final String TEST_REGION = "us-east-1";
+  private static final String TEST_BUCKET = "test-bucket";
   @Container static final S3MockContainer s3Mock = new S3MockContainer("2.2.3");
+  private static final AWSStaticCredentialsProvider credentials =
+      new AWSStaticCredentialsProvider(new BasicAWSCredentials("anyAccessKey", "anySecretKey"));

   private static AmazonS3 createClient() {
     return AmazonS3ClientBuilder.standard()
         .withEndpointConfiguration(
             new AwsClientBuilder.EndpointConfiguration(
-                String.format("http://localhost:%d", s3Mock.getHttpServerPort()), "us-east-1"))
+                String.format("http://localhost:%d", s3Mock.getHttpServerPort()), TEST_REGION))
+        .withCredentials(credentials)
         .enablePathStyleAccess()
         .build();
   }
@@ -48,13 +55,13 @@ private static void putToStorage(RegistryProto.Registry proto) {
     metadata.setContentType("application/protobuf");
     AmazonS3 s3Client = createClient();

-    s3Client.putObject("test-bucket", "registry.db", new ByteArrayInputStream(bytes), metadata);
+    s3Client.putObject(TEST_BUCKET, "registry.db", new ByteArrayInputStream(bytes), metadata);
   }

   @BeforeAll
   static void setUp() {
     AmazonS3 s3Client = createClient();
-    s3Client.createBucket("test-bucket");
+    s3Client.createBucket(TEST_BUCKET);

     putToStorage(registryProto);
   }
@@ -64,7 +71,7 @@ ApplicationProperties.FeastProperties createFeastProperties() {
     final ApplicationProperties.FeastProperties feastProperties =
         TestUtils.createBasicFeastProperties(
             environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379));
-    feastProperties.setRegistry("s3://test-bucket/registry.db");
+    feastProperties.setRegistry(String.format("s3://%s/registry.db", TEST_BUCKET));

     return feastProperties;
   }
@@ -82,7 +89,8 @@ public AmazonS3 awsStorage() {
       return AmazonS3ClientBuilder.standard()
           .withEndpointConfiguration(
               new AwsClientBuilder.EndpointConfiguration(
                   String.format("http://localhost:%d", s3Mock.getHttpServerPort()), TEST_REGION))
+          .withCredentials(credentials)
           .enablePathStyleAccess()
           .build();
     }
diff --git a/protos/feast/core/OnDemandFeatureView.proto b/protos/feast/core/OnDemandFeatureView.proto
index c43b33c1d28..7a5fec16504 100644
--- a/protos/feast/core/OnDemandFeatureView.proto
+++ b/protos/feast/core/OnDemandFeatureView.proto
@@ -27,6 +27,7 @@ import "feast/core/FeatureView.proto";
 import "feast/core/FeatureViewProjection.proto";
 import "feast/core/Feature.proto";
 import "feast/core/DataSource.proto";
+import "feast/core/Transformation.proto";

 message OnDemandFeatureView {
   // User-specified specifications of this feature view.
@@ -48,10 +49,10 @@ message OnDemandFeatureViewSpec {
   // Map of sources for this feature view.
   map<string, OnDemandSource> sources = 4;

-  oneof transformation {
-    UserDefinedFunction user_defined_function = 5;
-    OnDemandSubstraitTransformation on_demand_substrait_transformation = 9;
-  }
+  UserDefinedFunction user_defined_function = 5 [deprecated = true];
+
+  // Oneof with {user_defined_function, substrait_transformation}
+  FeatureTransformationV2 feature_transformation = 10;

   // Description of the on demand feature view.
   string description = 6;
@@ -61,6 +62,7 @@ message OnDemandFeatureViewSpec {

   // Owner of the on demand feature view.
   string owner = 8;
+  string mode = 11;
 }

 message OnDemandFeatureViewMeta {
@@ -81,6 +83,8 @@ message OnDemandSource {

 // Serialized representation of python function.
 message UserDefinedFunction {
+  option deprecated = true;
+
   // The function name
   string name = 1;

@@ -90,7 +94,3 @@ message UserDefinedFunction {
   // The string representation of the udf
   string body_text = 3;
 }
-
-message OnDemandSubstraitTransformation {
-  bytes substrait_plan = 1;
-}
\ No newline at end of file
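Because the deprecated `user_defined_function` field is retained alongside the new `feature_transformation` message, registry readers have to consult both fields. The sketch below illustrates that fallback in Python; it is not part of this patch, `resolve_udf` is a hypothetical helper, and the generated module path follows the SDK's usual `feast.protos` convention:

    from feast.protos.feast.core.OnDemandFeatureView_pb2 import OnDemandFeatureViewSpec

    def resolve_udf(spec: OnDemandFeatureViewSpec):
        # Prefer the new oneof inside FeatureTransformationV2 ...
        if spec.feature_transformation.HasField("user_defined_function"):
            return spec.feature_transformation.user_defined_function
        # ... and fall back to the deprecated top-level field for old registries.
        if spec.HasField("user_defined_function"):
            return spec.user_defined_function
        return None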
diff --git a/protos/feast/core/Registry.proto b/protos/feast/core/Registry.proto
index 7d80d8c837f..0c3f8a53f94 100644
--- a/protos/feast/core/Registry.proto
+++ b/protos/feast/core/Registry.proto
@@ -27,7 +27,6 @@ import "feast/core/FeatureTable.proto";
 import "feast/core/FeatureView.proto";
 import "feast/core/InfraObject.proto";
 import "feast/core/OnDemandFeatureView.proto";
-import "feast/core/RequestFeatureView.proto";
 import "feast/core/StreamFeatureView.proto";
 import "feast/core/DataSource.proto";
 import "feast/core/SavedDataset.proto";
@@ -41,7 +40,6 @@ message Registry {
   repeated FeatureView feature_views = 6;
   repeated DataSource data_sources = 12;
   repeated OnDemandFeatureView on_demand_feature_views = 8;
-  repeated RequestFeatureView request_feature_views = 9;
   repeated StreamFeatureView stream_feature_views = 14;
   repeated FeatureService feature_services = 7;
   repeated SavedDataset saved_datasets = 11;
diff --git a/protos/feast/core/RequestFeatureView.proto b/protos/feast/core/RequestFeatureView.proto
deleted file mode 100644
index 4049053c2be..00000000000
--- a/protos/feast/core/RequestFeatureView.proto
+++ /dev/null
@@ -1,51 +0,0 @@
-//
-// Copyright 2021 The Feast Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-
-syntax = "proto3";
-package feast.core;
-
-option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
-option java_outer_classname = "RequestFeatureViewProto";
-option java_package = "feast.proto.core";
-
-import "feast/core/DataSource.proto";
-
-message RequestFeatureView {
-    // User-specified specifications of this feature view.
-    RequestFeatureViewSpec spec = 1;
-}
-
-// Next available id: 7
-message RequestFeatureViewSpec {
-    // Name of the feature view. Must be unique. Not updated.
-    string name = 1;
-
-    // Name of Feast project that this feature view belongs to.
-    string project = 2;
-
-    // Request data which contains the underlying data schema and list of associated features
-    DataSource request_data_source = 3;
-
-    // Description of the request feature view.
-    string description = 4;
-
-    // User defined metadata.
-    map<string, string> tags = 5;
-
-    // Owner of the request feature view.
-    string owner = 6;
-}
diff --git a/protos/feast/core/StreamFeatureView.proto b/protos/feast/core/StreamFeatureView.proto
index 3181bdf3602..cb7da0faf34 100644
--- a/protos/feast/core/StreamFeatureView.proto
+++ b/protos/feast/core/StreamFeatureView.proto
@@ -29,6 +29,7 @@ import "feast/core/FeatureView.proto";
 import "feast/core/Feature.proto";
 import "feast/core/DataSource.proto";
 import "feast/core/Aggregation.proto";
+import "feast/core/Transformation.proto";

 message StreamFeatureView {
   // User-specified specifications of this feature view.
@@ -77,7 +78,8 @@ message StreamFeatureViewSpec {
   bool online = 12;

   // Serialized function that is encoded in the streamfeatureview
-  UserDefinedFunction user_defined_function = 13;
+  UserDefinedFunction user_defined_function = 13 [deprecated = true];
+

   // Mode of execution
   string mode = 14;
@@ -87,5 +89,8 @@ message StreamFeatureViewSpec {

   // Timestamp field for aggregation
   string timestamp_field = 16;
+
+  // Oneof with {user_defined_function, substrait_transformation}
+  FeatureTransformationV2 feature_transformation = 17;
 }
diff --git a/protos/feast/core/Transformation.proto b/protos/feast/core/Transformation.proto
new file mode 100644
index 00000000000..36f1e691fe1
--- /dev/null
+++ b/protos/feast/core/Transformation.proto
@@ -0,0 +1,32 @@
+syntax = "proto3";
+package feast.core;
+
+option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
+option java_outer_classname = "FeatureTransformationProto";
+option java_package = "feast.proto.core";
+
+import "google/protobuf/duration.proto";
+
+// Serialized representation of python function.
+message UserDefinedFunctionV2 {
+  // The function name
+  string name = 1;
+
+  // The python-syntax function body (serialized by dill)
+  bytes body = 2;
+
+  // The string representation of the udf
+  string body_text = 3;
+}
+
+// A feature transformation executed as a user-defined function
+message FeatureTransformationV2 {
+  oneof transformation {
+    UserDefinedFunctionV2 user_defined_function = 1;
+    SubstraitTransformationV2 substrait_transformation = 2;
+  }
+}
+
+message SubstraitTransformationV2 {
+  bytes substrait_plan = 1;
+}
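For orientation, this is roughly how a transformation would be packaged into the new message from Python. A minimal sketch, assuming the bindings are generated as `feast.protos.feast.core.Transformation_pb2` and that, as elsewhere in Feast, the function body is serialized with dill; `make_udf_proto` is an illustrative helper, not SDK API:

    import inspect

    import dill

    from feast.protos.feast.core.Transformation_pb2 import (
        FeatureTransformationV2,
        UserDefinedFunctionV2,
    )

    def make_udf_proto(func) -> FeatureTransformationV2:
        # Fill the user_defined_function arm of the oneof; the
        # substrait_transformation arm would carry a serialized plan instead.
        return FeatureTransformationV2(
            user_defined_function=UserDefinedFunctionV2(
                name=func.__name__,
                body=dill.dumps(func, recurse=True),
                body_text=inspect.getsource(func),
            )
        )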
diff --git a/protos/feast/registry/RegistryServer.proto b/protos/feast/registry/RegistryServer.proto
index ab324f9bd1a..e99987eb2da 100644
--- a/protos/feast/registry/RegistryServer.proto
+++ b/protos/feast/registry/RegistryServer.proto
@@ -7,7 +7,6 @@ import "feast/core/Registry.proto";
 import "feast/core/Entity.proto";
 import "feast/core/DataSource.proto";
 import "feast/core/FeatureView.proto";
-import "feast/core/RequestFeatureView.proto";
 import "feast/core/StreamFeatureView.proto";
 import "feast/core/OnDemandFeatureView.proto";
 import "feast/core/FeatureService.proto";
@@ -28,10 +27,6 @@ service RegistryServer{
   rpc GetFeatureView (GetFeatureViewRequest) returns (feast.core.FeatureView) {}
   rpc ListFeatureViews (ListFeatureViewsRequest) returns (ListFeatureViewsResponse) {}

-  // RequestFeatureView RPCs
-  rpc GetRequestFeatureView (GetRequestFeatureViewRequest) returns (feast.core.RequestFeatureView) {}
-  rpc ListRequestFeatureViews (ListRequestFeatureViewsRequest) returns (ListRequestFeatureViewsResponse) {}
-
   // StreamFeatureView RPCs
   rpc GetStreamFeatureView (GetStreamFeatureViewRequest) returns (feast.core.StreamFeatureView) {}
   rpc ListStreamFeatureViews (ListStreamFeatureViewsRequest) returns (ListStreamFeatureViewsResponse) {}
@@ -126,23 +121,6 @@ message ListFeatureViewsResponse {
   repeated feast.core.FeatureView feature_views = 1;
 }

-// RequestFeatureView
-
-message GetRequestFeatureViewRequest {
-  string name = 1;
-  string project = 2;
-  bool allow_cache = 3;
-}
-
-message ListRequestFeatureViewsRequest {
-  string project = 1;
-  bool allow_cache = 2;
-}
-
-message ListRequestFeatureViewsResponse {
-  repeated feast.core.RequestFeatureView request_feature_views = 1;
-}
-
 // StreamFeatureView

 message GetStreamFeatureViewRequest {
diff --git a/sdk/python/docs/source/feast.protos.feast.core.rst b/sdk/python/docs/source/feast.protos.feast.core.rst
index aaed49cd731..5da16d2a267 100644
--- a/sdk/python/docs/source/feast.protos.feast.core.rst
+++ b/sdk/python/docs/source/feast.protos.feast.core.rst
@@ -228,22 +228,6 @@ feast.protos.feast.core.Registry\_pb2\_grpc module
    :undoc-members:
    :show-inheritance:

-feast.protos.feast.core.RequestFeatureView\_pb2 module
-------------------------------------------------------
-
-.. automodule:: feast.protos.feast.core.RequestFeatureView_pb2
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-feast.protos.feast.core.RequestFeatureView\_pb2\_grpc module
-------------------------------------------------------------
-
-.. automodule:: feast.protos.feast.core.RequestFeatureView_pb2_grpc
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 feast.protos.feast.core.SavedDataset\_pb2 module
 ------------------------------------------------
diff --git a/sdk/python/docs/source/feast.rst b/sdk/python/docs/source/feast.rst
index b0ed92c4cce..abb8783bf09 100644
--- a/sdk/python/docs/source/feast.rst
+++ b/sdk/python/docs/source/feast.rst
@@ -273,14 +273,6 @@ feast.repo\_upgrade module
    :undoc-members:
    :show-inheritance:

-feast.request\_feature\_view module
------------------------------------
-
-.. automodule:: feast.request_feature_view
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 feast.saved\_dataset module
 ---------------------------
diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py
index d043f1a9738..3eff91d65f1 100644
--- a/sdk/python/feast/__init__.py
+++ b/sdk/python/feast/__init__.py
@@ -22,7 +22,6 @@
 from .field import Field
 from .on_demand_feature_view import OnDemandFeatureView
 from .repo_config import RepoConfig
-from .request_feature_view import RequestFeatureView
 from .stream_feature_view import StreamFeatureView
 from .value_type import ValueType

@@ -49,7 +48,6 @@
     "BigQuerySource",
     "FileSource",
     "RedshiftSource",
-    "RequestFeatureView",
     "SnowflakeSource",
     "PushSource",
     "RequestSource",
diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py
index 975537a3944..31140e28999 100644
--- a/sdk/python/feast/base_feature_view.py
+++ b/sdk/python/feast/base_feature_view.py
@@ -13,13 +13,20 @@
 # limitations under the License.
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import Dict, List, Optional, Type
+from typing import Dict, List, Optional, Type, Union

 from google.protobuf.json_format import MessageToJson
-from proto import Message
+from google.protobuf.message import Message

 from feast.feature_view_projection import FeatureViewProjection
 from feast.field import Field
+from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto
+from feast.protos.feast.core.OnDemandFeatureView_pb2 import (
+    OnDemandFeatureView as OnDemandFeatureViewProto,
+)
+from feast.protos.feast.core.StreamFeatureView_pb2 import (
+    StreamFeatureView as StreamFeatureViewProto,
+)


 class BaseFeatureView(ABC):
@@ -89,7 +96,9 @@ def proto_class(self) -> Type[Message]:
         pass

     @abstractmethod
-    def to_proto(self) -> Message:
+    def to_proto(
+        self,
+    ) -> Union[FeatureViewProto, OnDemandFeatureViewProto, StreamFeatureViewProto]:
         pass

     @classmethod
diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py
index 7ce8aaef2bc..7673eee20db 100644
--- a/sdk/python/feast/cli.py
+++ b/sdk/python/feast/cli.py
@@ -381,7 +381,6 @@ def feature_view_list(ctx: click.Context):
     table = []
     for feature_view in [
         *store.list_feature_views(),
-        *store.list_request_feature_views(),
         *store.list_on_demand_feature_views(),
     ]:
         entities = set()
diff --git a/sdk/python/feast/constants.py b/sdk/python/feast/constants.py
index c022ecba557..e47da0ad6b7 100644
--- a/sdk/python/feast/constants.py
+++ b/sdk/python/feast/constants.py
@@ -49,3 +49,6 @@

 # Environment variable for feature server docker image tag
 DOCKER_IMAGE_TAG_ENV_NAME: str = "FEAST_SERVER_DOCKER_IMAGE_TAG"
+
+# Default feature server registry ttl (seconds)
+DEFAULT_FEATURE_SERVER_REGISTRY_TTL = 5
diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py
index 15f880e392e..106d34bf486 100644
--- a/sdk/python/feast/diff/registry_diff.py
+++ b/sdk/python/feast/diff/registry_diff.py
@@ -20,9 +20,6 @@
     OnDemandFeatureView as OnDemandFeatureViewProto,
 )
 from feast.protos.feast.core.OnDemandFeatureView_pb2 import OnDemandFeatureViewSpec
-from feast.protos.feast.core.RequestFeatureView_pb2 import (
-    RequestFeatureView as RequestFeatureViewProto,
-)
 from feast.protos.feast.core.StreamFeatureView_pb2 import (
     StreamFeatureView as StreamFeatureViewProto,
 )
@@ -110,7 +107,6 @@ def tag_objects_for_keep_delete_update_add(
     FeatureViewProto,
     FeatureServiceProto,
     OnDemandFeatureViewProto,
-    RequestFeatureViewProto,
     StreamFeatureViewProto,
     ValidationReferenceProto,
 )
@@ -144,11 +140,26 @@ def diff_registry_objects(
         if _field.name in FIELDS_TO_IGNORE:
             continue
         elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name):
-            if _field.name == "user_defined_function":
+            if _field.name == "feature_transformation":
                 current_spec = cast(OnDemandFeatureViewSpec, current_spec)
                 new_spec = cast(OnDemandFeatureViewSpec, new_spec)
-                current_udf = current_spec.user_defined_function
-                new_udf = new_spec.user_defined_function
+                # Check if the old proto is populated and use that if it is
+                feature_transformation_udf = (
+                    current_spec.feature_transformation.user_defined_function
+                )
+                if (
+                    current_spec.HasField("user_defined_function")
+                    and not feature_transformation_udf
+                ):
+                    deprecated_udf = current_spec.user_defined_function
+                else:
+                    deprecated_udf = None
+                current_udf = (
+                    deprecated_udf
+                    if deprecated_udf is not None
+                    else feature_transformation_udf
+                )
+                new_udf = new_spec.feature_transformation.user_defined_function
                 for _udf_field in current_udf.DESCRIPTOR.fields:
                     if _udf_field.name == "body":
                         continue
@@ -324,7 +335,6 @@ def apply_diff_to_registry(
     elif feast_object_diff.feast_object_type in [
         FeastObjectType.FEATURE_VIEW,
         FeastObjectType.ON_DEMAND_FEATURE_VIEW,
-        FeastObjectType.REQUEST_FEATURE_VIEW,
         FeastObjectType.STREAM_FEATURE_VIEW,
     ]:
         feature_view_obj = cast(
@@ -368,7 +378,6 @@ def apply_diff_to_registry(
     elif feast_object_diff.feast_object_type in [
         FeastObjectType.FEATURE_VIEW,
         FeastObjectType.ON_DEMAND_FEATURE_VIEW,
-        FeastObjectType.REQUEST_FEATURE_VIEW,
         FeastObjectType.STREAM_FEATURE_VIEW,
     ]:
         registry.apply_feature_view(
diff --git a/sdk/python/feast/embedded_go/__init__.py b/sdk/python/feast/embedded_go/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/sdk/python/feast/embedded_go/online_features_service.py b/sdk/python/feast/embedded_go/online_features_service.py
index bf82fab6a33..c6430b5f6d6 100644
--- a/sdk/python/feast/embedded_go/online_features_service.py
+++ b/sdk/python/feast/embedded_go/online_features_service.py
@@ -252,6 +252,11 @@ def transformation_callback(
         # the typeguard requirement.
         full_feature_names = bool(full_feature_names)

+        if odfv.mode != "pandas":
+            raise Exception(
+                f"OnDemandFeatureView mode '{odfv.mode}' not supported by EmbeddedOnlineFeatureServer."
+            )
+
         output = odfv.get_transformed_features_df(
             input_record.to_pandas(), full_feature_names=full_feature_names
         )
diff --git a/sdk/python/feast/feast_object.py b/sdk/python/feast/feast_object.py
index 7cccf26455f..2d06d8d669d 100644
--- a/sdk/python/feast/feast_object.py
+++ b/sdk/python/feast/feast_object.py
@@ -11,12 +11,10 @@
 from .protos.feast.core.FeatureService_pb2 import FeatureServiceSpec
 from .protos.feast.core.FeatureView_pb2 import FeatureViewSpec
 from .protos.feast.core.OnDemandFeatureView_pb2 import OnDemandFeatureViewSpec
-from .protos.feast.core.RequestFeatureView_pb2 import RequestFeatureViewSpec
 from .protos.feast.core.StreamFeatureView_pb2 import StreamFeatureViewSpec
 from .protos.feast.core.ValidationProfile_pb2 import (
     ValidationReference as ValidationReferenceProto,
 )
-from .request_feature_view import RequestFeatureView
 from .saved_dataset import ValidationReference
 from .stream_feature_view import StreamFeatureView

@@ -24,7 +22,6 @@
 FeastObject = Union[
     FeatureView,
     OnDemandFeatureView,
-    RequestFeatureView,
     BatchFeatureView,
     StreamFeatureView,
     Entity,
@@ -36,7 +33,6 @@
 FeastObjectSpecProto = Union[
     FeatureViewSpec,
     OnDemandFeatureViewSpec,
-    RequestFeatureViewSpec,
     StreamFeatureViewSpec,
     EntitySpecV2,
     FeatureServiceSpec,
diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py
index 618aefb2f28..4b0e50a06da 100644
--- a/sdk/python/feast/feature_server.py
+++ b/sdk/python/feast/feature_server.py
@@ -1,10 +1,10 @@
 import json
+import sys
 import threading
 import traceback
 import warnings
 from typing import List, Optional

-import gunicorn.app.base
 import pandas as pd
 from dateutil import parser
 from fastapi import FastAPI, HTTPException, Request, Response, status
@@ -15,6 +15,7 @@

 import feast
 from feast import proto_json, utils
+from feast.constants import DEFAULT_FEATURE_SERVER_REGISTRY_TTL
 from feast.data_source import PushMode
 from feast.errors import PushSourceNotFoundException
 from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest
@@ -45,7 +46,10 @@ class MaterializeIncrementalRequest(BaseModel):
     feature_views: Optional[List[str]] = None


-def get_app(store: "feast.FeatureStore", registry_ttl_sec: int = 5):
+def get_app(
+    store: "feast.FeatureStore",
+    registry_ttl_sec: int = DEFAULT_FEATURE_SERVER_REGISTRY_TTL,
+):
     proto_json.patch()

     app = FastAPI()
@@ -202,24 +206,27 @@ def materialize_incremental(body=Depends(get_body)):
     return app


-class FeastServeApplication(gunicorn.app.base.BaseApplication):
-    def __init__(self, store: "feast.FeatureStore", **options):
-        self._app = get_app(
-            store=store,
-            registry_ttl_sec=options.get("registry_ttl_sec", 5),
-        )
-        self._options = options
-        super().__init__()
+if sys.platform != "win32":
+    import gunicorn.app.base

-    def load_config(self):
-        for key, value in self._options.items():
-            if key.lower() in self.cfg.settings and value is not None:
-                self.cfg.set(key.lower(), value)
+    class FeastServeApplication(gunicorn.app.base.BaseApplication):
+        def __init__(self, store: "feast.FeatureStore", **options):
+            self._app = get_app(
+                store=store,
+                registry_ttl_sec=options["registry_ttl_sec"],
+            )
+            self._options = options
+            super().__init__()
+
+        def load_config(self):
+            for key, value in self._options.items():
+                if key.lower() in self.cfg.settings and value is not None:
+                    self.cfg.set(key.lower(), value)

-        self.cfg.set("worker_class", "uvicorn.workers.UvicornWorker")
+            self.cfg.set("worker_class", "uvicorn.workers.UvicornWorker")

-    def load(self):
-        return self._app
+        def load(self):
+            return self._app


 def start_server(
@@ -229,13 +236,19 @@ def start_server(
     no_access_log: bool,
     workers: int,
     keep_alive_timeout: int,
-    registry_ttl_sec: int = 5,
+    registry_ttl_sec: int,
 ):
-    FeastServeApplication(
-        store=store,
-        bind=f"{host}:{port}",
-        accesslog=None if no_access_log else "-",
-        workers=workers,
-        keepalive=keep_alive_timeout,
-        registry_ttl_sec=registry_ttl_sec,
-    ).run()
+    if sys.platform != "win32":
+        FeastServeApplication(
+            store=store,
+            bind=f"{host}:{port}",
+            accesslog=None if no_access_log else "-",
+            workers=workers,
+            keepalive=keep_alive_timeout,
+            registry_ttl_sec=registry_ttl_sec,
+        ).run()
+    else:
+        import uvicorn
+
+        app = get_app(store, registry_ttl_sec)
+        uvicorn.run(app, host=host, port=port, access_log=(not no_access_log))
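Since gunicorn cannot be imported on Windows, the server now falls back to plain uvicorn there. A minimal usage sketch of the same code path; the repo path and port are placeholders:

    import uvicorn

    from feast import FeatureStore
    from feast.feature_server import get_app

    # Build the FastAPI app; registry_ttl_sec controls how often the cached
    # registry is refreshed (DEFAULT_FEATURE_SERVER_REGISTRY_TTL is 5 seconds).
    store = FeatureStore(repo_path=".")
    app = get_app(store, registry_ttl_sec=5)

    if __name__ == "__main__":
        # uvicorn works on every platform, matching the win32 branch of start_server.
        uvicorn.run(app, host="127.0.0.1", port=6566)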
diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py
index 44236248fe1..83aaafd6863 100644
--- a/sdk/python/feast/feature_store.py
+++ b/sdk/python/feast/feature_store.py
@@ -91,7 +91,6 @@
 from feast.protos.feast.types.Value_pb2 import RepeatedValue, Value
 from feast.repo_config import RepoConfig, load_repo_config
 from feast.repo_contents import RepoContents
-from feast.request_feature_view import RequestFeatureView
 from feast.saved_dataset import SavedDataset, SavedDatasetStorage, ValidationReference
 from feast.stream_feature_view import StreamFeatureView
 from feast.type_map import python_values_to_proto_values
@@ -266,23 +265,6 @@ def list_feature_views(self, allow_cache: bool = False) -> List[FeatureView]:
         """
         return self._list_feature_views(allow_cache)

-    @log_exceptions_and_usage
-    def list_request_feature_views(
-        self, allow_cache: bool = False
-    ) -> List[RequestFeatureView]:
-        """
-        Retrieves the list of feature views from the registry.
-
-        Args:
-            allow_cache: Whether to allow returning entities from a cached registry.
-
-        Returns:
-            A list of feature views.
-        """
-        return self._registry.list_request_feature_views(
-            self.project, allow_cache=allow_cache
-        )
-
     def _list_feature_views(
         self,
         allow_cache: bool = False,
@@ -562,7 +544,6 @@ def _validate_all_feature_views(
         self,
         views_to_update: List[FeatureView],
         odfvs_to_update: List[OnDemandFeatureView],
-        request_views_to_update: List[RequestFeatureView],
         sfvs_to_update: List[StreamFeatureView],
     ):
         """Validates all feature views."""
@@ -577,7 +558,6 @@ def _validate_all_feature_views(
             [
                 *views_to_update,
                 *odfvs_to_update,
-                *request_views_to_update,
                 *sfvs_to_update,
             ]
         )
@@ -716,7 +696,6 @@ def plan(
             ...     feature_views=[driver_hourly_stats_view],
             ...     on_demand_feature_views=list(),
             ...     stream_feature_views=list(),
-            ...     request_feature_views=list(),
             ...     entities=[driver],
             ...     feature_services=list()))  # register entity and feature view
         """
@@ -724,7 +703,6 @@ def plan(
         self._validate_all_feature_views(
             desired_repo_contents.feature_views,
             desired_repo_contents.on_demand_feature_views,
-            desired_repo_contents.request_feature_views,
             desired_repo_contents.stream_feature_views,
         )
         _validate_data_sources(desired_repo_contents.data_sources)
@@ -781,7 +759,6 @@ def apply(
                 Entity,
                 FeatureView,
                 OnDemandFeatureView,
-                RequestFeatureView,
                 BatchFeatureView,
                 StreamFeatureView,
                 FeatureService,
@@ -848,9 +825,6 @@ def apply(
             )
         ]
         sfvs_to_update = [ob for ob in objects if isinstance(ob, StreamFeatureView)]
-        request_views_to_update = [
-            ob for ob in objects if isinstance(ob, RequestFeatureView)
-        ]
         odfvs_to_update = [ob for ob in objects if isinstance(ob, OnDemandFeatureView)]
         services_to_update = [ob for ob in objects if isinstance(ob, FeatureService)]
         data_sources_set_to_update = {
@@ -877,16 +851,6 @@ def apply(
             if fv.stream_source:
                 data_sources_set_to_update.add(fv.stream_source)

-        if request_views_to_update:
-            warnings.warn(
-                "Request feature view is deprecated. "
-                "Please use request data source instead",
-                DeprecationWarning,
-            )
-
-        for rfv in request_views_to_update:
-            data_sources_set_to_update.add(rfv.request_source)
-
         for odfv in odfvs_to_update:
             for v in odfv.source_request_sources.values():
                 data_sources_set_to_update.add(v)
@@ -898,7 +862,7 @@ def apply(

         # Validate all feature views and make inferences.
         self._validate_all_feature_views(
-            views_to_update, odfvs_to_update, request_views_to_update, sfvs_to_update
+            views_to_update, odfvs_to_update, sfvs_to_update
         )
         self._make_inferences(
             data_sources_to_update,
@@ -912,9 +876,7 @@ def apply(
         # Add all objects to the registry and update the provider's infrastructure.
         for ds in data_sources_to_update:
             self._registry.apply_data_source(ds, project=self.project, commit=False)
-        for view in itertools.chain(
-            views_to_update, odfvs_to_update, request_views_to_update, sfvs_to_update
-        ):
+        for view in itertools.chain(views_to_update, odfvs_to_update, sfvs_to_update):
             self._registry.apply_feature_view(view, project=self.project, commit=False)
         for ent in entities_to_update:
             self._registry.apply_entity(ent, project=self.project, commit=False)
@@ -943,9 +905,6 @@ def apply(
                 and not isinstance(ob, StreamFeatureView)
             )
         ]
-        request_views_to_delete = [
-            ob for ob in objects_to_delete if isinstance(ob, RequestFeatureView)
-        ]
         odfvs_to_delete = [
             ob for ob in objects_to_delete if isinstance(ob, OnDemandFeatureView)
         ]
@@ -974,10 +933,6 @@ def apply(
             self._registry.delete_feature_view(
                 view.name, project=self.project, commit=False
             )
-        for request_view in request_views_to_delete:
-            self._registry.delete_feature_view(
-                request_view.name, project=self.project, commit=False
-            )
         for odfv in odfvs_to_delete:
             self._registry.delete_feature_view(
                 odfv.name, project=self.project, commit=False
@@ -1088,43 +1043,26 @@ def get_historical_features(
         _feature_refs = self._get_features(features)
         (
             all_feature_views,
-            all_request_feature_views,
             all_on_demand_feature_views,
         ) = self._get_feature_views_to_use(features)

-        if all_request_feature_views:
-            warnings.warn(
-                "Request feature view is deprecated. "
-                "Please use request data source instead",
-                DeprecationWarning,
-            )
-
         # TODO(achal): _group_feature_refs returns the on demand feature views, but it's not passed into the provider.
         # This is a weird interface quirk - we should revisit the `get_historical_features` to
         # pass in the on demand feature views as well.
-        fvs, odfvs, request_fvs, request_fv_refs = _group_feature_refs(
+        fvs, odfvs = _group_feature_refs(
             _feature_refs,
             all_feature_views,
-            all_request_feature_views,
             all_on_demand_feature_views,
         )
         feature_views = list(view for view, _ in fvs)
         on_demand_feature_views = list(view for view, _ in odfvs)
-        request_feature_views = list(view for view, _ in request_fvs)
         set_usage_attribute("odfv", bool(on_demand_feature_views))
-        set_usage_attribute("request_fv", bool(request_feature_views))

         # Check that the right request data is present in the entity_df
         if type(entity_df) == pd.DataFrame:
             if self.config.coerce_tz_aware:
                 entity_df = utils.make_df_tzaware(cast(pd.DataFrame, entity_df))
-            for fv in request_feature_views:
-                for feature in fv.features:
-                    if feature.name not in entity_df.columns:
-                        raise RequestDataNotFoundInEntityDfException(
-                            feature_name=feature.name, feature_view_name=fv.name
-                        )
             for odfv in on_demand_feature_views:
                 odfv_request_data_schema = odfv.get_request_data_schema()
                 for feature_name in odfv_request_data_schema.keys():
@@ -1135,9 +1073,6 @@ def get_historical_features(
                         )

         _validate_feature_refs(_feature_refs, full_feature_names)
-        # Drop refs that refer to RequestFeatureViews since they don't need to be fetched and
-        # already exist in the entity_df
-        _feature_refs = [ref for ref in _feature_refs if ref not in request_fv_refs]

         provider = self._get_provider()
         job = provider.get_historical_features(
@@ -1615,19 +1550,11 @@ def _get_online_features(
         _feature_refs = self._get_features(features, allow_cache=True)
         (
             requested_feature_views,
-            requested_request_feature_views,
             requested_on_demand_feature_views,
         ) = self._get_feature_views_to_use(
             features=features, allow_cache=True, hide_dummy_entity=False
         )

-        if requested_request_feature_views:
-            warnings.warn(
-                "Request feature view is deprecated. "
-                "Please use request data source instead",
-                DeprecationWarning,
-            )
-
         (
             entity_name_to_join_key_map,
             entity_type_map,
@@ -1648,19 +1575,12 @@ def _get_online_features(
         num_rows = _validate_entity_values(entity_proto_values)
         _validate_feature_refs(_feature_refs, full_feature_names)
-        (
-            grouped_refs,
-            grouped_odfv_refs,
-            grouped_request_fv_refs,
-            _,
-        ) = _group_feature_refs(
+        (grouped_refs, grouped_odfv_refs,) = _group_feature_refs(
             _feature_refs,
             requested_feature_views,
-            requested_request_feature_views,
             requested_on_demand_feature_views,
         )
         set_usage_attribute("odfv", bool(grouped_odfv_refs))
-        set_usage_attribute("request_fv", bool(grouped_request_fv_refs))

         # All requested features should be present in the result.
         requested_result_row_names = {
@@ -1673,23 +1593,14 @@ def _get_online_features(

         feature_views = list(view for view, _ in grouped_refs)

-        needed_request_data, needed_request_fv_features = self.get_needed_request_data(
-            grouped_odfv_refs, grouped_request_fv_refs
-        )
+        needed_request_data = self.get_needed_request_data(grouped_odfv_refs)

         join_key_values: Dict[str, List[Value]] = {}
         request_data_features: Dict[str, List[Value]] = {}
         # Entity rows may be either entities or request data.
         for join_key_or_entity_name, values in entity_proto_values.items():
             # Found request data
-            if (
-                join_key_or_entity_name in needed_request_data
-                or join_key_or_entity_name in needed_request_fv_features
-            ):
-                if join_key_or_entity_name in needed_request_fv_features:
-                    # If the data was requested as a feature then
-                    # make sure it appears in the result.
-                    requested_result_row_names.add(join_key_or_entity_name)
+            if join_key_or_entity_name in needed_request_data:
                 request_data_features[join_key_or_entity_name] = values
             else:
                 if join_key_or_entity_name in join_keys_set:
@@ -1711,7 +1622,7 @@ def _get_online_features(
                 join_key_values[join_key] = values

         self.ensure_request_data_values_exist(
-            needed_request_data, needed_request_fv_features, request_data_features
+            needed_request_data, request_data_features
         )

         # Populate online features response proto with join keys and request data features
@@ -1870,33 +1781,21 @@ def _populate_result_rows_from_columnar(
     @staticmethod
     def get_needed_request_data(
         grouped_odfv_refs: List[Tuple[OnDemandFeatureView, List[str]]],
-        grouped_request_fv_refs: List[Tuple[RequestFeatureView, List[str]]],
-    ) -> Tuple[Set[str], Set[str]]:
+    ) -> Set[str]:
         needed_request_data: Set[str] = set()
-        needed_request_fv_features: Set[str] = set()
         for odfv, _ in grouped_odfv_refs:
             odfv_request_data_schema = odfv.get_request_data_schema()
             needed_request_data.update(odfv_request_data_schema.keys())
-        for request_fv, _ in grouped_request_fv_refs:
-            for feature in request_fv.features:
-                needed_request_fv_features.add(feature.name)
-        return needed_request_data, needed_request_fv_features
+        return needed_request_data

     @staticmethod
     def ensure_request_data_values_exist(
         needed_request_data: Set[str],
-        needed_request_fv_features: Set[str],
         request_data_features: Dict[str, List[Any]],
     ):
-        if len(needed_request_data) + len(needed_request_fv_features) != len(
-            request_data_features.keys()
-        ):
+        if len(needed_request_data) != len(request_data_features.keys()):
             missing_features = [
-                x
-                for x in itertools.chain(
-                    needed_request_data, needed_request_fv_features
-                )
-                if x not in request_data_features
+                x for x in needed_request_data if x not in request_data_features
             ]
             raise RequestDataNotFoundInEntityRowsException(
                 feature_names=missing_features
             )
@@ -2096,26 +1995,52 @@ def _augment_response_with_on_demand_transforms(
             )

         initial_response = OnlineResponse(online_features_response)
-        initial_response_df = initial_response.to_df()
+        initial_response_df: Optional[pd.DataFrame] = None
+        initial_response_dict: Optional[Dict[str, List[Any]]] = None

         # Apply on demand transformations and augment the result rows
         odfv_result_names = set()
         for odfv_name, _feature_refs in odfv_feature_refs.items():
             odfv = requested_odfv_map[odfv_name]
-            transformed_features_df = odfv.get_transformed_features_df(
-                initial_response_df,
-                full_feature_names,
+            if odfv.mode == "python":
+                if initial_response_dict is None:
+                    initial_response_dict = initial_response.to_dict()
+                transformed_features_dict: Dict[
+                    str, List[Any]
+                ] = odfv.get_transformed_features(
+                    initial_response_dict,
+                    full_feature_names,
+                )
+            elif odfv.mode in {"pandas", "substrait"}:
+                if initial_response_df is None:
+                    initial_response_df = initial_response.to_df()
+                transformed_features_df: pd.DataFrame = odfv.get_transformed_features(
+                    initial_response_df,
+                    full_feature_names,
+                )
+            else:
+                raise Exception(
+                    f"Invalid OnDemandFeatureMode: {odfv.mode}. Expected one of 'pandas', 'python', or 'substrait'."
+ ) + + transformed_features = ( + transformed_features_dict + if odfv.mode == "python" + else transformed_features_df ) - selected_subset = [ - f for f in transformed_features_df.columns if f in _feature_refs - ] + transformed_columns = ( + transformed_features.columns + if isinstance(transformed_features, pd.DataFrame) + else transformed_features + ) + selected_subset = [f for f in transformed_columns if f in _feature_refs] - proto_values = [ - python_values_to_proto_values( - transformed_features_df[feature].values, ValueType.UNKNOWN + proto_values = [] + for selected_feature in selected_subset: + feature_vector = transformed_features[selected_feature] + proto_values.append( + python_values_to_proto_values(feature_vector, ValueType.UNKNOWN) ) - for feature in selected_subset - ] odfv_result_names |= set(selected_subset) @@ -2161,7 +2086,7 @@ def _get_feature_views_to_use( features: Optional[Union[List[str], FeatureService]], allow_cache=False, hide_dummy_entity: bool = True, - ) -> Tuple[List[FeatureView], List[RequestFeatureView], List[OnDemandFeatureView]]: + ) -> Tuple[List[FeatureView], List[OnDemandFeatureView]]: fvs = { fv.name: fv for fv in [ @@ -2172,13 +2097,6 @@ def _get_feature_views_to_use( ] } - request_fvs = { - fv.name: fv - for fv in self._registry.list_request_feature_views( - project=self.project, allow_cache=allow_cache - ) - } - od_fvs = { fv.name: fv for fv in self._registry.list_on_demand_feature_views( @@ -2187,7 +2105,7 @@ def _get_feature_views_to_use( } if isinstance(features, FeatureService): - fvs_to_use, request_fvs_to_use, od_fvs_to_use = [], [], [] + fvs_to_use, od_fvs_to_use = [], [] for fv_name, projection in [ (projection.name, projection) for projection in features.feature_view_projections @@ -2196,10 +2114,6 @@ def _get_feature_views_to_use( fvs_to_use.append( fvs[fv_name].with_projection(copy.copy(projection)) ) - elif fv_name in request_fvs: - request_fvs_to_use.append( - request_fvs[fv_name].with_projection(copy.copy(projection)) - ) elif fv_name in od_fvs: odfv = od_fvs[fv_name].with_projection(copy.copy(projection)) od_fvs_to_use.append(odfv) @@ -2214,11 +2128,10 @@ def _get_feature_views_to_use( f"{fv_name} which doesn't exist. Please make sure that you have created the feature view" f'{fv_name} and that you have registered it by running "apply".' 
) - views_to_use = (fvs_to_use, request_fvs_to_use, od_fvs_to_use) + views_to_use = (fvs_to_use, od_fvs_to_use) else: views_to_use = ( [*fvs.values()], - [*request_fvs.values()], [*od_fvs.values()], ) @@ -2456,24 +2369,15 @@ def _validate_feature_refs(feature_refs: List[str], full_feature_names: bool = F def _group_feature_refs( features: List[str], all_feature_views: List[FeatureView], - all_request_feature_views: List[RequestFeatureView], all_on_demand_feature_views: List[OnDemandFeatureView], ) -> Tuple[ - List[Tuple[FeatureView, List[str]]], - List[Tuple[OnDemandFeatureView, List[str]]], - List[Tuple[RequestFeatureView, List[str]]], - Set[str], + List[Tuple[FeatureView, List[str]]], List[Tuple[OnDemandFeatureView, List[str]]] ]: """Get list of feature views and corresponding feature names based on feature references""" # view name to view proto view_index = {view.projection.name_to_use(): view for view in all_feature_views} - # request view name to proto - request_view_index = { - view.projection.name_to_use(): view for view in all_request_feature_views - } - # on demand view to on demand view proto on_demand_view_index = { view.projection.name_to_use(): view for view in all_on_demand_feature_views @@ -2481,8 +2385,6 @@ def _group_feature_refs( # view name to feature names views_features = defaultdict(set) - request_views_features = defaultdict(set) - request_view_refs = set() # on demand view name to feature names on_demand_view_features = defaultdict(set) @@ -2503,26 +2405,17 @@ def _group_feature_refs( ].source_feature_view_projections.values(): for input_feat in input_fv_projection.features: views_features[input_fv_projection.name].add(input_feat.name) - elif view_name in request_view_index: - request_view_index[view_name].projection.get_feature( - feat_name - ) # For validation - request_views_features[view_name].add(feat_name) - request_view_refs.add(ref) else: raise FeatureViewNotFoundException(view_name) fvs_result: List[Tuple[FeatureView, List[str]]] = [] odfvs_result: List[Tuple[OnDemandFeatureView, List[str]]] = [] - request_fvs_result: List[Tuple[RequestFeatureView, List[str]]] = [] for view_name, feature_names in views_features.items(): fvs_result.append((view_index[view_name], list(feature_names))) - for view_name, feature_names in request_views_features.items(): - request_fvs_result.append((request_view_index[view_name], list(feature_names))) for view_name, feature_names in on_demand_view_features.items(): odfvs_result.append((on_demand_view_index[view_name], list(feature_names))) - return fvs_result, odfvs_result, request_fvs_result, request_view_refs + return fvs_result, odfvs_result def _print_materialization_log( diff --git a/sdk/python/feast/infra/contrib/__init__.py b/sdk/python/feast/infra/contrib/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/feature_servers/__init__.py b/sdk/python/feast/infra/feature_servers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/materialization/__init__.py b/sdk/python/feast/infra/materialization/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/materialization/contrib/__init__.py b/sdk/python/feast/infra/materialization/contrib/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py 
index 060a47ce585..d82e0920e25 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py @@ -309,7 +309,7 @@ def _create_kubernetes_job(self, job_id, paths, feature_view): def _create_configuration_map(self, job_id, paths, feature_view, namespace): """Create a Kubernetes configmap for this job""" - feature_store_configuration = yaml.dump(self.repo_config.dict()) + feature_store_configuration = yaml.dump(self.repo_config.dict(by_alias=True)) materialization_config = yaml.dump( {"paths": paths, "feature_view": feature_view.name} diff --git a/sdk/python/feast/infra/offline_stores/contrib/ibis_offline_store/ibis.py b/sdk/python/feast/infra/offline_stores/contrib/ibis_offline_store/ibis.py index 72e0d970c60..8787d701581 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/ibis_offline_store/ibis.py +++ b/sdk/python/feast/infra/offline_stores/contrib/ibis_offline_store/ibis.py @@ -72,112 +72,6 @@ def _get_entity_df_event_timestamp_range( return entity_df_event_timestamp_range - @staticmethod - def _get_historical_features_one( - feature_view: FeatureView, - entity_table: Table, - feature_refs: List[str], - full_feature_names: bool, - timestamp_range: Tuple, - acc_table: Table, - event_timestamp_col: str, - ) -> Table: - fv_table: Table = ibis.read_parquet(feature_view.batch_source.name) - - for old_name, new_name in feature_view.batch_source.field_mapping.items(): - if old_name in fv_table.columns: - fv_table = fv_table.rename({new_name: old_name}) - - timestamp_field = feature_view.batch_source.timestamp_field - - # TODO mutate only if tz-naive - fv_table = fv_table.mutate( - **{ - timestamp_field: fv_table[timestamp_field].cast( - dt.Timestamp(timezone="UTC") - ) - } - ) - - full_name_prefix = feature_view.projection.name_alias or feature_view.name - - feature_refs = [ - fr.split(":")[1] - for fr in feature_refs - if fr.startswith(f"{full_name_prefix}:") - ] - - timestamp_range_start_minus_ttl = ( - timestamp_range[0] - feature_view.ttl - if feature_view.ttl and feature_view.ttl > timedelta(0, 0, 0, 0, 0, 0, 0) - else timestamp_range[0] - ) - - timestamp_range_start_minus_ttl = ibis.literal( - timestamp_range_start_minus_ttl.strftime("%Y-%m-%d %H:%M:%S.%f") - ).cast(dt.Timestamp(timezone="UTC")) - - timestamp_range_end = ibis.literal( - timestamp_range[1].strftime("%Y-%m-%d %H:%M:%S.%f") - ).cast(dt.Timestamp(timezone="UTC")) - - fv_table = fv_table.filter( - ibis.and_( - fv_table[timestamp_field] <= timestamp_range_end, - fv_table[timestamp_field] >= timestamp_range_start_minus_ttl, - ) - ) - - # join_key_map = feature_view.projection.join_key_map or {e.name: e.name for e in feature_view.entity_columns} - # predicates = [fv_table[k] == entity_table[v] for k, v in join_key_map.items()] - - if feature_view.projection.join_key_map: - predicates = [ - fv_table[k] == entity_table[v] - for k, v in feature_view.projection.join_key_map.items() - ] - else: - predicates = [ - fv_table[e.name] == entity_table[e.name] - for e in feature_view.entity_columns - ] - - predicates.append( - fv_table[timestamp_field] <= entity_table[event_timestamp_col] - ) - - fv_table = fv_table.inner_join( - entity_table, predicates, lname="", rname="{name}_y" - ) - - fv_table = ( - fv_table.group_by(by="entity_row_id") - .order_by(ibis.desc(fv_table[timestamp_field])) - .mutate(rn=ibis.row_number()) - ) - - fv_table = fv_table.filter(fv_table["rn"] == ibis.literal(0)) - - 
select_cols = ["entity_row_id"] - select_cols.extend(feature_refs) - fv_table = fv_table.select(select_cols) - - if full_feature_names: - fv_table = fv_table.rename( - {f"{full_name_prefix}__{feature}": feature for feature in feature_refs} - ) - - acc_table = acc_table.left_join( - fv_table, - predicates=[fv_table.entity_row_id == acc_table.entity_row_id], - lname="", - rname="{name}_yyyy", - ) - - acc_table = acc_table.drop(s.endswith("_yyyy")) - - return acc_table - @staticmethod def _to_utc(entity_df: pd.DataFrame, event_timestamp_col): entity_df_event_timestamp = entity_df.loc[ @@ -228,9 +122,11 @@ def get_historical_features( entity_schema=entity_schema, ) + # TODO get range with ibis timestamp_range = IbisOfflineStore._get_entity_df_event_timestamp_range( entity_df, event_timestamp_col ) + entity_df = IbisOfflineStore._to_utc(entity_df, event_timestamp_col) entity_table = ibis.memtable(entity_df) @@ -238,20 +134,61 @@ def get_historical_features( entity_table, feature_views, event_timestamp_col ) - res: Table = entity_table + def read_fv(feature_view, feature_refs, full_feature_names): + fv_table: Table = ibis.read_parquet(feature_view.batch_source.name) - for fv in feature_views: - res = IbisOfflineStore._get_historical_features_one( - fv, - entity_table, + for old_name, new_name in feature_view.batch_source.field_mapping.items(): + if old_name in fv_table.columns: + fv_table = fv_table.rename({new_name: old_name}) + + timestamp_field = feature_view.batch_source.timestamp_field + + # TODO mutate only if tz-naive + fv_table = fv_table.mutate( + **{ + timestamp_field: fv_table[timestamp_field].cast( + dt.Timestamp(timezone="UTC") + ) + } + ) + + full_name_prefix = feature_view.projection.name_alias or feature_view.name + + feature_refs = [ + fr.split(":")[1] + for fr in feature_refs + if fr.startswith(f"{full_name_prefix}:") + ] + + if full_feature_names: + fv_table = fv_table.rename( + { + f"{full_name_prefix}__{feature}": feature + for feature in feature_refs + } + ) + + feature_refs = [ + f"{full_name_prefix}__{feature}" for feature in feature_refs + ] + + return ( + fv_table, + feature_view.batch_source.timestamp_field, + feature_view.projection.join_key_map + or {e.name: e.name for e in feature_view.entity_columns}, feature_refs, - full_feature_names, - timestamp_range, - res, - event_timestamp_col, + feature_view.ttl, ) - res = res.drop("entity_row_id") + res = point_in_time_join( + entity_table=entity_table, + feature_tables=[ + read_fv(feature_view, feature_refs, full_feature_names) + for feature_view in feature_views + ], + event_timestamp_col=event_timestamp_col, + ) return IbisRetrievalJob( res, @@ -285,6 +222,10 @@ def pull_all_from_table_or_query( table = table.select(*fields) + # TODO get rid of this fix + if "__log_date" in table.columns: + table = table.drop("__log_date") + table = table.filter( ibis.and_( table[timestamp_field] >= ibis.literal(start_date), @@ -320,6 +261,7 @@ def write_logged_features( else: kwargs = {} + # TODO always write to directory table.to_parquet( f"{destination.path}/{uuid.uuid4().hex}-{{i}}.parquet", **kwargs ) @@ -405,3 +347,77 @@ def persist( @property def metadata(self) -> Optional[RetrievalMetadata]: return self._metadata + + +def point_in_time_join( + entity_table: Table, + feature_tables: List[Tuple[Table, str, Dict[str, str], List[str], timedelta]], + event_timestamp_col="event_timestamp", +): + # TODO handle ttl + all_entities = [event_timestamp_col] + for feature_table, timestamp_field, join_key_map, _, _ in feature_tables: + 
all_entities.extend(join_key_map.values()) + + r = ibis.literal("") + + for e in set(all_entities): + r = r.concat(entity_table[e].cast("string")) # type: ignore + + entity_table = entity_table.mutate(entity_row_id=r) + + acc_table = entity_table + + for ( + feature_table, + timestamp_field, + join_key_map, + feature_refs, + ttl, + ) in feature_tables: + predicates = [ + feature_table[k] == entity_table[v] for k, v in join_key_map.items() + ] + + predicates.append( + feature_table[timestamp_field] <= entity_table[event_timestamp_col], + ) + + if ttl: + predicates.append( + feature_table[timestamp_field] + >= entity_table[event_timestamp_col] - ibis.literal(ttl) + ) + + feature_table = feature_table.inner_join( + entity_table, predicates, lname="", rname="{name}_y" + ) + + feature_table = feature_table.drop(s.endswith("_y")) + + feature_table = ( + feature_table.group_by(by="entity_row_id") + .order_by(ibis.desc(feature_table[timestamp_field])) + .mutate(rn=ibis.row_number()) + ) + + feature_table = feature_table.filter( + feature_table["rn"] == ibis.literal(0) + ).drop("rn") + + select_cols = ["entity_row_id"] + select_cols.extend(feature_refs) + feature_table = feature_table.select(select_cols) + + acc_table = acc_table.left_join( + feature_table, + predicates=[feature_table.entity_row_id == acc_table.entity_row_id], + lname="", + rname="{name}_yyyy", + ) + + acc_table = acc_table.drop(s.endswith("_yyyy")) + + acc_table = acc_table.drop("entity_row_id") + + return acc_table diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 30135feccb3..6c16ef26439 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -81,6 +81,10 @@ def to_df( if self.on_demand_feature_views: # TODO(adchia): Fix requirement to specify dependent feature views in feature_refs for odfv in self.on_demand_feature_views: + if odfv.mode not in {"pandas", "substrait"}: + raise Exception( + f'OnDemandFeatureView mode "{odfv.mode}" not supported for offline processing.' + ) features_df = features_df.join( odfv.get_transformed_features_df( features_df, @@ -124,6 +128,10 @@ def to_arrow( features_df = self._to_df_internal(timeout=timeout) if self.on_demand_feature_views: for odfv in self.on_demand_feature_views: + if odfv.mode not in {"pandas", "substrait"}: + raise Exception( + f'OnDemandFeatureView mode "{odfv.mode}" not supported for offline processing.' + ) features_df = features_df.join( odfv.get_transformed_features_df( features_df, diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index 9ee3bbbabcf..c67164103ec 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
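The `point_in_time_join` helper introduced above implements a backward as-of join: for each entity row, take the latest feature row at or before the row's event timestamp, optionally bounded below by the feature view's TTL. A minimal pandas sketch of the same semantics follows; it is illustrative only (the store itself goes through ibis), and the entity and feature values are made up:

```python
from datetime import timedelta

import pandas as pd

entities = pd.DataFrame(
    {
        "driver_id": [1001, 1001],
        "event_timestamp": pd.to_datetime(
            ["2024-01-02 10:00", "2024-01-05 10:00"], utc=True
        ),
    }
)
features = pd.DataFrame(
    {
        "driver_id": [1001, 1001],
        "event_timestamp": pd.to_datetime(
            ["2024-01-01 00:00", "2024-01-04 00:00"], utc=True
        ),
        "conv_rate": [0.1, 0.2],
    }
)

# Backward as-of join: per entity row, the latest feature row at or before
# event_timestamp; `tolerance` plays the role of the feature view TTL.
print(
    pd.merge_asof(
        entities.sort_values("event_timestamp"),
        features.sort_values("event_timestamp"),
        on="event_timestamp",
        by="driver_id",
        direction="backward",
        tolerance=timedelta(days=2),
    )
)
```

With a tighter `tolerance` (say one day), the first entity row would get a null `conv_rate`, which mirrors the TTL lower-bound predicate added in the helper.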
 import json
+import warnings
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 
 from google.protobuf.json_format import MessageToJson
-from proto import Message
+from google.protobuf.message import Message
 
 from feast.base_feature_view import BaseFeatureView
 from feast.data_source import DataSource
@@ -29,9 +30,10 @@
 from feast.on_demand_feature_view import OnDemandFeatureView
 from feast.project_metadata import ProjectMetadata
 from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto
-from feast.request_feature_view import RequestFeatureView
 from feast.saved_dataset import SavedDataset, ValidationReference
 from feast.stream_feature_view import StreamFeatureView
+from feast.transformation.pandas_transformation import PandasTransformation
+from feast.transformation.substrait_transformation import SubstraitTransformation
 
 
 class BaseRegistry(ABC):
@@ -347,41 +349,6 @@ def list_feature_views(
         """
         raise NotImplementedError
 
-    # request feature view operations
-    @abstractmethod
-    def get_request_feature_view(
-        self, name: str, project: str, allow_cache: bool = False
-    ) -> RequestFeatureView:
-        """
-        Retrieves a request feature view.
-
-        Args:
-            name: Name of request feature view
-            project: Feast project that this feature view belongs to
-            allow_cache: Allow returning feature view from the cached registry
-
-        Returns:
-            Returns either the specified feature view, or raises an exception if
-            none is found
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def list_request_feature_views(
-        self, project: str, allow_cache: bool = False
-    ) -> List[RequestFeatureView]:
-        """
-        Retrieve a list of request feature views from the registry
-
-        Args:
-            allow_cache: Allow returning feature views from the cached registry
-            project: Filter feature views based on project name
-
-        Returns:
-            List of request feature views
-        """
-        raise NotImplementedError
-
     @abstractmethod
     def apply_materialization(
         self,
@@ -662,18 +629,40 @@ def to_dict(self, project: str) -> Dict[str, List[Any]]:
             key=lambda on_demand_feature_view: on_demand_feature_view.name,
         ):
             odfv_dict = self._message_to_sorted_dict(on_demand_feature_view.to_proto())
-
-            odfv_dict["spec"]["userDefinedFunction"][
-                "body"
-            ] = on_demand_feature_view.transformation.udf_string
-            registry_dict["onDemandFeatureViews"].append(odfv_dict)
-        for request_feature_view in sorted(
-            self.list_request_feature_views(project=project),
-            key=lambda request_feature_view: request_feature_view.name,
-        ):
-            registry_dict["requestFeatureViews"].append(
-                self._message_to_sorted_dict(request_feature_view.to_proto())
+            # Warn on every serialization: the registry proto may have been written by
+            # an older release, so we dual-write the udf to both spec.userDefinedFunction
+            # and spec.featureTransformation, and readers must load both fields to
+            # reconcile any changes in the registry.
+            warnings.warn(
+                "The usage of spec.userDefinedFunction is deprecated and will be removed in a future release; please upgrade cautiously.",
+                DeprecationWarning,
             )
+            if on_demand_feature_view.feature_transformation:
+                if isinstance(
+                    on_demand_feature_view.feature_transformation, PandasTransformation
+                ):
+                    if "userDefinedFunction" not in odfv_dict["spec"]:
+                        odfv_dict["spec"]["userDefinedFunction"] = {}
+                    odfv_dict["spec"]["userDefinedFunction"][
+                        "body"
+                    ] = on_demand_feature_view.feature_transformation.udf_string
+                    odfv_dict["spec"]["featureTransformation"]["userDefinedFunction"][
+                        "body"
+                    ] = 
on_demand_feature_view.feature_transformation.udf_string + elif isinstance( + on_demand_feature_view.feature_transformation, + SubstraitTransformation, + ): + odfv_dict["spec"]["featureTransformation"]["substraitPlan"][ + "body" + ] = on_demand_feature_view.feature_transformation.substrait_plan + else: + odfv_dict["spec"]["featureTransformation"]["userDefinedFunction"][ + "body" + ] = None + odfv_dict["spec"]["featureTransformation"]["substraitPlan"][ + "body" + ] = None + registry_dict["onDemandFeatureViews"].append(odfv_dict) for stream_feature_view in sorted( self.list_stream_feature_views(project=project), key=lambda stream_feature_view: stream_feature_view.name, @@ -684,6 +673,7 @@ def to_dict(self, project: str) -> Dict[str, List[Any]]: "body" ] = stream_feature_view.udf_string registry_dict["streamFeatureViews"].append(sfv_dict) + for saved_dataset in sorted( self.list_saved_datasets(project=project), key=lambda item: item.name ): diff --git a/sdk/python/feast/infra/registry/caching_registry.py b/sdk/python/feast/infra/registry/caching_registry.py index 4c408b0a462..3101b073d51 100644 --- a/sdk/python/feast/infra/registry/caching_registry.py +++ b/sdk/python/feast/infra/registry/caching_registry.py @@ -14,7 +14,6 @@ from feast.infra.registry.base_registry import BaseRegistry from feast.on_demand_feature_view import OnDemandFeatureView from feast.project_metadata import ProjectMetadata -from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView @@ -145,34 +144,6 @@ def list_on_demand_feature_views( ) return self._list_on_demand_feature_views(project) - @abstractmethod - def _get_request_feature_view(self, name: str, project: str) -> RequestFeatureView: - pass - - def get_request_feature_view( - self, name: str, project: str, allow_cache: bool = False - ) -> RequestFeatureView: - if allow_cache: - self._refresh_cached_registry_if_necessary() - return proto_registry_utils.get_request_feature_view( - self.cached_registry_proto, name, project - ) - return self._get_request_feature_view(name, project) - - @abstractmethod - def _list_request_feature_views(self, project: str) -> List[RequestFeatureView]: - pass - - def list_request_feature_views( - self, project: str, allow_cache: bool = False - ) -> List[RequestFeatureView]: - if allow_cache: - self._refresh_cached_registry_if_necessary() - return proto_registry_utils.list_request_feature_views( - self.cached_registry_proto, project - ) - return self._list_request_feature_views(project) - @abstractmethod def _get_stream_feature_view(self, name: str, project: str) -> StreamFeatureView: pass diff --git a/sdk/python/feast/infra/registry/contrib/__init__.py b/sdk/python/feast/infra/registry/contrib/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/registry/contrib/postgres/__init__.py b/sdk/python/feast/infra/registry/contrib/postgres/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/registry/proto_registry_utils.py b/sdk/python/feast/infra/registry/proto_registry_utils.py index e93f513b691..4d2e16cb022 100644 --- a/sdk/python/feast/infra/registry/proto_registry_utils.py +++ b/sdk/python/feast/infra/registry/proto_registry_utils.py @@ -19,7 +19,6 @@ from feast.project_metadata import ProjectMetadata from feast.protos.feast.core.Registry_pb2 import ProjectMetadata as ProjectMetadataProto from 
feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto -from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView @@ -99,16 +98,6 @@ def get_stream_feature_view( raise FeatureViewNotFoundException(name, project) -def get_request_feature_view(registry_proto: RegistryProto, name: str, project: str): - for feature_view_proto in registry_proto.feature_views: - if ( - feature_view_proto.spec.name == name - and feature_view_proto.spec.project == project - ): - return RequestFeatureView.from_proto(feature_view_proto) - raise FeatureViewNotFoundException(name, project) - - def get_on_demand_feature_view( registry_proto: RegistryProto, name: str, project: str ) -> OnDemandFeatureView: @@ -180,19 +169,6 @@ def list_feature_views( return feature_views -@registry_proto_cache -def list_request_feature_views( - registry_proto: RegistryProto, project: str -) -> List[RequestFeatureView]: - feature_views: List[RequestFeatureView] = [] - for request_feature_view_proto in registry_proto.request_feature_views: - if request_feature_view_proto.spec.project == project: - feature_views.append( - RequestFeatureView.from_proto(request_feature_view_proto) - ) - return feature_views - - @registry_proto_cache def list_stream_feature_views( registry_proto: RegistryProto, project: str diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index a9d6c44f38c..d949b6079da 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -20,7 +20,7 @@ from urllib.parse import urlparse from google.protobuf.internal.containers import RepeatedCompositeFieldContainer -from proto import Message +from google.protobuf.message import Message from feast import usage from feast.base_feature_view import BaseFeatureView @@ -46,7 +46,6 @@ from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.repo_config import RegistryConfig from feast.repo_contents import RepoContents -from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView @@ -73,7 +72,6 @@ class FeastObjectType(Enum): ENTITY = "entity" FEATURE_VIEW = "feature view" ON_DEMAND_FEATURE_VIEW = "on demand feature view" - REQUEST_FEATURE_VIEW = "request feature view" STREAM_FEATURE_VIEW = "stream feature view" FEATURE_SERVICE = "feature service" @@ -88,9 +86,6 @@ def get_objects_from_registry( FeastObjectType.ON_DEMAND_FEATURE_VIEW: registry.list_on_demand_feature_views( project=project ), - FeastObjectType.REQUEST_FEATURE_VIEW: registry.list_request_feature_views( - project=project - ), FeastObjectType.STREAM_FEATURE_VIEW: registry.list_stream_feature_views( project=project, ), @@ -108,7 +103,6 @@ def get_objects_from_repo_contents( FeastObjectType.ENTITY: repo_contents.entities, FeastObjectType.FEATURE_VIEW: repo_contents.feature_views, FeastObjectType.ON_DEMAND_FEATURE_VIEW: repo_contents.on_demand_feature_views, - FeastObjectType.REQUEST_FEATURE_VIEW: repo_contents.request_feature_views, FeastObjectType.STREAM_FEATURE_VIEW: repo_contents.stream_feature_views, FeastObjectType.FEATURE_SERVICE: repo_contents.feature_services, } @@ -402,10 +396,6 @@ def apply_feature_view( existing_feature_views_of_same_type = ( self.cached_registry_proto.on_demand_feature_views ) - elif isinstance(feature_view, 
RequestFeatureView): - existing_feature_views_of_same_type = ( - self.cached_registry_proto.request_feature_views - ) else: raise ValueError(f"Unexpected feature view type: {type(feature_view)}") @@ -532,24 +522,6 @@ def list_feature_views( ) return proto_registry_utils.list_feature_views(registry_proto, project) - def get_request_feature_view( - self, name: str, project: str, allow_cache: bool = False - ): - registry_proto = self._get_registry_proto( - project=project, allow_cache=allow_cache - ) - return proto_registry_utils.get_request_feature_view( - registry_proto, name, project - ) - - def list_request_feature_views( - self, project: str, allow_cache: bool = False - ) -> List[RequestFeatureView]: - registry_proto = self._get_registry_proto( - project=project, allow_cache=allow_cache - ) - return proto_registry_utils.list_request_feature_views(registry_proto, project) - def get_feature_view( self, name: str, project: str, allow_cache: bool = False ) -> FeatureView: @@ -601,18 +573,6 @@ def delete_feature_view(self, name: str, project: str, commit: bool = True): self.commit() return - for idx, existing_request_feature_view_proto in enumerate( - self.cached_registry_proto.request_feature_views - ): - if ( - existing_request_feature_view_proto.spec.name == name - and existing_request_feature_view_proto.spec.project == project - ): - del self.cached_registry_proto.request_feature_views[idx] - if commit: - self.commit() - return - for idx, existing_on_demand_feature_view_proto in enumerate( self.cached_registry_proto.on_demand_feature_views ): @@ -890,10 +850,7 @@ def _existing_feature_view_names_to_fvs(self) -> Dict[str, Message]: for fv in self.cached_registry_proto.on_demand_feature_views } fvs = {fv.spec.name: fv for fv in self.cached_registry_proto.feature_views} - request_fvs = { - fv.spec.name: fv for fv in self.cached_registry_proto.request_feature_views - } sfv = { fv.spec.name: fv for fv in self.cached_registry_proto.stream_feature_views } - return {**odfvs, **fvs, **request_fvs, **sfv} + return {**odfvs, **fvs, **sfv} diff --git a/sdk/python/feast/infra/registry/remote.py b/sdk/python/feast/infra/registry/remote.py index 67d61ffec78..f93e1ab1c03 100644 --- a/sdk/python/feast/infra/registry/remote.py +++ b/sdk/python/feast/infra/registry/remote.py @@ -19,7 +19,6 @@ from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.registry import RegistryServer_pb2, RegistryServer_pb2_grpc from feast.repo_config import RegistryConfig -from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView @@ -215,31 +214,6 @@ def list_feature_views( for feature_view in response.feature_views ] - def get_request_feature_view( - self, name: str, project: str, allow_cache: bool = False - ) -> RequestFeatureView: - request = RegistryServer_pb2.GetRequestFeatureViewRequest( - name=name, project=project, allow_cache=allow_cache - ) - - response = self.stub.GetRequestFeatureView(request) - - return RequestFeatureView.from_proto(response) - - def list_request_feature_views( - self, project: str, allow_cache: bool = False - ) -> List[RequestFeatureView]: - request = RegistryServer_pb2.ListRequestFeatureViewsRequest( - project=project, allow_cache=allow_cache - ) - - response = self.stub.ListRequestFeatureViews(request) - - return [ - RequestFeatureView.from_proto(request_feature_view) - for request_feature_view in response.request_feature_views - ] - 
def apply_materialization( self, feature_view: FeatureView, diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py index cdf79c78b5f..326d2e02266 100644 --- a/sdk/python/feast/infra/registry/snowflake.py +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -44,9 +44,6 @@ OnDemandFeatureView as OnDemandFeatureViewProto, ) from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto -from feast.protos.feast.core.RequestFeatureView_pb2 import ( - RequestFeatureView as RequestFeatureViewProto, -) from feast.protos.feast.core.SavedDataset_pb2 import SavedDataset as SavedDatasetProto from feast.protos.feast.core.StreamFeatureView_pb2 import ( StreamFeatureView as StreamFeatureViewProto, @@ -55,7 +52,6 @@ ValidationReference as ValidationReferenceProto, ) from feast.repo_config import RegistryConfig -from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView @@ -370,7 +366,6 @@ def delete_feature_view(self, name: str, project: str, commit: bool = True): deleted_count = 0 for table in { "FEATURE_VIEWS", - "REQUEST_FEATURE_VIEWS", "ON_DEMAND_FEATURE_VIEWS", "STREAM_FEATURE_VIEWS", }: @@ -529,25 +524,6 @@ def get_on_demand_feature_view( FeatureViewNotFoundException, ) - def get_request_feature_view( - self, name: str, project: str, allow_cache: bool = False - ) -> RequestFeatureView: - if allow_cache: - self._refresh_cached_registry_if_necessary() - return proto_registry_utils.get_request_feature_view( - self.cached_registry_proto, name, project - ) - return self._get_object( - "REQUEST_FEATURE_VIEWS", - name, - project, - RequestFeatureViewProto, - RequestFeatureView, - "REQUEST_FEATURE_VIEW_NAME", - "REQUEST_FEATURE_VIEW_PROTO", - FeatureViewNotFoundException, - ) - def get_saved_dataset( self, name: str, project: str, allow_cache: bool = False ) -> SavedDataset: @@ -709,22 +685,6 @@ def list_on_demand_feature_views( "ON_DEMAND_FEATURE_VIEW_PROTO", ) - def list_request_feature_views( - self, project: str, allow_cache: bool = False - ) -> List[RequestFeatureView]: - if allow_cache: - self._refresh_cached_registry_if_necessary() - return proto_registry_utils.list_request_feature_views( - self.cached_registry_proto, project - ) - return self._list_objects( - "REQUEST_FEATURE_VIEWS", - project, - RequestFeatureViewProto, - RequestFeatureView, - "REQUEST_FEATURE_VIEW_PROTO", - ) - def list_saved_datasets( self, project: str, allow_cache: bool = False ) -> List[SavedDataset]: @@ -809,7 +769,7 @@ def apply_materialization( fv_column_name = fv_table_str[:-1] python_class, proto_class = self._infer_fv_classes(feature_view) - if python_class in {RequestFeatureView, OnDemandFeatureView}: + if python_class in {OnDemandFeatureView}: raise ValueError( f"Cannot apply materialization for feature {feature_view.name} of type {python_class}" ) @@ -933,7 +893,6 @@ def proto(self) -> RegistryProto: (self.list_feature_views, r.feature_views), (self.list_data_sources, r.data_sources), (self.list_on_demand_feature_views, r.on_demand_feature_views), - (self.list_request_feature_views, r.request_feature_views), (self.list_stream_feature_views, r.stream_feature_views), (self.list_feature_services, r.feature_services), (self.list_saved_datasets, r.saved_datasets), @@ -968,7 +927,6 @@ def _get_all_projects(self) -> Set[str]: "ENTITIES", "FEATURE_VIEWS", "ON_DEMAND_FEATURE_VIEWS", - "REQUEST_FEATURE_VIEWS", "STREAM_FEATURE_VIEWS", ] @@ -1010,8 
+968,6 @@ def _infer_fv_classes(self, feature_view): python_class, proto_class = FeatureView, FeatureViewProto elif isinstance(feature_view, OnDemandFeatureView): python_class, proto_class = OnDemandFeatureView, OnDemandFeatureViewProto - elif isinstance(feature_view, RequestFeatureView): - python_class, proto_class = RequestFeatureView, RequestFeatureViewProto else: raise ValueError(f"Unexpected feature view type: {type(feature_view)}") return python_class, proto_class @@ -1023,8 +979,6 @@ def _infer_fv_table(self, feature_view) -> str: table = "FEATURE_VIEWS" elif isinstance(feature_view, OnDemandFeatureView): table = "ON_DEMAND_FEATURE_VIEWS" - elif isinstance(feature_view, RequestFeatureView): - table = "REQUEST_FEATURE_VIEWS" else: raise ValueError(f"Unexpected feature view type: {type(feature_view)}") return table diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index 597c9b85136..f9030a68759 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -50,9 +50,6 @@ OnDemandFeatureView as OnDemandFeatureViewProto, ) from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto -from feast.protos.feast.core.RequestFeatureView_pb2 import ( - RequestFeatureView as RequestFeatureViewProto, -) from feast.protos.feast.core.SavedDataset_pb2 import SavedDataset as SavedDatasetProto from feast.protos.feast.core.StreamFeatureView_pb2 import ( StreamFeatureView as StreamFeatureViewProto, @@ -61,7 +58,6 @@ ValidationReference as ValidationReferenceProto, ) from feast.repo_config import RegistryConfig -from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView @@ -96,16 +92,6 @@ Column("user_metadata", LargeBinary, nullable=True), ) -request_feature_views = Table( - "request_feature_views", - metadata, - Column("feature_view_name", String(50), primary_key=True), - Column("project_id", String(50), primary_key=True), - Column("last_updated_timestamp", BigInteger, nullable=False), - Column("feature_view_proto", LargeBinary, nullable=False), - Column("user_metadata", LargeBinary, nullable=True), -) - stream_feature_views = Table( "stream_feature_views", metadata, @@ -216,7 +202,6 @@ def teardown(self): feature_views, feature_services, on_demand_feature_views, - request_feature_views, saved_datasets, validation_references, }: @@ -292,18 +277,6 @@ def _get_on_demand_feature_view( not_found_exception=FeatureViewNotFoundException, ) - def _get_request_feature_view(self, name: str, project: str): - return self._get_object( - table=request_feature_views, - name=name, - project=project, - proto_class=RequestFeatureViewProto, - python_class=RequestFeatureView, - id_field_name="feature_view_name", - proto_field_name="feature_view_proto", - not_found_exception=FeatureViewNotFoundException, - ) - def _get_feature_service(self, name: str, project: str) -> FeatureService: return self._get_object( table=feature_services, @@ -363,7 +336,6 @@ def delete_feature_view(self, name: str, project: str, commit: bool = True): deleted_count = 0 for table in { feature_views, - request_feature_views, on_demand_feature_views, stream_feature_views, }: @@ -459,15 +431,6 @@ def _list_saved_datasets(self, project: str) -> List[SavedDataset]: "saved_dataset_proto", ) - def _list_request_feature_views(self, project: str) -> List[RequestFeatureView]: - return self._list_objects( - request_feature_views, - project, - 
RequestFeatureViewProto, - RequestFeatureView, - "feature_view_proto", - ) - def _list_on_demand_feature_views(self, project: str) -> List[OnDemandFeatureView]: return self._list_objects( on_demand_feature_views, @@ -532,7 +495,7 @@ def apply_materialization( table = self._infer_fv_table(feature_view) python_class, proto_class = self._infer_fv_classes(feature_view) - if python_class in {RequestFeatureView, OnDemandFeatureView}: + if python_class in {OnDemandFeatureView}: raise ValueError( f"Cannot apply materialization for feature {feature_view.name} of type {python_class}" ) @@ -628,8 +591,6 @@ def _infer_fv_table(self, feature_view): table = feature_views elif isinstance(feature_view, OnDemandFeatureView): table = on_demand_feature_views - elif isinstance(feature_view, RequestFeatureView): - table = request_feature_views else: raise ValueError(f"Unexpected feature view type: {type(feature_view)}") return table @@ -641,8 +602,6 @@ def _infer_fv_classes(self, feature_view): python_class, proto_class = FeatureView, FeatureViewProto elif isinstance(feature_view, OnDemandFeatureView): python_class, proto_class = OnDemandFeatureView, OnDemandFeatureViewProto - elif isinstance(feature_view, RequestFeatureView): - python_class, proto_class = RequestFeatureView, RequestFeatureViewProto else: raise ValueError(f"Unexpected feature view type: {type(feature_view)}") return python_class, proto_class @@ -671,7 +630,6 @@ def proto(self) -> RegistryProto: (self.list_feature_views, r.feature_views), (self.list_data_sources, r.data_sources), (self.list_on_demand_feature_views, r.on_demand_feature_views), - (self.list_request_feature_views, r.request_feature_views), (self.list_stream_feature_views, r.stream_feature_views), (self.list_feature_services, r.feature_services), (self.list_saved_datasets, r.saved_datasets), @@ -733,7 +691,10 @@ def _apply_object( } update_stmt = ( update(table) - .where(getattr(table.c, id_field_name) == name) + .where( + getattr(table.c, id_field_name) == name, + table.c.project_id == project, + ) .values( values, ) @@ -905,7 +866,6 @@ def _get_all_projects(self) -> Set[str]: entities, data_sources, feature_views, - request_feature_views, on_demand_feature_views, stream_feature_views, }: diff --git a/sdk/python/feast/infra/utils/snowflake/__init__.py b/sdk/python/feast/infra/utils/snowflake/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/utils/snowflake/registry/__init__.py b/sdk/python/feast/infra/utils/snowflake/registry/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql index 4b53d6bb3f6..aa35caeac4a 100644 --- a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql +++ b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql @@ -57,15 +57,6 @@ CREATE TABLE IF NOT EXISTS REGISTRY_PATH."ON_DEMAND_FEATURE_VIEWS" ( PRIMARY KEY (on_demand_feature_view_name, project_id) ); -CREATE TABLE IF NOT EXISTS REGISTRY_PATH."REQUEST_FEATURE_VIEWS" ( - request_feature_view_name VARCHAR, - project_id VARCHAR, - last_updated_timestamp TIMESTAMP_LTZ NOT NULL, - request_feature_view_proto BINARY NOT NULL, - user_metadata BINARY, - PRIMARY KEY (request_feature_view_name, project_id) -); - CREATE TABLE IF NOT EXISTS REGISTRY_PATH."SAVED_DATASETS" ( saved_dataset_name VARCHAR, project_id VARCHAR, diff --git 
a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql index 7f5c1991eac..a355c72062b 100644 --- a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql +++ b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql @@ -12,8 +12,6 @@ DROP TABLE IF EXISTS REGISTRY_PATH."MANAGED_INFRA"; DROP TABLE IF EXISTS REGISTRY_PATH."ON_DEMAND_FEATURE_VIEWS"; -DROP TABLE IF EXISTS REGISTRY_PATH."REQUEST_FEATURE_VIEWS"; - DROP TABLE IF EXISTS REGISTRY_PATH."SAVED_DATASETS"; DROP TABLE IF EXISTS REGISTRY_PATH."STREAM_FEATURE_VIEWS"; diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index 586286a3d47..f83500cbc9b 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -17,8 +17,6 @@ from feast.feature_view import FeatureView from feast.feature_view_projection import FeatureViewProjection from feast.field import Field, from_value_type -from feast.on_demand_pandas_transformation import OnDemandPandasTransformation -from feast.on_demand_substrait_transformation import OnDemandSubstraitTransformation from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( OnDemandFeatureView as OnDemandFeatureViewProto, ) @@ -27,6 +25,15 @@ OnDemandFeatureViewSpec, OnDemandSource, ) +from feast.protos.feast.core.Transformation_pb2 import ( + FeatureTransformationV2 as FeatureTransformationProto, +) +from feast.protos.feast.core.Transformation_pb2 import ( + UserDefinedFunctionV2 as UserDefinedFunctionProto, +) +from feast.transformation.pandas_transformation import PandasTransformation +from feast.transformation.python_transformation import PythonTransformation +from feast.transformation.substrait_transformation import SubstraitTransformation from feast.type_map import ( feast_value_type_to_pandas_type, python_type_to_feast_value_type, @@ -51,7 +58,7 @@ class OnDemandFeatureView(BaseFeatureView): sources with type FeatureViewProjection. source_request_sources: A map from input source names to the actual input sources with type RequestSource. - transformation: The user defined transformation. + feature_transformation: The user defined transformation. description: A human-readable description. tags: A dictionary of key-value pairs to store arbitrary metadata. owner: The owner of the on demand feature view, typically the email of the primary @@ -62,7 +69,10 @@ class OnDemandFeatureView(BaseFeatureView): features: List[Field] source_feature_view_projections: Dict[str, FeatureViewProjection] source_request_sources: Dict[str, RequestSource] - transformation: Union[OnDemandPandasTransformation] + feature_transformation: Union[ + PandasTransformation, PythonTransformation, SubstraitTransformation + ] + mode: str description: str tags: Dict[str, str] owner: str @@ -82,7 +92,10 @@ def __init__( # noqa: C901 ], udf: Optional[FunctionType] = None, udf_string: str = "", - transformation: Optional[Union[OnDemandPandasTransformation]] = None, + feature_transformation: Union[ + PandasTransformation, PythonTransformation, SubstraitTransformation + ], + mode: str = "pandas", description: str = "", tags: Optional[Dict[str, str]] = None, owner: str = "", @@ -100,7 +113,8 @@ def __init__( # noqa: C901 udf (deprecated): The user defined transformation function, which must take pandas dataframes as inputs. 
             udf_string (deprecated): The source code version of the udf (for diffing and displaying in Web UI)
-            transformation: The user defined transformation.
+            feature_transformation: The user defined transformation.
+            mode: Mode of execution (e.g., Pandas or Python native)
             description (optional): A human-readable description.
             tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
             owner (optional): The owner of the on demand feature view, typically the email
@@ -114,16 +128,28 @@
             owner=owner,
         )
 
-        if not transformation:
+        if mode not in {"python", "pandas", "substrait"}:
+            raise Exception(
+                f"Unknown mode {mode}. OnDemandFeatureView only supports 'python' and 'pandas' UDFs and 'substrait' transformations."
+            )
+        else:
+            self.mode = mode
+        if not feature_transformation:
             if udf:
                 warnings.warn(
-                    "udf and udf_string parameters are deprecated. Please use transformation=OnDemandPandasTransformation(udf, udf_string) instead.",
+                    "udf and udf_string parameters are deprecated. Please use feature_transformation=PandasTransformation(udf, udf_string) instead.",
                     DeprecationWarning,
                 )
-                transformation = OnDemandPandasTransformation(udf, udf_string)
+                # Note: inspecting the return signature won't work with isinstance,
+                # so dispatching on the declared mode is the best alternative.
+                if mode == "pandas":
+                    feature_transformation = PandasTransformation(udf, udf_string)
+                elif mode == "python":
+                    feature_transformation = PythonTransformation(udf, udf_string)
+                else:
+                    pass
             else:
                 raise Exception(
-                    "OnDemandFeatureView needs to be initialized with either transformation or udf arguments"
+                    "OnDemandFeatureView needs to be initialized with either feature_transformation or udf arguments"
                 )
 
         self.source_feature_view_projections: Dict[str, FeatureViewProjection] = {}
@@ -138,7 +164,7 @@
                 odfv_source.name
             ] = odfv_source.projection
 
-        self.transformation = transformation
+        self.feature_transformation = feature_transformation
 
     @property
     def proto_class(self) -> Type[OnDemandFeatureViewProto]:
@@ -150,7 +176,8 @@ def __copy__(self):
             schema=self.features,
             sources=list(self.source_feature_view_projections.values())
             + list(self.source_request_sources.values()),
-            transformation=self.transformation,
+            feature_transformation=self.feature_transformation,
+            mode=self.mode,
             description=self.description,
             tags=self.tags,
             owner=self.owner,
@@ -171,7 +198,8 @@ def __eq__(self, other):
             self.source_feature_view_projections
             != other.source_feature_view_projections
             or self.source_request_sources != other.source_request_sources
-            or self.transformation != other.transformation
+            or self.mode != other.mode
+            or self.feature_transformation != other.feature_transformation
         ):
             return False
 
@@ -205,16 +233,23 @@ def to_proto(self) -> OnDemandFeatureViewProto:
                     request_data_source=request_sources.to_proto()
                 )
 
+        feature_transformation = FeatureTransformationProto(
+            user_defined_function=self.feature_transformation.to_proto()
+            if isinstance(
+                self.feature_transformation,
+                (PandasTransformation, PythonTransformation),
+            )
+            else None,
+            substrait_transformation=self.feature_transformation.to_proto()
+            if isinstance(self.feature_transformation, SubstraitTransformation)
+            else None,
+        )
         spec = OnDemandFeatureViewSpec(
            name=self.name,
            features=[feature.to_proto() for feature in self.features],
            sources=sources,
-            user_defined_function=self.transformation.to_proto()
-            if type(self.transformation) == OnDemandPandasTransformation
-            else None,
-            on_demand_substrait_transformation=self.transformation.to_proto()  # type: ignore
-            if 
type(self.transformation) == OnDemandSubstraitTransformation - else None, + feature_transformation=feature_transformation, + mode=self.mode, description=self.description, tags=self.tags, owner=self.owner, @@ -223,12 +258,17 @@ def to_proto(self) -> OnDemandFeatureViewProto: return OnDemandFeatureViewProto(spec=spec, meta=meta) @classmethod - def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): + def from_proto( + cls, + on_demand_feature_view_proto: OnDemandFeatureViewProto, + skip_udf: bool = False, + ): """ Creates an on demand feature view from a protobuf representation. Args: on_demand_feature_view_proto: A protobuf representation of an on-demand feature view. + skip_udf: A boolean indicating whether to skip loading the udf Returns: A OnDemandFeatureView object based on the on-demand feature view protobuf. @@ -254,18 +294,37 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): ) if ( - on_demand_feature_view_proto.spec.WhichOneof("transformation") + on_demand_feature_view_proto.spec.feature_transformation.WhichOneof( + "transformation" + ) == "user_defined_function" + and on_demand_feature_view_proto.spec.feature_transformation.user_defined_function.body_text + != "" + ): + transformation = PandasTransformation.from_proto( + on_demand_feature_view_proto.spec.feature_transformation.user_defined_function + ) + elif ( + on_demand_feature_view_proto.spec.feature_transformation.WhichOneof( + "transformation" + ) + == "substrait_transformation" ): - transformation = OnDemandPandasTransformation.from_proto( - on_demand_feature_view_proto.spec.user_defined_function + transformation = SubstraitTransformation.from_proto( + on_demand_feature_view_proto.spec.feature_transformation.substrait_transformation ) elif ( - on_demand_feature_view_proto.spec.WhichOneof("transformation") - == "on_demand_substrait_transformation" + hasattr(on_demand_feature_view_proto.spec, "user_defined_function") + and on_demand_feature_view_proto.spec.feature_transformation.user_defined_function.body_text + == "" ): - transformation = OnDemandSubstraitTransformation.from_proto( - on_demand_feature_view_proto.spec.on_demand_substrait_transformation + backwards_compatible_udf = UserDefinedFunctionProto( + name=on_demand_feature_view_proto.spec.user_defined_function.name, + body=on_demand_feature_view_proto.spec.user_defined_function.body, + body_text=on_demand_feature_view_proto.spec.user_defined_function.body_text, + ) + transformation = PandasTransformation.from_proto( + user_defined_function_proto=backwards_compatible_udf, ) else: raise Exception("At least one transformation type needs to be provided") @@ -280,7 +339,8 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): for feature in on_demand_feature_view_proto.spec.features ], sources=sources, - transformation=transformation, + feature_transformation=transformation, + mode=on_demand_feature_view_proto.spec.mode, description=on_demand_feature_view_proto.spec.description, tags=dict(on_demand_feature_view_proto.spec.tags), owner=on_demand_feature_view_proto.spec.owner, @@ -319,12 +379,17 @@ def get_request_data_schema(self) -> Dict[str, ValueType]: ) return schema + def _get_projected_feature_name(self, feature: str) -> str: + return f"{self.projection.name_to_use()}__{feature}" + def get_transformed_features_df( self, df_with_features: pd.DataFrame, full_feature_names: bool = False, ) -> pd.DataFrame: # Apply on demand transformations + if not isinstance(df_with_features, pd.DataFrame): + 
@@ -319,12 +379,17 @@ def get_request_data_schema(self) -> Dict[str, ValueType]:
             )
         return schema

+    def _get_projected_feature_name(self, feature: str) -> str:
+        return f"{self.projection.name_to_use()}__{feature}"
+
     def get_transformed_features_df(
         self,
         df_with_features: pd.DataFrame,
         full_feature_names: bool = False,
     ) -> pd.DataFrame:
         # Apply on demand transformations
+        if not isinstance(df_with_features, pd.DataFrame):
+            raise TypeError("get_transformed_features_df only accepts pd.DataFrame")
         columns_to_cleanup = []
         for source_fv_projection in self.source_feature_view_projections.values():
             for feature in source_fv_projection.features:
@@ -339,14 +404,15 @@ def get_transformed_features_df(
                     columns_to_cleanup.append(full_feature_ref)

         # Compute transformed values and apply to each result row
-
-        df_with_transformed_features = self.transformation.transform(df_with_features)
+        df_with_transformed_features: pd.DataFrame = (
+            self.feature_transformation.transform(df_with_features)
+        )

         # Work out whether the correct columns names are used.
         rename_columns: Dict[str, str] = {}
         for feature in self.features:
             short_name = feature.name
-            long_name = f"{self.projection.name_to_use()}__{feature.name}"
+            long_name = self._get_projected_feature_name(feature.name)
             if (
                 short_name in df_with_transformed_features.columns
                 and full_feature_names
@@ -360,7 +426,135 @@ def get_transformed_features_df(
         df_with_features.drop(columns=columns_to_cleanup, inplace=True)
         return df_with_transformed_features.rename(columns=rename_columns)

+    def get_transformed_features_dict(
+        self,
+        feature_dict: Dict[str, Any],  # type: ignore
+    ) -> Dict[str, Any]:
+
+        # We need a mapping from the full feature name to the short name and back
+        # for renaming. The simplest approach is to materialize the full reference,
+        # copy the values under the short reference, and clean up afterwards.
+        columns_to_cleanup: List[str] = []
+        for source_fv_projection in self.source_feature_view_projections.values():
+            for feature in source_fv_projection.features:
+                full_feature_ref = f"{source_fv_projection.name}__{feature.name}"
+                if full_feature_ref in feature_dict.keys():
+                    # Make sure the partial feature name is always present
+                    feature_dict[feature.name] = feature_dict[full_feature_ref]
+                    columns_to_cleanup.append(str(feature.name))
+                elif feature.name in feature_dict.keys():
+                    # Make sure the full feature name is always present
+                    feature_dict[full_feature_ref] = feature_dict[feature.name]
+                    columns_to_cleanup.append(str(full_feature_ref))
+
+        output_dict: Dict[str, Any] = self.feature_transformation.transform(
+            feature_dict
+        )
+        for feature_name in columns_to_cleanup:
+            del output_dict[feature_name]
+        return output_dict
+
+    def get_transformed_features(
+        self,
+        features: Union[Dict[str, Any], pd.DataFrame],
+        full_feature_names: bool = False,
+    ) -> Union[Dict[str, Any], pd.DataFrame]:
+        # TODO: classic inheritance pattern... maybe fix this
+        if self.mode == "python" and isinstance(features, Dict):
+            # note: full_feature_names is not needed for the dictionary
+            return self.get_transformed_features_dict(
+                feature_dict=features,
+            )
+        elif self.mode in {"pandas", "substrait"} and isinstance(
+            features, pd.DataFrame
+        ):
+            return self.get_transformed_features_df(
+                df_with_features=features,
+                full_feature_names=full_feature_names,
+            )
+        else:
+            raise Exception(
+                f'Invalid OnDemandFeatureView mode "{self.mode}" for the given input; '
+                'expected a Dict in "python" mode or a DataFrame in "pandas"/"substrait" mode.'
+            )
+
     def infer_features(self) -> None:
+        if self.mode in {"pandas", "substrait"}:
+            self._infer_features_df()
+        elif self.mode == "python":
+            self._infer_features_dict()
+        else:
+            raise Exception(
+                f'Invalid OnDemandFeatureView mode "{self.mode}". Expected one of "python", "pandas", or "substrait".'
+            )
+
+    def _infer_features_dict(self):
+        """
+        Infers the set of features associated with this feature view from the input source.
+
+        Raises:
+            RegistryInferenceFailure: The set of features could not be inferred.
+        """
+        rand_dict_value: Dict[str, Any] = {
+            "float": [1.0],
+            "int": [1],
+            "str": ["hello world"],
+            "bytes": [str.encode("hello world")],
+            "bool": [True],
+            "datetime64[ns]": [datetime.utcnow()],
+        }
+
+        feature_dict = {}
+        for feature_view_projection in self.source_feature_view_projections.values():
+            for feature in feature_view_projection.features:
+                dtype = feast_value_type_to_pandas_type(feature.dtype.to_value_type())
+                feature_dict[f"{feature_view_projection.name}__{feature.name}"] = (
+                    rand_dict_value[dtype] if dtype in rand_dict_value else [None]
+                )
+                feature_dict[f"{feature.name}"] = (
+                    rand_dict_value[dtype] if dtype in rand_dict_value else [None]
+                )
+        for request_data in self.source_request_sources.values():
+            for field in request_data.schema:
+                dtype = feast_value_type_to_pandas_type(field.dtype.to_value_type())
+                feature_dict[f"{field.name}"] = (
+                    rand_dict_value[dtype] if dtype in rand_dict_value else [None]
+                )

+        output_dict: Dict[str, List[Any]] = self.feature_transformation.transform(
+            feature_dict
+        )
+        inferred_features = []
+        for f, dt in output_dict.items():
+            inferred_features.append(
+                Field(
+                    name=f,
+                    dtype=from_value_type(
+                        python_type_to_feast_value_type(
+                            f, type_name=type(dt[0]).__name__
+                        )
+                    ),
+                )
+            )
+
+        if self.features:
+            missing_features = []
+            for specified_features in self.features:
+                if specified_features not in inferred_features:
+                    missing_features.append(specified_features)
+            if missing_features:
+                raise SpecifiedFeaturesNotPresentError(
+                    missing_features, inferred_features, self.name
+                )
+        else:
+            self.features = inferred_features
+
+        if not self.features:
+            raise RegistryInferenceFailure(
+                "OnDemandFeatureView",
+                f"Could not infer Features for the feature view '{self.name}'.",
+            )
+
+    def _infer_features_df(self) -> None:
         """
         Infers the set of features associated to this feature view from the input source.

@@ -390,7 +584,8 @@ def infer_features(self) -> None:
                 dtype = feast_value_type_to_pandas_type(field.dtype.to_value_type())
                 sample_val = rand_df_value[dtype] if dtype in rand_df_value else None
                 df[f"{field.name}"] = pd.Series(sample_val, dtype=dtype)
-        output_df: pd.DataFrame = self.transformation.transform(df)
+
+        output_df: pd.DataFrame = self.feature_transformation.transform(df)
         inferred_features = []
         for f, dt in zip(output_df.columns, output_df.dtypes):
             inferred_features.append(
@@ -446,6 +641,7 @@ def on_demand_feature_view(
             FeatureViewProjection,
         ]
     ],
+    mode: str = "pandas",
    description: str = "",
    tags: Optional[Dict[str, str]] = None,
    owner: str = "",
@@ -459,6 +655,7 @@
        sources: A map from input source names to the actual input sources, which may
            be feature views, or request data sources. These sources serve as inputs
            to the udf, which will refer to them by name.
+       mode: The mode of execution (e.g., pandas or Python native)
        description (optional): A human-readable description.
        tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
        owner (optional): The owner of the on demand feature view, typically the email
@@ -472,6 +669,7 @@ def mainify(obj) -> None:
         obj.__module__ = "__main__"

     def decorator(user_function):
+
         return_annotation = inspect.signature(user_function).return_annotation
         if (
             return_annotation
@@ -487,7 +685,7 @@ def decorator(user_function):

             input_fields: Field = []
             for s in sources:
-                if type(s) == FeatureView:
+                if isinstance(s, FeatureView):
                     fields = s.projection.features
                 else:
                     fields = s.features
@@ -506,19 +704,33 @@ def decorator(user_function):

             expr = user_function(ibis.table(input_fields, "t"))

-            transformation = OnDemandSubstraitTransformation(
+            transformation = SubstraitTransformation(
                 substrait_plan=compiler.compile(expr).SerializeToString()
             )
         else:
             udf_string = dill.source.getsource(user_function)
             mainify(user_function)
-            transformation = OnDemandPandasTransformation(user_function, udf_string)
+            if mode == "pandas":
+                if return_annotation not in (inspect._empty, pd.DataFrame):
+                    raise TypeError(
+                        f"return signature for {user_function} is {return_annotation} but should be pd.DataFrame"
+                    )
+                transformation = PandasTransformation(user_function, udf_string)
+            elif mode == "python":
+                if return_annotation not in (inspect._empty, Dict[str, Any]):
+                    raise TypeError(
+                        f"return signature for {user_function} is {return_annotation} but should be Dict[str, Any]"
+                    )
+                transformation = PythonTransformation(user_function, udf_string)
+            elif mode == "substrait":
+                pass

         on_demand_feature_view_obj = OnDemandFeatureView(
             name=user_function.__name__,
             sources=sources,
             schema=schema,
-            transformation=transformation,
+            feature_transformation=transformation,
+            mode=mode,
             description=description,
             tags=tags,
             owner=owner,
@@ -546,3 +758,8 @@ def feature_view_to_batch_feature_view(fv: FeatureView) -> BatchFeatureView:
     bfv.features = copy.copy(fv.features)
     bfv.entities = copy.copy(fv.entities)
     return bfv
+
+
+def _empty_odfv_udf_fn(x: Any) -> Any:
+    # just an identity mapping, otherwise we risk tripping some downstream tests
+    return x
diff --git a/sdk/python/feast/registry_server.py b/sdk/python/feast/registry_server.py
index 221715480e5..7de0cc43e14 100644
--- a/sdk/python/feast/registry_server.py
+++ b/sdk/python/feast/registry_server.py
@@ -59,23 +59,6 @@ def ListFeatureViews(self, request, context):
             ]
         )

-    def GetRequestFeatureView(
-        self, request: RegistryServer_pb2.GetRequestFeatureViewRequest, context
-    ):
-        return self.proxied_registry.get_request_feature_view(
-            name=request.name, project=request.project, allow_cache=request.allow_cache
-        ).to_proto()
-
-    def ListRequestFeatureViews(self, request, context):
-        return RegistryServer_pb2.ListRequestFeatureViewsResponse(
-            request_feature_views=[
-                request_feature_view.to_proto()
-                for request_feature_view in self.proxied_registry.list_request_feature_views(
-                    project=request.project, allow_cache=request.allow_cache
-                )
-            ]
-        )
-
     def GetStreamFeatureView(
         self, request: RegistryServer_pb2.GetStreamFeatureViewRequest, context
     ):
diff --git a/sdk/python/feast/repo_contents.py b/sdk/python/feast/repo_contents.py
index fe5cbd284bc..33b99f29b26 100644
--- a/sdk/python/feast/repo_contents.py
+++ b/sdk/python/feast/repo_contents.py
@@ -19,7 +19,6 @@
 from feast.feature_view import FeatureView
 from feast.on_demand_feature_view import OnDemandFeatureView
 from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto
-from feast.request_feature_view import RequestFeatureView
 from feast.stream_feature_view import StreamFeatureView

@@ -31,7 +30,6 @@ class RepoContents(NamedTuple):
     data_sources: List[DataSource]
     feature_views: List[FeatureView]
     on_demand_feature_views: List[OnDemandFeatureView]
-    request_feature_views: List[RequestFeatureView]
     stream_feature_views: List[StreamFeatureView]
     entities: List[Entity]
     feature_services: List[FeatureService]
@@ -46,9 +44,6 @@ def to_registry_proto(self) -> RegistryProto:
         registry_proto.on_demand_feature_views.extend(
             [fv.to_proto() for fv in self.on_demand_feature_views]
         )
-        registry_proto.request_feature_views.extend(
-            [fv.to_proto() for fv in self.request_feature_views]
-        )
         registry_proto.feature_services.extend(
             [fs.to_proto() for fs in self.feature_services]
         )
diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py
index 120f6e7a422..000e0004388 100644
--- a/sdk/python/feast/repo_operations.py
+++ b/sdk/python/feast/repo_operations.py
@@ -29,7 +29,6 @@
 from feast.on_demand_feature_view import OnDemandFeatureView
 from feast.repo_config import RepoConfig
 from feast.repo_contents import RepoContents
-from feast.request_feature_view import RequestFeatureView
 from feast.stream_feature_view import StreamFeatureView
 from feast.usage import log_exceptions_and_usage

@@ -114,7 +113,6 @@ def parse_repo(repo_root: Path) -> RepoContents:
         feature_services=[],
         on_demand_feature_views=[],
         stream_feature_views=[],
-        request_feature_views=[],
     )

     for repo_file in get_repo_files(repo_root):
@@ -196,10 +194,6 @@ def parse_repo(repo_root: Path) -> RepoContents:
             (obj is odfv) for odfv in res.on_demand_feature_views
         ):
             res.on_demand_feature_views.append(obj)
-        elif isinstance(obj, RequestFeatureView) and not any(
-            (obj is rfv) for rfv in res.request_feature_views
-        ):
-            res.request_feature_views.append(obj)
     res.entities.append(DUMMY_ENTITY)
     return res

@@ -250,7 +244,6 @@ def extract_objects_for_apply_delete(project, registry, repo):
         Union[
             Entity,
             FeatureView,
-            RequestFeatureView,
             OnDemandFeatureView,
             StreamFeatureView,
             FeatureService,
@@ -264,7 +257,6 @@ def extract_objects_for_apply_delete(project, registry, repo):
         Union[
             Entity,
             FeatureView,
-            RequestFeatureView,
             OnDemandFeatureView,
             StreamFeatureView,
             FeatureService,
diff --git a/sdk/python/feast/request_feature_view.py b/sdk/python/feast/request_feature_view.py
deleted file mode 100644
index 7248ffe9890..00000000000
--- a/sdk/python/feast/request_feature_view.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import copy
-import warnings
-from typing import Dict, List, Optional, Type
-
-from feast.base_feature_view import BaseFeatureView
-from feast.data_source import RequestSource
-from feast.feature_view_projection import FeatureViewProjection
-from feast.field import Field
-from feast.protos.feast.core.RequestFeatureView_pb2 import (
-    RequestFeatureView as RequestFeatureViewProto,
-)
-from feast.protos.feast.core.RequestFeatureView_pb2 import RequestFeatureViewSpec
-from feast.usage import log_exceptions
-
-
-class RequestFeatureView(BaseFeatureView):
-    """
-    [Experimental] A RequestFeatureView defines a logical group of features that should
-    be available as an input to an on demand feature view at request time.
-
-    Attributes:
-        name: The unique name of the request feature view.
-        request_source: The request source that specifies the schema and
-            features of the request feature view.
-        features: The list of features defined as part of this request feature view.
-        description: A human-readable description.
-        tags: A dictionary of key-value pairs to store arbitrary metadata.
-        owner: The owner of the request feature view, typically the email of the primary
-            maintainer.
-    """
-
-    name: str
-    request_source: RequestSource
-    features: List[Field]
-    description: str
-    tags: Dict[str, str]
-    owner: str
-
-    @log_exceptions
-    def __init__(
-        self,
-        name: str,
-        request_data_source: RequestSource,
-        description: str = "",
-        tags: Optional[Dict[str, str]] = None,
-        owner: str = "",
-    ):
-        """
-        Creates a RequestFeatureView object.
-
-        Args:
-            name: The unique name of the request feature view.
-            request_data_source: The request data source that specifies the schema and
-                features of the request feature view.
-            description (optional): A human-readable description.
-            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
-            owner (optional): The owner of the request feature view, typically the email
-                of the primary maintainer.
-        """
-        warnings.warn(
-            "Request feature view is deprecated. "
-            "Please use request data source instead",
-            DeprecationWarning,
-        )
-
-        if isinstance(request_data_source.schema, Dict):
-            new_features = [
-                Field(name=name, dtype=dtype)
-                for name, dtype in request_data_source.schema.items()
-            ]
-        else:
-            new_features = request_data_source.schema
-
-        super().__init__(
-            name=name,
-            features=new_features,
-            description=description,
-            tags=tags,
-            owner=owner,
-        )
-        self.request_source = request_data_source
-
-    @property
-    def proto_class(self) -> Type[RequestFeatureViewProto]:
-        return RequestFeatureViewProto
-
-    def to_proto(self) -> RequestFeatureViewProto:
-        """
-        Converts an request feature view object to its protobuf representation.
-
-        Returns:
-            A RequestFeatureViewProto protobuf.
-        """
-        spec = RequestFeatureViewSpec(
-            name=self.name,
-            request_data_source=self.request_source.to_proto(),
-            description=self.description,
-            tags=self.tags,
-            owner=self.owner,
-        )
-
-        return RequestFeatureViewProto(spec=spec)
-
-    @classmethod
-    def from_proto(cls, request_feature_view_proto: RequestFeatureViewProto):
-        """
-        Creates a request feature view from a protobuf representation.
-
-        Args:
-            request_feature_view_proto: A protobuf representation of an request feature view.
-
-        Returns:
-            A RequestFeatureView object based on the request feature view protobuf.
-        """
-
-        request_feature_view_obj = cls(
-            name=request_feature_view_proto.spec.name,
-            request_data_source=RequestSource.from_proto(
-                request_feature_view_proto.spec.request_data_source
-            ),
-            description=request_feature_view_proto.spec.description,
-            tags=dict(request_feature_view_proto.spec.tags),
-            owner=request_feature_view_proto.spec.owner,
-        )
-
-        # FeatureViewProjections are not saved in the RequestFeatureView proto.
-        # Create the default projection.
-        request_feature_view_obj.projection = FeatureViewProjection.from_definition(
-            request_feature_view_obj
-        )
-
-        return request_feature_view_obj
-
-    def __copy__(self):
-        fv = RequestFeatureView(name=self.name, request_data_source=self.request_source)
-        fv.projection = copy.copy(self.projection)
-        return fv
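With RequestFeatureView deleted above, request-time inputs are declared directly as a RequestSource and consumed by an on-demand feature view. A minimal migration sketch (not part of the patch):

    from feast import Field, RequestSource
    from feast.types import Int64

    # A RequestSource now plays the role the deleted RequestFeatureView used to:
    # it declares the schema of values supplied at request time.
    clicks_input = RequestSource(name="clicks_input", schema=[Field(name="clicks", dtype=Int64)])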
diff --git a/sdk/python/feast/stream_feature_view.py b/sdk/python/feast/stream_feature_view.py
index 13abbc5e284..301cf6cba57 100644
--- a/sdk/python/feast/stream_feature_view.py
+++ b/sdk/python/feast/stream_feature_view.py
@@ -25,6 +25,13 @@
 from feast.protos.feast.core.StreamFeatureView_pb2 import (
     StreamFeatureViewSpec as StreamFeatureViewSpecProto,
 )
+from feast.protos.feast.core.Transformation_pb2 import (
+    FeatureTransformationV2 as FeatureTransformationProto,
+)
+from feast.protos.feast.core.Transformation_pb2 import (
+    UserDefinedFunctionV2 as UserDefinedFunctionProtoV2,
+)
+from feast.transformation.pandas_transformation import PandasTransformation

 warnings.simplefilter("once", RuntimeWarning)

@@ -73,6 +80,7 @@ class StreamFeatureView(FeatureView):
     materialization_intervals: List[Tuple[datetime, datetime]]
     udf: Optional[FunctionType]
     udf_string: Optional[str]
+    feature_transformation: Optional[PandasTransformation]

     def __init__(
         self,
@@ -91,6 +99,7 @@ def __init__(
         timestamp_field: Optional[str] = "",
         udf: Optional[FunctionType] = None,
         udf_string: Optional[str] = "",
+        feature_transformation: Optional[Union[PandasTransformation]] = None,
     ):
         if not flags_helper.is_test():
             warnings.warn(
@@ -118,6 +127,7 @@ def __init__(
         self.timestamp_field = timestamp_field or ""
         self.udf = udf
         self.udf_string = udf_string
+        self.feature_transformation = feature_transformation

         super().__init__(
             name=name,
@@ -171,19 +181,30 @@ def to_proto(self):
             stream_source_proto = self.stream_source.to_proto()
             stream_source_proto.data_source_class_type = f"{self.stream_source.__class__.__module__}.{self.stream_source.__class__.__name__}"

-        udf_proto = None
+        udf_proto, feature_transformation = None, None
         if self.udf:
             udf_proto = UserDefinedFunctionProto(
                 name=self.udf.__name__,
                 body=dill.dumps(self.udf, recurse=True),
                 body_text=self.udf_string,
             )
+            udf_proto_v2 = UserDefinedFunctionProtoV2(
+                name=self.udf.__name__,
+                body=dill.dumps(self.udf, recurse=True),
+                body_text=self.udf_string,
+            )
+
+            feature_transformation = FeatureTransformationProto(
+                user_defined_function=udf_proto_v2,
+            )
+
         spec = StreamFeatureViewSpecProto(
             name=self.name,
             entities=self.entities,
             entity_columns=[field.to_proto() for field in self.entity_columns],
             features=[field.to_proto() for field in self.schema],
             user_defined_function=udf_proto,
+            feature_transformation=feature_transformation,
             description=self.description,
             tags=self.tags,
             owner=self.owner,
@@ -220,6 +241,11 @@ def from_proto(cls, sfv_proto):
             if sfv_proto.spec.HasField("user_defined_function")
             else None
         )
+        # feature_transformation = (
+        #     sfv_proto.spec.feature_transformation.user_defined_function.body_text
+        #     if sfv_proto.spec.HasField("feature_transformation")
+        #     else None
+        # )
         stream_feature_view = cls(
             name=sfv_proto.spec.name,
             description=sfv_proto.spec.description,
@@ -238,6 +264,9 @@ def from_proto(cls, sfv_proto):
             mode=sfv_proto.spec.mode,
             udf=udf,
             udf_string=udf_string,
+            feature_transformation=PandasTransformation(udf, udf_string)
+            if udf
+            else None,
             aggregations=[
                 Aggregation.from_proto(agg_proto)
                 for agg_proto in sfv_proto.spec.aggregations
@@ -294,6 +323,7 @@ def __copy__(self):
             timestamp_field=self.timestamp_field,
             source=self.stream_source if self.stream_source else self.batch_source,
             udf=self.udf,
+            feature_transformation=self.feature_transformation,
         )
         fv.entities = self.entities
         fv.features = copy.copy(self.features)
@@ -343,6 +373,7 @@ def decorator(user_function):
             schema=schema,
             udf=user_function,
             udf_string=udf_string,
+            feature_transformation=PandasTransformation(user_function, udf_string),
             description=description,
             tags=tags,
             online=online,
diff --git a/sdk/python/feast/transformation/__init__.py b/sdk/python/feast/transformation/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/sdk/python/feast/on_demand_pandas_transformation.py b/sdk/python/feast/transformation/pandas_transformation.py
similarity index 57%
rename from sdk/python/feast/on_demand_pandas_transformation.py
rename to sdk/python/feast/transformation/pandas_transformation.py
index 32cb44b429b..1838a882f27 100644
--- a/sdk/python/feast/on_demand_pandas_transformation.py
+++ b/sdk/python/feast/transformation/pandas_transformation.py
@@ -3,15 +3,15 @@
 import dill
 import pandas as pd

-from feast.protos.feast.core.OnDemandFeatureView_pb2 import (
-    UserDefinedFunction as UserDefinedFunctionProto,
+from feast.protos.feast.core.Transformation_pb2 import (
+    UserDefinedFunctionV2 as UserDefinedFunctionProto,
 )


-class OnDemandPandasTransformation:
+class PandasTransformation:
     def __init__(self, udf: FunctionType, udf_string: str = ""):
         """
-        Creates an OnDemandPandasTransformation object.
+        Creates a PandasTransformation object.

         Args:
             udf: The user defined transformation function, which must take pandas
@@ -21,13 +21,22 @@ def __init__(self, udf: FunctionType, udf_string: str = ""):
         self.udf = udf
         self.udf_string = udf_string

-    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        return self.udf.__call__(df)
+    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
+        if not isinstance(input_df, pd.DataFrame):
+            raise TypeError(
+                f"input_df should be type pd.DataFrame but got {type(input_df).__name__}"
+            )
+        output_df = self.udf.__call__(input_df)
+        if not isinstance(output_df, pd.DataFrame):
+            raise TypeError(
+                f"output_df should be type pd.DataFrame but got {type(output_df).__name__}"
+            )
+        return output_df

     def __eq__(self, other):
-        if not isinstance(other, OnDemandPandasTransformation):
+        if not isinstance(other, PandasTransformation):
             raise TypeError(
-                "Comparisons should only involve OnDemandPandasTransformation class objects."
+                "Comparisons should only involve PandasTransformation class objects."
             )

         if (
@@ -47,7 +56,7 @@ def to_proto(self) -> UserDefinedFunctionProto:

     @classmethod
     def from_proto(cls, user_defined_function_proto: UserDefinedFunctionProto):
-        return OnDemandPandasTransformation(
+        return PandasTransformation(
             udf=dill.loads(user_defined_function_proto.body),
             udf_string=user_defined_function_proto.body_text,
         )
diff --git a/sdk/python/feast/transformation/python_transformation.py b/sdk/python/feast/transformation/python_transformation.py
new file mode 100644
index 00000000000..9519f23c05c
--- /dev/null
+++ b/sdk/python/feast/transformation/python_transformation.py
@@ -0,0 +1,65 @@
+from types import FunctionType
+from typing import Dict
+
+import dill
+
+from feast.protos.feast.core.Transformation_pb2 import (
+    UserDefinedFunctionV2 as UserDefinedFunctionProto,
+)
+
+
+class PythonTransformation:
+    def __init__(self, udf: FunctionType, udf_string: str = ""):
+        """
+        Creates a PythonTransformation object.
+        Args:
+            udf: The user defined transformation function, which must take a dictionary
+                of feature values as input.
+            udf_string: The source code version of the udf (for diffing and displaying in Web UI)
+        """
+        self.udf = udf
+        self.udf_string = udf_string
+
+    def transform(self, input_dict: Dict) -> Dict:
+        if not isinstance(input_dict, Dict):
+            raise TypeError(
+                f"input_dict should be type Dict[str, Any] but got {type(input_dict).__name__}"
+            )
+        # Ensuring that the inputs are included in the returned output as well
+        output_dict = self.udf.__call__(input_dict)
+        if not isinstance(output_dict, Dict):
+            raise TypeError(
+                f"output_dict should be type Dict[str, Any] but got {type(output_dict).__name__}"
+            )
+        return {**input_dict, **output_dict}
+
+    def __eq__(self, other):
+        if not isinstance(other, PythonTransformation):
+            raise TypeError(
+                "Comparisons should only involve PythonTransformation class objects."
+            )
+
+        if not super().__eq__(other):
+            return False
+
+        if (
+            self.udf_string != other.udf_string
+            or self.udf.__code__.co_code != other.udf.__code__.co_code
+        ):
+            return False
+
+        return True
+
+    def to_proto(self) -> UserDefinedFunctionProto:
+        return UserDefinedFunctionProto(
+            name=self.udf.__name__,
+            body=dill.dumps(self.udf, recurse=True),
+            body_text=self.udf_string,
+        )
+
+    @classmethod
+    def from_proto(cls, user_defined_function_proto: UserDefinedFunctionProto):
+        return PythonTransformation(
+            udf=dill.loads(user_defined_function_proto.body),
+            udf_string=user_defined_function_proto.body_text,
+        )
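A small sketch (not part of the patch) of the dict-merging behavior PythonTransformation.transform implements, using an invented udf:

    from feast.transformation.python_transformation import PythonTransformation

    # Hypothetical udf, used only for this example.
    def add_one(inputs: dict) -> dict:
        return {"y": [v + 1 for v in inputs["x"]]}

    t = PythonTransformation(udf=add_one, udf_string="def add_one(inputs): ...")
    # transform() merges the original inputs back into the udf's output.
    assert t.transform({"x": [1, 2]}) == {"x": [1, 2], "y": [2, 3]}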
diff --git a/sdk/python/feast/on_demand_substrait_transformation.py b/sdk/python/feast/transformation/substrait_transformation.py
similarity index 54%
rename from sdk/python/feast/on_demand_substrait_transformation.py
rename to sdk/python/feast/transformation/substrait_transformation.py
index 4e92e77dc8a..b3dbe7a4b49 100644
--- a/sdk/python/feast/on_demand_substrait_transformation.py
+++ b/sdk/python/feast/transformation/substrait_transformation.py
@@ -2,15 +2,15 @@
 import pyarrow
 import pyarrow.substrait as substrait  # type: ignore # noqa

-from feast.protos.feast.core.OnDemandFeatureView_pb2 import (
-    OnDemandSubstraitTransformation as OnDemandSubstraitTransformationProto,
+from feast.protos.feast.core.Transformation_pb2 import (
+    SubstraitTransformationV2 as SubstraitTransformationProto,
 )


-class OnDemandSubstraitTransformation:
+class SubstraitTransformation:
     def __init__(self, substrait_plan: bytes):
         """
-        Creates an OnDemandSubstraitTransformation object.
+        Creates a SubstraitTransformation object.

         Args:
             substrait_plan: The user-provided substrait plan.
@@ -27,9 +27,9 @@ def table_provider(names, schema: pyarrow.Schema):
         return table.to_pandas()

     def __eq__(self, other):
-        if not isinstance(other, OnDemandSubstraitTransformation):
+        if not isinstance(other, SubstraitTransformation):
             raise TypeError(
-                "Comparisons should only involve OnDemandSubstraitTransformation class objects."
+                "Comparisons should only involve SubstraitTransformation class objects."
             )

         if not super().__eq__(other):
@@ -37,14 +37,14 @@ def __eq__(self, other):

         return self.substrait_plan == other.substrait_plan

-    def to_proto(self) -> OnDemandSubstraitTransformationProto:
-        return OnDemandSubstraitTransformationProto(substrait_plan=self.substrait_plan)
+    def to_proto(self) -> SubstraitTransformationProto:
+        return SubstraitTransformationProto(substrait_plan=self.substrait_plan)

     @classmethod
     def from_proto(
         cls,
-        on_demand_substrait_transformation_proto: OnDemandSubstraitTransformationProto,
+        substrait_transformation_proto: SubstraitTransformationProto,
     ):
-        return OnDemandSubstraitTransformation(
-            substrait_plan=on_demand_substrait_transformation_proto.substrait_plan
+        return SubstraitTransformation(
+            substrait_plan=substrait_transformation_proto.substrait_plan
         )
diff --git a/sdk/python/feast/transformation_server.py b/sdk/python/feast/transformation_server.py
index 83f4af749e3..34fe3eac766 100644
--- a/sdk/python/feast/transformation_server.py
+++ b/sdk/python/feast/transformation_server.py
@@ -47,6 +47,11 @@ def TransformFeatures(self, request, context):

         df = pa.ipc.open_file(request.transformation_input.arrow_value).read_pandas()

+        if odfv.mode != "pandas":
+            raise Exception(
+                f'OnDemandFeatureView mode "{odfv.mode}" not supported by TransformationServer.'
+            )
+
         result_df = odfv.get_transformed_features_df(df, True)
         result_arrow = pa.Table.from_pandas(result_df)
         sink = pa.BufferOutputStream()
diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py
index ad3e273d37b..8dc07414b5c 100644
--- a/sdk/python/feast/type_map.py
+++ b/sdk/python/feast/type_map.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import json
+import re
 from collections import defaultdict
 from datetime import datetime, timezone
 from typing import (
@@ -752,7 +753,7 @@ def _non_empty_value(value: Any) -> bool:

 def spark_to_feast_value_type(spark_type_as_str: str) -> ValueType:
     # TODO not all spark types are convertible
-    # Current non-convertible types: interval, map, struct, structfield, decimal, binary
+    # Current non-convertible types: interval, map, struct, structfield, binary
     type_map: Dict[str, ValueType] = {
         "null": ValueType.UNKNOWN,
         "byte": ValueType.BYTES,
@@ -762,6 +763,7 @@ def spark_to_feast_value_type(spark_type_as_str: str) -> ValueType:
         "bigint": ValueType.INT64,
         "long": ValueType.INT64,
         "double": ValueType.DOUBLE,
+        "decimal": ValueType.DOUBLE,
         "float": ValueType.FLOAT,
         "boolean": ValueType.BOOL,
         "timestamp": ValueType.UNIX_TIMESTAMP,
@@ -774,6 +776,10 @@ def spark_to_feast_value_type(spark_type_as_str: str) -> ValueType:
         "array<boolean>": ValueType.BOOL_LIST,
         "array<timestamp>": ValueType.UNIX_TIMESTAMP_LIST,
     }
+    decimal_regex_pattern = r"^decimal\([0-9]{1,2},[0-9]{1,2}\)$"
+    if re.match(decimal_regex_pattern, spark_type_as_str):
+        spark_type_as_str = "decimal"
+    # TODO: Find better way of doing this.
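To make the new decimal mapping concrete, a short sketch (not part of the patch) of how a Spark decimal column now resolves, assuming a Feast build with this change applied:

    from feast.type_map import spark_to_feast_value_type
    from feast.value_type import ValueType

    # Parameterized decimals are normalized by the regex before the lookup;
    # both spellings land on DOUBLE (extra precision is not preserved).
    assert spark_to_feast_value_type("decimal(10,2)") == ValueType.DOUBLE
    assert spark_to_feast_value_type("decimal") == ValueType.DOUBLE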
if not isinstance(spark_type_as_str, str) or spark_type_as_str not in type_map: return ValueType.NULL diff --git a/sdk/python/feast/ui/yarn.lock b/sdk/python/feast/ui/yarn.lock index 06f4d3f12bb..48cbd308033 100644 --- a/sdk/python/feast/ui/yarn.lock +++ b/sdk/python/feast/ui/yarn.lock @@ -3511,21 +3511,21 @@ bluebird@^3.5.5: resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f" integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== -body-parser@1.20.0: - version "1.20.0" - resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.0.tgz#3de69bd89011c11573d7bfee6a64f11b6bd27cc5" - integrity sha512-DfJ+q6EPcGKZD1QWUjSpqp+Q7bDQTsQIF4zfUAtZ6qk+H/3/QRhg9CEp39ss+/T2vw0+HaidC0ecJj/DRLIaKg== +body-parser@1.20.2: + version "1.20.2" + resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.2.tgz#6feb0e21c4724d06de7ff38da36dad4f57a747fd" + integrity sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA== dependencies: bytes "3.1.2" - content-type "~1.0.4" + content-type "~1.0.5" debug "2.6.9" depd "2.0.0" destroy "1.2.0" http-errors "2.0.0" iconv-lite "0.4.24" on-finished "2.4.1" - qs "6.10.3" - raw-body "2.5.1" + qs "6.11.0" + raw-body "2.5.2" type-is "~1.6.18" unpipe "1.0.0" @@ -3966,6 +3966,11 @@ content-type@~1.0.4: resolved "https://registry.yarnpkg.com/content-type/-/content-type-1.0.4.tgz#e138cc75e040c727b1966fe5e5f8c9aee256fe3b" integrity sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA== +content-type@~1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/content-type/-/content-type-1.0.5.tgz#8b773162656d1d1086784c8f23a54ce6d73d7918" + integrity sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA== + convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0: version "1.8.0" resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.8.0.tgz#f3373c32d21b4d780dd8004514684fb791ca4369" @@ -3978,10 +3983,10 @@ cookie-signature@1.0.6: resolved "https://registry.yarnpkg.com/cookie-signature/-/cookie-signature-1.0.6.tgz#e303a882b342cc3ee8ca513a79999734dab3ae2c" integrity sha1-4wOogrNCzD7oylE6eZmXNNqzriw= -cookie@0.5.0: - version "0.5.0" - resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.5.0.tgz#d1f5d71adec6558c58f389987c366aa47e994f8b" - integrity sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw== +cookie@0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.6.0.tgz#2798b04b071b0ecbff0dbb62a505a8efa4e19051" + integrity sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw== core-js-compat@^3.21.0, core-js-compat@^3.22.1: version "3.22.5" @@ -5266,16 +5271,16 @@ expect@^27.5.1: jest-message-util "^27.5.1" express@^4.17.3: - version "4.18.1" - resolved "https://registry.yarnpkg.com/express/-/express-4.18.1.tgz#7797de8b9c72c857b9cd0e14a5eea80666267caf" - integrity sha512-zZBcOX9TfehHQhtupq57OF8lFZ3UZi08Y97dwFCkD8p9d/d2Y3M+ykKcwaMDEL+4qyUolgBDX6AblpR3fL212Q== + version "4.19.2" + resolved "https://registry.yarnpkg.com/express/-/express-4.19.2.tgz#e25437827a3aa7f2a827bc8171bbbb664a356465" + integrity sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q== dependencies: accepts "~1.3.8" array-flatten "1.1.1" 
- body-parser "1.20.0" + body-parser "1.20.2" content-disposition "0.5.4" content-type "~1.0.4" - cookie "0.5.0" + cookie "0.6.0" cookie-signature "1.0.6" debug "2.6.9" depd "2.0.0" @@ -5291,7 +5296,7 @@ express@^4.17.3: parseurl "~1.3.3" path-to-regexp "0.1.7" proxy-addr "~2.0.7" - qs "6.10.3" + qs "6.11.0" range-parser "~1.2.1" safe-buffer "5.2.1" send "0.18.0" @@ -8591,10 +8596,10 @@ q@^1.1.2: resolved "https://registry.yarnpkg.com/q/-/q-1.5.1.tgz#7e32f75b41381291d04611f1bf14109ac00651d7" integrity sha1-fjL3W0E4EpHQRhHxvxQQmsAGUdc= -qs@6.10.3: - version "6.10.3" - resolved "https://registry.yarnpkg.com/qs/-/qs-6.10.3.tgz#d6cde1b2ffca87b5aa57889816c5f81535e22e8e" - integrity sha512-wr7M2E0OFRfIfJZjKGieI8lBKb7fRCH4Fv5KNPEs7gJ8jadvotdsS08PzOKR7opXhZ/Xkjtt3WF9g38drmyRqQ== +qs@6.11.0: + version "6.11.0" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.11.0.tgz#fd0d963446f7a65e1367e01abd85429453f0c37a" + integrity sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q== dependencies: side-channel "^1.0.4" @@ -8647,10 +8652,10 @@ range-parser@^1.2.1, range-parser@~1.2.1: resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031" integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg== -raw-body@2.5.1: - version "2.5.1" - resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.5.1.tgz#fe1b1628b181b700215e5fd42389f98b71392857" - integrity sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig== +raw-body@2.5.2: + version "2.5.2" + resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.5.2.tgz#99febd83b90e08975087e8f1f9419a149366b68a" + integrity sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA== dependencies: bytes "3.1.2" http-errors "2.0.0" diff --git a/sdk/python/pytest.ini b/sdk/python/pytest.ini index 07a5e869dc4..83317d36c98 100644 --- a/sdk/python/pytest.ini +++ b/sdk/python/pytest.ini @@ -1,4 +1,8 @@ [pytest] markers = universal_offline_stores: mark a test as using all offline stores. - universal_online_stores: mark a test as using all online stores. \ No newline at end of file + universal_online_stores: mark a test as using all online stores. 
+ +env = + FEAST_USAGE=False + IS_TEST=True \ No newline at end of file diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 8f0ef90d77e..ac1994da379 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -61,11 +61,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.34.65 +boto3==1.34.69 # via # feast (setup.py) # moto -botocore==1.34.65 +botocore==1.34.69 # via # boto3 # moto @@ -82,7 +82,7 @@ cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth -cassandra-driver==3.29.0 +cassandra-driver==3.29.1 # via feast (setup.py) certifi==2024.2.2 # via @@ -164,6 +164,12 @@ docker==7.0.0 # testcontainers docutils==0.19 # via sphinx +duckdb==0.10.1 + # via + # duckdb-engine + # ibis-framework +duckdb-engine==0.11.2 + # via ibis-framework entrypoints==0.4 # via altair exceptiongroup==1.2.0 @@ -199,7 +205,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.17.1 +google-api-core[grpc]==2.18.0 # via # feast (setup.py) # firebase-admin @@ -211,9 +217,9 @@ google-api-core[grpc]==2.17.1 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.122.0 +google-api-python-client==2.123.0 # via firebase-admin -google-auth==2.28.2 +google-auth==2.29.0 # via # google-api-core # google-api-python-client @@ -258,7 +264,7 @@ googleapis-common-protos[grpc]==1.63.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.11 +great-expectations==0.18.12 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -310,7 +316,7 @@ httpx==0.27.0 # via # feast (setup.py) # jupyterlab -ibis-framework==8.0.0 +ibis-framework[duckdb]==8.0.0 # via # feast (setup.py) # ibis-substrait @@ -331,7 +337,7 @@ importlib-metadata==6.11.0 # via # dask # feast (setup.py) -importlib-resources==6.3.1 +importlib-resources==6.4.0 # via feast (setup.py) iniconfig==2.0.0 # via pytest @@ -459,7 +465,7 @@ moreorless==0.4.0 # via bowler moto==4.2.14 # via feast (setup.py) -msal==1.27.0 +msal==1.28.0 # via # azure-identity # msal-extensions @@ -483,7 +489,7 @@ mypy-protobuf==3.3.0 # via feast (setup.py) nbclient==0.10.0 # via nbconvert -nbconvert==7.16.2 +nbconvert==7.16.3 # via jupyter-server nbformat==5.10.3 # via @@ -581,6 +587,7 @@ prompt-toolkit==3.0.43 # via ipython proto-plus==1.23.0 # via + # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage # google-cloud-bigtable @@ -687,6 +694,7 @@ pytest==7.4.4 # feast (setup.py) # pytest-benchmark # pytest-cov + # pytest-env # pytest-lazy-fixture # pytest-mock # pytest-ordering @@ -696,6 +704,8 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==4.1.0 # via feast (setup.py) +pytest-env==1.1.3 + # via feast (setup.py) pytest-lazy-fixture==0.6.3 # via feast (setup.py) pytest-mock==1.10.4 @@ -773,7 +783,7 @@ requests==2.31.0 # snowflake-connector-python # sphinx # trino -requests-oauthlib==1.4.0 +requests-oauthlib==2.0.0 # via kubernetes responses==0.25.0 # via moto @@ -844,8 +854,13 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==1.4.52 # via + # duckdb-engine # feast (setup.py) + # ibis-framework # sqlalchemy + # sqlalchemy-views +sqlalchemy-views==0.3.2 + # via ibis-framework sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy sqlglot==20.11.0 @@ -882,6 +897,7 @@ tomli==2.0.1 # pip-tools # pyproject-hooks # pytest + # pytest-env tomlkit==0.12.4 # 
via snowflake-connector-python toolz==0.12.1 @@ -919,7 +935,7 @@ traitlets==5.14.2 # nbformat trino==0.328.0 # via feast (setup.py) -typeguard==4.1.5 +typeguard==4.2.1 # via feast (setup.py) types-protobuf==3.19.22 # via @@ -984,7 +1000,7 @@ urllib3==1.26.18 # requests # responses # rockset -uvicorn[standard]==0.28.0 +uvicorn[standard]==0.29.0 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index e17a588538e..6603171d450 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -62,7 +62,7 @@ importlib-metadata==6.11.0 # via # dask # feast (setup.py) -importlib-resources==6.3.1 +importlib-resources==6.4.0 # via feast (setup.py) jinja2==3.1.3 # via feast (setup.py) @@ -158,7 +158,7 @@ toolz==0.12.1 # partd tqdm==4.66.2 # via feast (setup.py) -typeguard==4.1.5 +typeguard==4.2.1 # via feast (setup.py) types-protobuf==4.24.0.20240311 # via mypy-protobuf @@ -176,7 +176,7 @@ tzdata==2024.1 # via pandas urllib3==2.2.1 # via requests -uvicorn[standard]==0.28.0 +uvicorn[standard]==0.29.0 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt deleted file mode 100644 index 3d3dbe764b9..00000000000 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ /dev/null @@ -1,1053 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.8 -# by the following command: -# -# pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt -# -alabaster==0.7.13 - # via sphinx -altair==4.2.2 - # via great-expectations -annotated-types==0.6.0 - # via pydantic -anyio==4.3.0 - # via - # httpx - # jupyter-server - # starlette - # watchfiles -appdirs==1.4.4 - # via fissix -argon2-cffi==23.1.0 - # via jupyter-server -argon2-cffi-bindings==21.2.0 - # via argon2-cffi -arrow==1.3.0 - # via isoduration -asn1crypto==1.5.1 - # via snowflake-connector-python -assertpy==1.1 - # via feast (setup.py) -asttokens==2.4.1 - # via stack-data -async-lru==2.0.4 - # via jupyterlab -async-timeout==4.0.3 - # via redis -atpublic==3.1.2 - # via ibis-framework -attrs==23.2.0 - # via - # bowler - # jsonschema - # referencing -avro==1.11.3 - # via feast (setup.py) -azure-core==1.30.1 - # via - # azure-identity - # azure-storage-blob -azure-identity==1.15.0 - # via feast (setup.py) -azure-storage-blob==12.19.1 - # via feast (setup.py) -babel==2.14.0 - # via - # jupyterlab-server - # sphinx -backcall==0.2.0 - # via ipython -backports-zoneinfo==0.2.1 - # via - # trino - # tzlocal -beautifulsoup4==4.12.3 - # via nbconvert -black==22.12.0 - # via feast (setup.py) -bleach==6.1.0 - # via nbconvert -boto3==1.34.60 - # via - # feast (setup.py) - # moto -botocore==1.34.60 - # via - # boto3 - # moto - # s3transfer -bowler==0.9.0 - # via feast (setup.py) -build==1.1.1 - # via - # feast (setup.py) - # pip-tools -bytewax==0.15.1 - # via feast (setup.py) -cachecontrol==0.14.0 - # via firebase-admin -cachetools==5.3.3 - # via google-auth -cassandra-driver==3.29.0 - # via feast (setup.py) -certifi==2024.2.2 - # via - # httpcore - # httpx - # kubernetes - # minio - # requests - # snowflake-connector-python -cffi==1.16.0 - # via - # argon2-cffi-bindings - # cryptography - # snowflake-connector-python -cfgv==3.4.0 - # via pre-commit -charset-normalizer==3.3.2 - # via - # requests - # snowflake-connector-python -click==8.1.7 - # via - # black 
- # bowler - # dask - # feast (setup.py) - # geomet - # great-expectations - # moreorless - # pip-tools - # uvicorn -cloudpickle==3.0.0 - # via dask -colorama==0.4.6 - # via - # feast (setup.py) - # great-expectations -comm==0.2.2 - # via - # ipykernel - # ipywidgets -coverage[toml]==7.4.3 - # via pytest-cov -cryptography==42.0.5 - # via - # azure-identity - # azure-storage-blob - # feast (setup.py) - # great-expectations - # moto - # msal - # pyjwt - # pyopenssl - # snowflake-connector-python - # types-pyopenssl - # types-redis -dask==2023.5.0 - # via feast (setup.py) -db-dtypes==1.2.0 - # via google-cloud-bigquery -debugpy==1.8.1 - # via ipykernel -decorator==5.1.1 - # via ipython -defusedxml==0.7.1 - # via nbconvert -deprecation==2.1.0 - # via testcontainers -dill==0.3.8 - # via - # bytewax - # feast (setup.py) - # multiprocess -distlib==0.3.8 - # via virtualenv -docker==7.0.0 - # via - # feast (setup.py) - # testcontainers -docutils==0.19 - # via sphinx -entrypoints==0.4 - # via altair -exceptiongroup==1.2.0 - # via - # anyio - # pytest -execnet==2.0.2 - # via pytest-xdist -executing==2.0.1 - # via stack-data -fastapi==0.110.0 - # via feast (setup.py) -fastjsonschema==2.19.1 - # via nbformat -filelock==3.13.1 - # via - # snowflake-connector-python - # virtualenv -firebase-admin==5.4.0 - # via feast (setup.py) -fissix==21.11.13 - # via bowler -flake8==6.0.0 - # via feast (setup.py) -fqdn==1.5.1 - # via jsonschema -fsspec==2023.12.2 - # via - # dask - # feast (setup.py) -geojson==2.5.0 - # via rockset -geomet==0.2.1.post1 - # via cassandra-driver -google-api-core[grpc]==2.17.1 - # via - # feast (setup.py) - # firebase-admin - # google-api-python-client - # google-cloud-bigquery - # google-cloud-bigquery-storage - # google-cloud-bigtable - # google-cloud-core - # google-cloud-datastore - # google-cloud-firestore - # google-cloud-storage -google-api-python-client==2.122.0 - # via firebase-admin -google-auth==2.28.2 - # via - # google-api-core - # google-api-python-client - # google-auth-httplib2 - # google-cloud-core - # google-cloud-storage - # kubernetes -google-auth-httplib2==0.2.0 - # via google-api-python-client -google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) -google-cloud-bigquery-storage==2.24.0 - # via feast (setup.py) -google-cloud-bigtable==2.23.0 - # via feast (setup.py) -google-cloud-core==2.4.1 - # via - # google-cloud-bigquery - # google-cloud-bigtable - # google-cloud-datastore - # google-cloud-firestore - # google-cloud-storage -google-cloud-datastore==2.19.0 - # via feast (setup.py) -google-cloud-firestore==2.15.0 - # via firebase-admin -google-cloud-storage==2.15.0 - # via - # feast (setup.py) - # firebase-admin -google-crc32c==1.5.0 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.0 - # via - # google-cloud-bigquery - # google-cloud-storage -googleapis-common-protos[grpc]==1.63.0 - # via - # feast (setup.py) - # google-api-core - # grpc-google-iam-v1 - # grpcio-status -great-expectations==0.18.10 - # via feast (setup.py) -greenlet==3.0.3 - # via sqlalchemy -grpc-google-iam-v1==0.13.0 - # via google-cloud-bigtable -grpcio==1.62.1 - # via - # feast (setup.py) - # google-api-core - # google-cloud-bigquery - # googleapis-common-protos - # grpc-google-iam-v1 - # grpcio-health-checking - # grpcio-reflection - # grpcio-status - # grpcio-testing - # grpcio-tools -grpcio-health-checking==1.62.1 - # via feast (setup.py) -grpcio-reflection==1.62.1 - # via feast (setup.py) -grpcio-status==1.62.1 - # via google-api-core 
-grpcio-testing==1.62.1 - # via feast (setup.py) -grpcio-tools==1.62.1 - # via feast (setup.py) -gunicorn==21.2.0 - # via feast (setup.py) -h11==0.14.0 - # via - # httpcore - # uvicorn -happybase==1.2.0 - # via feast (setup.py) -hazelcast-python-client==5.3.0 - # via feast (setup.py) -hiredis==2.3.2 - # via feast (setup.py) -httpcore==1.0.4 - # via httpx -httplib2==0.22.0 - # via - # google-api-python-client - # google-auth-httplib2 -httptools==0.6.1 - # via uvicorn -httpx==0.27.0 - # via - # feast (setup.py) - # jupyterlab -ibis-framework==4.1.0 - # via - # feast (setup.py) - # ibis-substrait -ibis-substrait==2.29.1 - # via feast (setup.py) -identify==2.5.35 - # via pre-commit -idna==3.6 - # via - # anyio - # httpx - # jsonschema - # requests - # snowflake-connector-python -imagesize==1.4.1 - # via sphinx -importlib-metadata==6.11.0 - # via - # build - # dask - # feast (setup.py) - # jupyter-client - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # nbconvert - # sphinx - # typeguard -importlib-resources==6.1.3 - # via - # feast (setup.py) - # jsonschema - # jsonschema-specifications - # jupyterlab -iniconfig==2.0.0 - # via pytest -ipykernel==6.29.3 - # via jupyterlab -ipython==8.12.3 - # via - # great-expectations - # ipykernel - # ipywidgets -ipywidgets==8.1.2 - # via great-expectations -isodate==0.6.1 - # via azure-storage-blob -isoduration==20.11.0 - # via jsonschema -isort==5.13.2 - # via feast (setup.py) -jedi==0.19.1 - # via ipython -jinja2==3.1.3 - # via - # altair - # feast (setup.py) - # great-expectations - # jupyter-server - # jupyterlab - # jupyterlab-server - # moto - # nbconvert - # sphinx -jmespath==1.0.1 - # via - # boto3 - # botocore -json5==0.9.22 - # via jupyterlab-server -jsonpatch==1.33 - # via great-expectations -jsonpointer==2.4 - # via - # jsonpatch - # jsonschema -jsonschema[format-nongpl]==4.21.1 - # via - # altair - # feast (setup.py) - # great-expectations - # jupyter-events - # jupyterlab-server - # nbformat -jsonschema-specifications==2023.12.1 - # via jsonschema -jupyter-client==8.6.1 - # via - # ipykernel - # jupyter-server - # nbclient -jupyter-core==5.7.2 - # via - # ipykernel - # jupyter-client - # jupyter-server - # jupyterlab - # nbclient - # nbconvert - # nbformat -jupyter-events==0.9.1 - # via jupyter-server -jupyter-lsp==2.2.4 - # via jupyterlab -jupyter-server==2.13.0 - # via - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # notebook - # notebook-shim -jupyter-server-terminals==0.5.3 - # via jupyter-server -jupyterlab==4.1.4 - # via notebook -jupyterlab-pygments==0.3.0 - # via nbconvert -jupyterlab-server==2.25.4 - # via - # jupyterlab - # notebook -jupyterlab-widgets==3.0.10 - # via ipywidgets -kubernetes==20.13.0 - # via feast (setup.py) -locket==1.0.0 - # via partd -makefun==1.15.2 - # via great-expectations -markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.5 - # via - # jinja2 - # nbconvert - # werkzeug -marshmallow==3.21.1 - # via great-expectations -matplotlib-inline==0.1.6 - # via - # ipykernel - # ipython -mccabe==0.7.0 - # via flake8 -mdurl==0.1.2 - # via markdown-it-py -minio==7.1.0 - # via feast (setup.py) -mistune==3.0.2 - # via - # great-expectations - # nbconvert -mmh3==4.1.0 - # via feast (setup.py) -mock==2.0.0 - # via feast (setup.py) -moreorless==0.4.0 - # via bowler -moto==4.2.14 - # via feast (setup.py) -msal==1.27.0 - # via - # azure-identity - # msal-extensions -msal-extensions==1.1.0 - # via azure-identity -msgpack==1.0.8 - # via cachecontrol -multipledispatch==0.6.0 - # via ibis-framework 
-multiprocess==0.70.16 - # via bytewax -mypy==1.9.0 - # via - # feast (setup.py) - # sqlalchemy -mypy-extensions==1.0.0 - # via - # black - # mypy -mypy-protobuf==3.3.0 - # via feast (setup.py) -nbclient==0.9.1 - # via nbconvert -nbconvert==7.16.2 - # via jupyter-server -nbformat==5.10.2 - # via - # great-expectations - # jupyter-server - # nbclient - # nbconvert -nest-asyncio==1.6.0 - # via ipykernel -nodeenv==1.8.0 - # via pre-commit -notebook==7.1.1 - # via great-expectations -notebook-shim==0.2.4 - # via - # jupyterlab - # notebook -numpy==1.24.4 - # via - # altair - # db-dtypes - # feast (setup.py) - # great-expectations - # ibis-framework - # pandas - # pyarrow - # scipy -oauthlib==3.2.2 - # via requests-oauthlib -overrides==7.7.0 - # via jupyter-server -packaging==24.0 - # via - # build - # dask - # db-dtypes - # deprecation - # docker - # google-cloud-bigquery - # great-expectations - # gunicorn - # ibis-substrait - # ipykernel - # jupyter-server - # jupyterlab - # jupyterlab-server - # marshmallow - # msal-extensions - # nbconvert - # pytest - # snowflake-connector-python - # sphinx -pandas==1.5.3 ; python_version < "3.9" - # via - # altair - # db-dtypes - # feast (setup.py) - # google-cloud-bigquery - # great-expectations - # ibis-framework - # snowflake-connector-python -pandocfilters==1.5.1 - # via nbconvert -parso==0.8.3 - # via jedi -parsy==2.1 - # via ibis-framework -partd==1.4.1 - # via dask -pathspec==0.12.1 - # via black -pbr==6.0.0 - # via mock -pexpect==4.9.0 - # via ipython -pickleshare==0.7.5 - # via ipython -pip-tools==7.4.1 - # via feast (setup.py) -pkgutil-resolve-name==1.3.10 - # via jsonschema -platformdirs==3.11.0 - # via - # black - # jupyter-core - # snowflake-connector-python - # virtualenv -pluggy==1.4.0 - # via pytest -ply==3.11 - # via thriftpy2 -portalocker==2.8.2 - # via msal-extensions -pre-commit==3.3.1 - # via feast (setup.py) -prometheus-client==0.20.0 - # via jupyter-server -prompt-toolkit==3.0.43 - # via ipython -proto-plus==1.23.0 - # via - # feast (setup.py) - # google-cloud-bigquery - # google-cloud-bigquery-storage - # google-cloud-bigtable - # google-cloud-datastore - # google-cloud-firestore -protobuf==4.25.3 - # via - # feast (setup.py) - # google-api-core - # google-cloud-bigquery - # google-cloud-bigquery-storage - # google-cloud-bigtable - # google-cloud-datastore - # google-cloud-firestore - # googleapis-common-protos - # grpc-google-iam-v1 - # grpcio-health-checking - # grpcio-reflection - # grpcio-status - # grpcio-testing - # grpcio-tools - # ibis-substrait - # mypy-protobuf - # proto-plus -psutil==5.9.0 - # via - # feast (setup.py) - # ipykernel -psycopg2-binary==2.9.9 - # via feast (setup.py) -ptyprocess==0.7.0 - # via - # pexpect - # terminado -pure-eval==0.2.2 - # via stack-data -py==1.11.0 - # via feast (setup.py) -py-cpuinfo==9.0.0 - # via pytest-benchmark -py4j==0.10.9.7 - # via pyspark -pyarrow==15.0.1 - # via - # db-dtypes - # feast (setup.py) - # google-cloud-bigquery - # snowflake-connector-python -pyasn1==0.5.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.3.0 - # via google-auth -pybindgen==0.22.1 - # via feast (setup.py) -pycodestyle==2.10.0 - # via flake8 -pycparser==2.21 - # via cffi -pydantic==2.6.4 - # via - # fastapi - # feast (setup.py) - # great-expectations -pydantic-core==2.16.3 - # via pydantic -pyflakes==3.0.1 - # via flake8 -pygments==2.17.2 - # via - # feast (setup.py) - # ipython - # nbconvert - # rich - # sphinx -pyjwt[crypto]==2.8.0 - # via - # msal - # snowflake-connector-python -pymssql==2.2.11 
- # via feast (setup.py) -pymysql==1.1.0 - # via feast (setup.py) -pyodbc==5.1.0 - # via feast (setup.py) -pyopenssl==24.1.0 - # via snowflake-connector-python -pyparsing==3.1.2 - # via - # great-expectations - # httplib2 -pyproject-hooks==1.0.0 - # via - # build - # pip-tools -pyspark==3.5.1 - # via feast (setup.py) -pytest==7.4.4 - # via - # feast (setup.py) - # pytest-benchmark - # pytest-cov - # pytest-lazy-fixture - # pytest-mock - # pytest-ordering - # pytest-timeout - # pytest-xdist -pytest-benchmark==3.4.1 - # via feast (setup.py) -pytest-cov==4.1.0 - # via feast (setup.py) -pytest-lazy-fixture==0.6.3 - # via feast (setup.py) -pytest-mock==1.10.4 - # via feast (setup.py) -pytest-ordering==0.6 - # via feast (setup.py) -pytest-timeout==1.4.2 - # via feast (setup.py) -pytest-xdist==3.5.0 - # via feast (setup.py) -python-dateutil==2.9.0.post0 - # via - # arrow - # botocore - # google-cloud-bigquery - # great-expectations - # ibis-framework - # jupyter-client - # kubernetes - # moto - # pandas - # rockset - # trino -python-dotenv==1.0.1 - # via uvicorn -python-json-logger==2.0.7 - # via jupyter-events -pytz==2024.1 - # via - # babel - # great-expectations - # ibis-framework - # pandas - # snowflake-connector-python - # trino -pyyaml==6.0.1 - # via - # dask - # feast (setup.py) - # ibis-substrait - # jupyter-events - # kubernetes - # pre-commit - # responses - # uvicorn -pyzmq==25.1.2 - # via - # ipykernel - # jupyter-client - # jupyter-server -redis==4.6.0 - # via feast (setup.py) -referencing==0.33.0 - # via - # jsonschema - # jsonschema-specifications - # jupyter-events -regex==2023.12.25 - # via feast (setup.py) -requests==2.31.0 - # via - # azure-core - # cachecontrol - # docker - # feast (setup.py) - # google-api-core - # google-cloud-bigquery - # google-cloud-storage - # great-expectations - # jupyterlab-server - # kubernetes - # moto - # msal - # requests-oauthlib - # responses - # snowflake-connector-python - # sphinx - # trino -requests-oauthlib==1.4.0 - # via kubernetes -responses==0.25.0 - # via moto -rfc3339-validator==0.1.4 - # via - # jsonschema - # jupyter-events -rfc3986-validator==0.1.1 - # via - # jsonschema - # jupyter-events -rich==13.7.1 - # via ibis-framework -rockset==2.1.1 - # via feast (setup.py) -rpds-py==0.18.0 - # via - # jsonschema - # referencing -rsa==4.9 - # via google-auth -ruamel-yaml==0.17.17 - # via great-expectations -ruamel-yaml-clib==0.2.8 - # via ruamel-yaml -s3transfer==0.10.0 - # via boto3 -scipy==1.10.1 - # via great-expectations -send2trash==1.8.2 - # via jupyter-server -six==1.16.0 - # via - # asttokens - # azure-core - # bleach - # geomet - # happybase - # isodate - # kubernetes - # mock - # multipledispatch - # python-dateutil - # rfc3339-validator - # thriftpy2 -sniffio==1.3.1 - # via - # anyio - # httpx -snowballstemmer==2.2.0 - # via sphinx -snowflake-connector-python[pandas]==3.7.1 - # via feast (setup.py) -sortedcontainers==2.4.0 - # via snowflake-connector-python -soupsieve==2.5 - # via beautifulsoup4 -sphinx==6.2.1 - # via feast (setup.py) -sphinxcontrib-applehelp==1.0.4 - # via sphinx -sphinxcontrib-devhelp==1.0.2 - # via sphinx -sphinxcontrib-htmlhelp==2.0.1 - # via sphinx -sphinxcontrib-jsmath==1.0.1 - # via sphinx -sphinxcontrib-qthelp==1.0.3 - # via sphinx -sphinxcontrib-serializinghtml==1.1.5 - # via sphinx -sqlalchemy[mypy]==1.4.52 - # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a38 - # via sqlalchemy -sqlglot==10.6.4 - # via ibis-framework -stack-data==0.6.3 - # via ipython -starlette==0.36.3 - # via fastapi 
-tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) -terminado==0.18.1 - # via - # jupyter-server - # jupyter-server-terminals -testcontainers==3.7.1 - # via feast (setup.py) -thriftpy2==0.4.20 - # via happybase -tinycss2==1.2.1 - # via nbconvert -toml==0.10.2 - # via feast (setup.py) -tomli==2.0.1 - # via - # black - # build - # coverage - # jupyterlab - # mypy - # pip-tools - # pyproject-hooks - # pytest -tomlkit==0.12.4 - # via snowflake-connector-python -toolz==0.12.1 - # via - # altair - # dask - # ibis-framework - # partd -tornado==6.4 - # via - # ipykernel - # jupyter-client - # jupyter-server - # jupyterlab - # notebook - # terminado -tqdm==4.66.2 - # via - # feast (setup.py) - # great-expectations -traitlets==5.14.2 - # via - # comm - # ipykernel - # ipython - # ipywidgets - # jupyter-client - # jupyter-core - # jupyter-events - # jupyter-server - # jupyterlab - # matplotlib-inline - # nbclient - # nbconvert - # nbformat -trino==0.328.0 - # via feast (setup.py) -typeguard==4.1.5 - # via feast (setup.py) -types-protobuf==3.19.22 - # via - # feast (setup.py) - # mypy-protobuf -types-pymysql==1.1.0.1 - # via feast (setup.py) -types-pyopenssl==24.0.0.20240311 - # via types-redis -types-python-dateutil==2.8.19.20240311 - # via - # arrow - # feast (setup.py) -types-pytz==2024.1.0.20240203 - # via feast (setup.py) -types-pyyaml==6.0.12.20240311 - # via feast (setup.py) -types-redis==4.6.0.20240311 - # via feast (setup.py) -types-requests==2.30.0.0 - # via feast (setup.py) -types-setuptools==69.1.0.20240310 - # via feast (setup.py) -types-tabulate==0.9.0.20240106 - # via feast (setup.py) -types-urllib3==1.26.25.14 - # via types-requests -typing-extensions==4.10.0 - # via - # annotated-types - # anyio - # async-lru - # azure-core - # azure-storage-blob - # black - # fastapi - # great-expectations - # ibis-framework - # ipython - # mypy - # pydantic - # pydantic-core - # rich - # snowflake-connector-python - # sqlalchemy2-stubs - # starlette - # typeguard - # uvicorn -tzlocal==5.2 - # via - # great-expectations - # trino -uri-template==1.3.0 - # via jsonschema -uritemplate==4.1.1 - # via google-api-python-client -urllib3==1.26.18 - # via - # botocore - # docker - # feast (setup.py) - # great-expectations - # kubernetes - # minio - # requests - # responses - # rockset - # snowflake-connector-python -uvicorn[standard]==0.28.0 - # via feast (setup.py) -uvloop==0.19.0 - # via uvicorn -virtualenv==20.23.0 - # via - # feast (setup.py) - # pre-commit -volatile==2.1.0 - # via bowler -watchfiles==0.21.0 - # via uvicorn -wcwidth==0.2.13 - # via prompt-toolkit -webcolors==1.13 - # via jsonschema -webencodings==0.5.1 - # via - # bleach - # tinycss2 -websocket-client==1.7.0 - # via - # jupyter-server - # kubernetes -websockets==12.0 - # via uvicorn -werkzeug==3.0.1 - # via moto -wheel==0.43.0 - # via pip-tools -widgetsnbextension==4.0.10 - # via ipywidgets -wrapt==1.16.0 - # via testcontainers -xmltodict==0.13.0 - # via moto -zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt deleted file mode 100644 index e689c011c57..00000000000 --- a/sdk/python/requirements/py3.8-requirements.txt +++ /dev/null @@ -1,214 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.8 -# by the following command: -# -# pip-compile 
--output-file=sdk/python/requirements/py3.8-requirements.txt -# -annotated-types==0.6.0 - # via pydantic -anyio==4.3.0 - # via - # httpx - # starlette - # watchfiles -appdirs==1.4.4 - # via fissix -attrs==23.2.0 - # via - # bowler - # jsonschema - # referencing -bowler==0.9.0 - # via feast (setup.py) -certifi==2024.2.2 - # via - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via requests -click==8.1.7 - # via - # bowler - # dask - # feast (setup.py) - # moreorless - # uvicorn -cloudpickle==3.0.0 - # via dask -colorama==0.4.6 - # via feast (setup.py) -dask==2023.2.1 - # via feast (setup.py) -dill==0.3.8 - # via feast (setup.py) -exceptiongroup==1.2.0 - # via anyio -fastapi==0.110.0 - # via feast (setup.py) -fissix==21.11.13 - # via bowler -fsspec==2024.2.0 - # via dask -greenlet==3.0.3 - # via sqlalchemy -gunicorn==21.2.0 - # via feast (setup.py) -h11==0.14.0 - # via - # httpcore - # uvicorn -httpcore==1.0.4 - # via httpx -httptools==0.6.1 - # via uvicorn -httpx==0.27.0 - # via feast (setup.py) -idna==3.6 - # via - # anyio - # httpx - # requests -importlib-metadata==6.11.0 - # via - # dask - # feast (setup.py) - # typeguard -importlib-resources==6.1.3 - # via - # feast (setup.py) - # jsonschema - # jsonschema-specifications -jinja2==3.1.3 - # via feast (setup.py) -jsonschema==4.21.1 - # via feast (setup.py) -jsonschema-specifications==2023.12.1 - # via jsonschema -locket==1.0.0 - # via partd -markupsafe==2.1.5 - # via jinja2 -mmh3==4.1.0 - # via feast (setup.py) -moreorless==0.4.0 - # via bowler -mypy==1.9.0 - # via sqlalchemy -mypy-extensions==1.0.0 - # via mypy -mypy-protobuf==3.5.0 - # via feast (setup.py) -numpy==1.24.4 - # via - # feast (setup.py) - # pandas - # pyarrow -packaging==24.0 - # via - # dask - # gunicorn -pandas==2.0.3 - # via feast (setup.py) -partd==1.4.1 - # via dask -pkgutil-resolve-name==1.3.10 - # via jsonschema -proto-plus==1.23.0 - # via feast (setup.py) -protobuf==4.25.3 - # via - # feast (setup.py) - # mypy-protobuf - # proto-plus -pyarrow==15.0.1 - # via feast (setup.py) -pydantic==2.6.4 - # via - # fastapi - # feast (setup.py) -pydantic-core==2.16.3 - # via pydantic -pygments==2.17.2 - # via feast (setup.py) -python-dateutil==2.9.0.post0 - # via pandas -python-dotenv==1.0.1 - # via uvicorn -pytz==2024.1 - # via pandas -pyyaml==6.0.1 - # via - # dask - # feast (setup.py) - # uvicorn -referencing==0.33.0 - # via - # jsonschema - # jsonschema-specifications -requests==2.31.0 - # via feast (setup.py) -rpds-py==0.18.0 - # via - # jsonschema - # referencing -six==1.16.0 - # via python-dateutil -sniffio==1.3.1 - # via - # anyio - # httpx -sqlalchemy[mypy]==1.4.52 - # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a38 - # via sqlalchemy -starlette==0.36.3 - # via fastapi -tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) -toml==0.10.2 - # via feast (setup.py) -tomli==2.0.1 - # via mypy -toolz==0.12.1 - # via - # dask - # partd -tqdm==4.66.2 - # via feast (setup.py) -typeguard==4.1.5 - # via feast (setup.py) -types-protobuf==4.24.0.20240311 - # via mypy-protobuf -typing-extensions==4.10.0 - # via - # annotated-types - # anyio - # fastapi - # mypy - # pydantic - # pydantic-core - # sqlalchemy2-stubs - # starlette - # typeguard - # uvicorn -tzdata==2024.1 - # via pandas -urllib3==2.2.1 - # via requests -uvicorn[standard]==0.28.0 - # via feast (setup.py) -uvloop==0.19.0 - # via uvicorn -volatile==2.1.0 - # via bowler -watchfiles==0.21.0 - # via uvicorn -websockets==12.0 - # via uvicorn -zipp==3.17.0 - # via - # 
importlib-metadata - # importlib-resources diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index dc96554431b..367b5dc050c 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -61,11 +61,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.34.65 +boto3==1.34.69 # via # feast (setup.py) # moto -botocore==1.34.65 +botocore==1.34.69 # via # boto3 # moto @@ -82,7 +82,7 @@ cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth -cassandra-driver==3.29.0 +cassandra-driver==3.29.1 # via feast (setup.py) certifi==2024.2.2 # via @@ -164,6 +164,12 @@ docker==7.0.0 # testcontainers docutils==0.19 # via sphinx +duckdb==0.10.1 + # via + # duckdb-engine + # ibis-framework +duckdb-engine==0.11.2 + # via ibis-framework entrypoints==0.4 # via altair exceptiongroup==1.2.0 @@ -199,7 +205,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.17.1 +google-api-core[grpc]==2.18.0 # via # feast (setup.py) # firebase-admin @@ -211,9 +217,9 @@ google-api-core[grpc]==2.17.1 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.122.0 +google-api-python-client==2.123.0 # via firebase-admin -google-auth==2.28.2 +google-auth==2.29.0 # via # google-api-core # google-api-python-client @@ -258,7 +264,7 @@ googleapis-common-protos[grpc]==1.63.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.11 +great-expectations==0.18.12 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -310,7 +316,7 @@ httpx==0.27.0 # via # feast (setup.py) # jupyterlab -ibis-framework==8.0.0 +ibis-framework[duckdb]==8.0.0 # via # feast (setup.py) # ibis-substrait @@ -339,7 +345,7 @@ importlib-metadata==6.11.0 # nbconvert # sphinx # typeguard -importlib-resources==6.3.1 +importlib-resources==6.4.0 # via feast (setup.py) iniconfig==2.0.0 # via pytest @@ -467,7 +473,7 @@ moreorless==0.4.0 # via bowler moto==4.2.14 # via feast (setup.py) -msal==1.27.0 +msal==1.28.0 # via # azure-identity # msal-extensions @@ -491,7 +497,7 @@ mypy-protobuf==3.3.0 # via feast (setup.py) nbclient==0.10.0 # via nbconvert -nbconvert==7.16.2 +nbconvert==7.16.3 # via jupyter-server nbformat==5.10.3 # via @@ -589,6 +595,7 @@ prompt-toolkit==3.0.43 # via ipython proto-plus==1.23.0 # via + # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage # google-cloud-bigtable @@ -695,6 +702,7 @@ pytest==7.4.4 # feast (setup.py) # pytest-benchmark # pytest-cov + # pytest-env # pytest-lazy-fixture # pytest-mock # pytest-ordering @@ -704,6 +712,8 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==4.1.0 # via feast (setup.py) +pytest-env==1.1.3 + # via feast (setup.py) pytest-lazy-fixture==0.6.3 # via feast (setup.py) pytest-mock==1.10.4 @@ -781,7 +791,7 @@ requests==2.31.0 # snowflake-connector-python # sphinx # trino -requests-oauthlib==1.4.0 +requests-oauthlib==2.0.0 # via kubernetes responses==0.25.0 # via moto @@ -854,8 +864,13 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==1.4.52 # via + # duckdb-engine # feast (setup.py) + # ibis-framework # sqlalchemy + # sqlalchemy-views +sqlalchemy-views==0.3.2 + # via ibis-framework sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy sqlglot==20.11.0 @@ -892,6 +907,7 @@ tomli==2.0.1 # pip-tools # pyproject-hooks # pytest + # pytest-env tomlkit==0.12.4 # via snowflake-connector-python 
toolz==0.12.1 @@ -929,7 +945,7 @@ traitlets==5.14.2 # nbformat trino==0.328.0 # via feast (setup.py) -typeguard==4.1.5 +typeguard==4.2.1 # via feast (setup.py) types-protobuf==3.19.22 # via @@ -998,7 +1014,7 @@ urllib3==1.26.18 # responses # rockset # snowflake-connector-python -uvicorn[standard]==0.28.0 +uvicorn[standard]==0.29.0 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index f2228ade027..3b8f555ca74 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -63,7 +63,7 @@ importlib-metadata==6.11.0 # dask # feast (setup.py) # typeguard -importlib-resources==6.3.1 +importlib-resources==6.4.0 # via feast (setup.py) jinja2==3.1.3 # via feast (setup.py) @@ -159,7 +159,7 @@ toolz==0.12.1 # partd tqdm==4.66.2 # via feast (setup.py) -typeguard==4.1.5 +typeguard==4.2.1 # via feast (setup.py) types-protobuf==4.24.0.20240311 # via mypy-protobuf @@ -178,7 +178,7 @@ tzdata==2024.1 # via pandas urllib3==2.2.1 # via requests -uvicorn[standard]==0.28.0 +uvicorn[standard]==0.29.0 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 743a1ce4a0f..1c9a958ce36 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -13,7 +13,6 @@ # limitations under the License. import logging import multiprocessing -import os import random from datetime import datetime, timedelta from multiprocessing import Process @@ -24,8 +23,6 @@ import pytest from _pytest.nodes import Item -os.environ["FEAST_USAGE"] = "False" -os.environ["IS_TEST"] = "True" from feast.feature_store import FeatureStore # noqa: E402 from feast.wait import wait_retry_backoff # noqa: E402 from tests.data.data_creator import create_basic_driver_dataset # noqa: E402 diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 421ef416018..55d2ed84254 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -15,7 +15,7 @@ ) from feast.data_source import DataSource, RequestSource from feast.feature_view_projection import FeatureViewProjection -from feast.on_demand_feature_view import OnDemandPandasTransformation +from feast.on_demand_feature_view import PandasTransformation from feast.types import Array, FeastType, Float32, Float64, Int32, Int64 from tests.integration.feature_repos.universal.entities import ( customer, @@ -71,7 +71,7 @@ def conv_rate_plus_100_feature_view( name=conv_rate_plus_100.__name__, schema=[] if infer_features else _features, sources=sources, - transformation=OnDemandPandasTransformation( + feature_transformation=PandasTransformation( udf=conv_rate_plus_100, udf_string="raw udf source" ), ) @@ -110,7 +110,7 @@ def similarity_feature_view( name=similarity.__name__, sources=sources, schema=[] if infer_features else _fields, - transformation=OnDemandPandasTransformation( + feature_transformation=PandasTransformation( udf=similarity, udf_string="similarity raw udf" ), ) diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 57e625e66b8..3bc2b3fb398 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py 
@@ -14,9 +14,11 @@
 import os
 import time
 from datetime import timedelta
+from unittest import mock
 
 import pytest
 from pytest_lazyfixture import lazy_fixture
+from testcontainers.core.container import DockerContainer
 
 from feast import FileSource
 from feast.data_format import ParquetFormat
@@ -60,12 +62,56 @@ def s3_registry() -> Registry:
     return Registry("project", registry_config, None)
 
 
+@pytest.fixture
+def minio_registry() -> Registry:
+    minio_user = "minio99"
+    minio_password = "minio123"
+    bucket_name = "test-bucket"
+
+    container: DockerContainer = (
+        DockerContainer("quay.io/minio/minio")
+        .with_exposed_ports(9000, 9001)
+        .with_env("MINIO_ROOT_USER", minio_user)
+        .with_env("MINIO_ROOT_PASSWORD", minio_password)
+        .with_command('server /data --console-address ":9001"')
+    )
+
+    container.start()
+
+    exposed_port = container.get_exposed_port("9000")
+    container_host = container.get_container_host_ip()
+
+    container.exec(f"mkdir /data/{bucket_name}")
+
+    registry_config = RegistryConfig(
+        path=f"s3://{bucket_name}/registry.db", cache_ttl_seconds=600
+    )
+
+    mock_environ = {
+        "FEAST_S3_ENDPOINT_URL": f"http://{container_host}:{exposed_port}",
+        "AWS_ACCESS_KEY_ID": minio_user,
+        "AWS_SECRET_ACCESS_KEY": minio_password,
+        "AWS_SESSION_TOKEN": "",
+    }
+
+    with mock.patch.dict(os.environ, mock_environ):
+        yield Registry("project", registry_config, None)
+
+    container.stop()
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize(
     "test_registry",
-    [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")],
+    [
+        lazy_fixture("gcs_registry"),
+        lazy_fixture("s3_registry"),
+        lazy_fixture("minio_registry"),
+    ],
 )
 def test_apply_entity_integration(test_registry):
+
     entity = Entity(
         name="driver_car_id",
         description="Car driver id",
@@ -106,7 +152,11 @@
 @pytest.mark.integration
 @pytest.mark.parametrize(
     "test_registry",
-    [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")],
+    [
+        lazy_fixture("gcs_registry"),
+        lazy_fixture("s3_registry"),
+        lazy_fixture("minio_registry"),
+    ],
 )
 def test_apply_feature_view_integration(test_registry):
     # Create Feature Views
@@ -183,7 +233,11 @@
 @pytest.mark.integration
 @pytest.mark.parametrize(
     "test_registry",
-    [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")],
+    [
+        lazy_fixture("gcs_registry"),
+        lazy_fixture("s3_registry"),
+        lazy_fixture("minio_registry"),
+    ],
 )
 def test_apply_data_source_integration(test_registry: Registry):
     validate_registry_data_source_apply(test_registry)
diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py
index ce40295f8b6..c209f1e0e0b 100644
--- a/sdk/python/tests/unit/diff/test_registry_diff.py
+++ b/sdk/python/tests/unit/diff/test_registry_diff.py
@@ -137,13 +137,14 @@ def post_changed(inputs: pd.DataFrame) -> pd.DataFrame:
     # if no code is changed
     assert len(feast_object_diffs.feast_object_property_diffs) == 3
     assert feast_object_diffs.feast_object_property_diffs[0].property_name == "name"
+    # Note: diffs should now be reported on the feature_transformation field
     assert (
         feast_object_diffs.feast_object_property_diffs[1].property_name
-        == "user_defined_function.name"
+        == "feature_transformation.name"
     )
     assert (
         feast_object_diffs.feast_object_property_diffs[2].property_name
-        == "user_defined_function.body_text"
+        == "feature_transformation.body_text"
     )
diff --git a/sdk/python/tests/unit/infra/offline_stores/test_ibis.py b/sdk/python/tests/unit/infra/offline_stores/test_ibis.py
new file mode 100644
index 00000000000..5f105e2af70
--- /dev/null
+++ b/sdk/python/tests/unit/infra/offline_stores/test_ibis.py
@@ -0,0 +1,138 @@
+from datetime import datetime, timedelta
+from operator import itemgetter
+from typing import Dict, List, Tuple
+
+import ibis
+import pyarrow as pa
+
+from feast.infra.offline_stores.contrib.ibis_offline_store.ibis import (
+    point_in_time_join,
+)
+
+
+def pa_datetime(year, month, day):
+    return pa.scalar(datetime(year, month, day), type=pa.timestamp("s", tz="UTC"))
+
+
+def customer_table():
+    return pa.Table.from_arrays(
+        arrays=[
+            pa.array([1, 1, 2]),
+            pa.array(
+                [
+                    pa_datetime(2024, 1, 1),
+                    pa_datetime(2024, 1, 2),
+                    pa_datetime(2024, 1, 1),
+                ]
+            ),
+        ],
+        names=["customer_id", "event_timestamp"],
+    )
+
+
+def features_table_1():
+    return pa.Table.from_arrays(
+        arrays=[
+            pa.array([1, 1, 1, 2]),
+            pa.array(
+                [
+                    pa_datetime(2023, 12, 31),
+                    pa_datetime(2024, 1, 2),
+                    pa_datetime(2024, 1, 3),
+                    pa_datetime(2023, 1, 3),
+                ]
+            ),
+            pa.array([11, 22, 33, 22]),
+        ],
+        names=["customer_id", "event_timestamp", "feature1"],
+    )
+
+
+def point_in_time_join_brute(
+    entity_table: pa.Table,
+    feature_tables: List[Tuple[pa.Table, str, Dict[str, str], List[str], timedelta]],
+    event_timestamp_col="event_timestamp",
+):
+    # Reference implementation: for each entity row, pick the latest feature row
+    # whose join keys match and whose timestamp falls within [ts - ttl, ts].
+    ret_fields = [entity_table.schema.field(n) for n in entity_table.schema.names]
+
+    ret = entity_table.to_pydict()
+    batch_dict = entity_table.to_pydict()
+
+    for i, row_timestamp in enumerate(batch_dict[event_timestamp_col]):
+        for (
+            feature_table,
+            timestamp_key,
+            join_key_map,
+            feature_refs,
+            ttl,
+        ) in feature_tables:
+            if i == 0:
+                ret_fields.extend(
+                    [
+                        feature_table.schema.field(f)
+                        for f in feature_table.schema.names
+                        if f not in join_key_map.values() and f != timestamp_key
+                    ]
+                )
+
+            def check_equality(ft_dict, batch_dict, x, y):
+                return all(
+                    [ft_dict[k][x] == batch_dict[v][y] for k, v in join_key_map.items()]
+                )
+
+            ft_dict = feature_table.to_pydict()
+            found_matches = [
+                (j, ft_dict[timestamp_key][j])
+                for j in range(feature_table.num_rows)
+                if check_equality(ft_dict, batch_dict, j, i)
+                and ft_dict[timestamp_key][j] <= row_timestamp
+                and ft_dict[timestamp_key][j] >= row_timestamp - ttl
+            ]
+
+            index_found = (
+                max(found_matches, key=itemgetter(1))[0] if found_matches else None
+            )
+            for col in ft_dict.keys():
+                if col not in feature_refs:
+                    continue
+
+                if col not in ret:
+                    ret[col] = []
+
+                if index_found is not None:
+                    ret[col].append(ft_dict[col][index_found])
+                else:
+                    ret[col].append(None)
+
+    return pa.Table.from_pydict(ret, schema=pa.schema(ret_fields))
+
+
+def test_point_in_time_join():
+    expected = point_in_time_join_brute(
+        customer_table(),
+        feature_tables=[
+            (
+                features_table_1(),
+                "event_timestamp",
+                {"customer_id": "customer_id"},
+                ["feature1"],
+                timedelta(days=10),
+            )
+        ],
+    )
+
+    actual = point_in_time_join(
+        ibis.memtable(customer_table()),
+        feature_tables=[
+            (
+                ibis.memtable(features_table_1()),
+                "event_timestamp",
+                {"customer_id": "customer_id"},
+                ["feature1"],
+                timedelta(days=10),
+            )
+        ],
+    ).to_pyarrow()
+
+    assert actual.equals(expected)
diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py b/sdk/python/tests/unit/infra/test_inference_unit_tests.py
index a108d397bd9..be97a838bda 100644
--- a/sdk/python/tests/unit/infra/test_inference_unit_tests.py
+++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py
@@ -1,3 +1,5
@@ +from typing import Any, Dict + import pandas as pd import pytest @@ -51,7 +53,7 @@ def test_infer_datasource_names_dwh(): data_source = dwh_class(query="test_query") -def test_on_demand_features_type_inference(): +def test_on_demand_features_valid_type_inference(): # Create Feature Views date_request = RequestSource( name="date_request", @@ -73,6 +75,31 @@ def test_view(features_df: pd.DataFrame) -> pd.DataFrame: test_view.infer_features() + @on_demand_feature_view( + sources=[date_request], + schema=[ + Field(name="output", dtype=UnixTimestamp), + Field(name="object_output", dtype=String), + ], + mode="python", + ) + def python_native_test_view(input_dict: Dict[str, Any]) -> Dict[str, Any]: + output_dict: Dict[str, Any] = { + "output": input_dict["some_date"], + "object_output": str(input_dict["some_date"]), + } + return output_dict + + python_native_test_view.infer_features() + + +def test_on_demand_features_invalid_type_inference(): + # Create Feature Views + date_request = RequestSource( + name="date_request", + schema=[Field(name="some_date", dtype=UnixTimestamp)], + ) + @on_demand_feature_view( sources=[date_request], schema=[ @@ -96,13 +123,49 @@ def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: ], sources=[date_request], ) - def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: + def view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: data = pd.DataFrame() data["output"] = features_df["some_date"] return data with pytest.raises(SpecifiedFeaturesNotPresentError): - test_view_with_missing_feature.infer_features() + view_with_missing_feature.infer_features() + + with pytest.raises(TypeError): + + @on_demand_feature_view( + sources=[date_request], + schema=[ + Field(name="output", dtype=UnixTimestamp), + Field(name="object_output", dtype=String), + ], + mode="pandas", + ) + def python_native_test_invalid_pandas_view( + input_dict: Dict[str, Any] + ) -> Dict[str, Any]: + output_dict: Dict[str, Any] = { + "output": input_dict["some_date"], + "object_output": str(input_dict["some_date"]), + } + return output_dict + + with pytest.raises(TypeError): + + @on_demand_feature_view( + sources=[date_request], + schema=[ + Field(name="output", dtype=UnixTimestamp), + Field(name="object_output", dtype=String), + ], + mode="python", + ) + def python_native_test_invalid_dict_view( + features_df: pd.DataFrame, + ) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + return data def test_datasource_inference(): diff --git a/sdk/python/tests/unit/test_feature_views.py b/sdk/python/tests/unit/test_feature_views.py index 2ad9680703f..0220d1a8a95 100644 --- a/sdk/python/tests/unit/test_feature_views.py +++ b/sdk/python/tests/unit/test_feature_views.py @@ -1,22 +1,16 @@ -import copy from datetime import timedelta import pytest from typeguard import TypeCheckError -from feast.aggregation import Aggregation from feast.batch_feature_view import BatchFeatureView from feast.data_format import AvroFormat -from feast.data_source import KafkaSource, PushSource +from feast.data_source import KafkaSource from feast.entity import Entity from feast.feature_view import FeatureView from feast.field import Field from feast.infra.offline_stores.file_source import FileSource -from feast.protos.feast.core.StreamFeatureView_pb2 import ( - StreamFeatureView as StreamFeatureViewProto, -) from feast.protos.feast.types.Value_pb2 import ValueType -from feast.stream_feature_view import StreamFeatureView, stream_feature_view from 
feast.types import Float32 @@ -65,169 +59,10 @@ def test_create_batch_feature_view(): ) -def test_create_stream_feature_view(): - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - ) - StreamFeatureView( - name="test kafka stream feature view", - entities=[], - ttl=timedelta(days=30), - source=stream_source, - aggregations=[], - ) - - push_source = PushSource( - name="push source", batch_source=FileSource(path="some path") - ) - StreamFeatureView( - name="test push source feature view", - entities=[], - ttl=timedelta(days=30), - source=push_source, - aggregations=[], - ) - - with pytest.raises(TypeError): - StreamFeatureView( - name="test batch feature view", - entities=[], - ttl=timedelta(days=30), - aggregations=[], - ) - - with pytest.raises(ValueError): - StreamFeatureView( - name="test batch feature view", - entities=[], - ttl=timedelta(days=30), - source=FileSource(path="some path"), - aggregations=[], - ) - - def simple_udf(x: int): return x + 3 -def test_stream_feature_view_serialization(): - entity = Entity(name="driver_entity", join_keys=["test_key"]) - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - ) - - sfv = StreamFeatureView( - name="test kafka stream feature view", - entities=[entity], - ttl=timedelta(days=30), - owner="test@example.com", - online=True, - schema=[Field(name="dummy_field", dtype=Float32)], - description="desc", - aggregations=[ - Aggregation( - column="dummy_field", - function="max", - time_window=timedelta(days=1), - ) - ], - timestamp_field="event_timestamp", - mode="spark", - source=stream_source, - udf=simple_udf, - tags={}, - ) - - sfv_proto = sfv.to_proto() - - new_sfv = StreamFeatureView.from_proto(sfv_proto=sfv_proto) - assert new_sfv == sfv - - -def test_stream_feature_view_udfs(): - entity = Entity(name="driver_entity", join_keys=["test_key"]) - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - ) - - @stream_feature_view( - entities=[entity], - ttl=timedelta(days=30), - owner="test@example.com", - online=True, - schema=[Field(name="dummy_field", dtype=Float32)], - description="desc", - aggregations=[ - Aggregation( - column="dummy_field", - function="max", - time_window=timedelta(days=1), - ) - ], - timestamp_field="event_timestamp", - source=stream_source, - ) - def pandas_udf(pandas_df): - import pandas as pd - - assert type(pandas_df) == pd.DataFrame - df = pandas_df.transform(lambda x: x + 10, axis=1) - return df - - import pandas as pd - - df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]}) - sfv = pandas_udf - sfv_proto = sfv.to_proto() - new_sfv = StreamFeatureView.from_proto(sfv_proto) - new_df = new_sfv.udf(df) - - expected_df = pd.DataFrame({"A": [11, 12, 13], "B": [20, 30, 40]}) - - assert new_df.equals(expected_df) - - -def test_stream_feature_view_initialization_with_optional_fields_omitted(): - entity = Entity(name="driver_entity", join_keys=["test_key"]) - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - 
) - - sfv = StreamFeatureView( - name="test kafka stream feature view", - entities=[entity], - schema=[], - description="desc", - timestamp_field="event_timestamp", - source=stream_source, - tags={}, - ) - sfv_proto = sfv.to_proto() - - new_sfv = StreamFeatureView.from_proto(sfv_proto=sfv_proto) - assert new_sfv == sfv - - def test_hash(): file_source = FileSource(name="my-file-source", path="test.parquet") feature_view_1 = FeatureView( @@ -282,41 +117,3 @@ def test_hash(): def test_field_types(): with pytest.raises(TypeCheckError): Field(name="name", dtype=ValueType.INT32) - - -def test_stream_feature_view_proto_type(): - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - ) - sfv = StreamFeatureView( - name="test stream featureview proto class", - entities=[], - ttl=timedelta(days=30), - source=stream_source, - aggregations=[], - ) - assert sfv.proto_class is StreamFeatureViewProto - - -def test_stream_feature_view_copy(): - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - ) - sfv = StreamFeatureView( - name="test stream featureview proto class", - entities=[], - ttl=timedelta(days=30), - source=stream_source, - aggregations=[], - ) - assert sfv == copy.copy(sfv) diff --git a/sdk/python/tests/unit/test_on_demand_feature_view.py b/sdk/python/tests/unit/test_on_demand_feature_view.py index 66d02c65d13..cf4afa94228 100644 --- a/sdk/python/tests/unit/test_on_demand_feature_view.py +++ b/sdk/python/tests/unit/test_on_demand_feature_view.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Any, Dict
+
 import pandas as pd
 import pytest
 
@@ -20,7 +22,8 @@
 from feast.infra.offline_stores.file_source import FileSource
 from feast.on_demand_feature_view import (
     OnDemandFeatureView,
-    OnDemandPandasTransformation,
+    PandasTransformation,
+    PythonTransformation,
 )
 from feast.types import Float32
 
@@ -35,10 +38,18 @@ def udf1(features_df: pd.DataFrame) -> pd.DataFrame:
 def udf2(features_df: pd.DataFrame) -> pd.DataFrame:
     df = pd.DataFrame()
     df["output1"] = features_df["feature1"] + 100
-    df["output2"] = features_df["feature2"] + 100
+    df["output2"] = features_df["feature2"] + 101
     return df
 
 
+def python_native_udf(features_dict: Dict[str, Any]) -> Dict[str, Any]:
+    output_dict: Dict[str, Any] = {
+        "output1": features_dict["feature1"] + 100,
+        "output2": features_dict["feature2"] + 101,
+    }
+    return output_dict
+
+
 @pytest.mark.filterwarnings("ignore:udf and udf_string parameters are deprecated")
 def test_hash():
     file_source = FileSource(name="my-file-source", path="test.parquet")
@@ -59,7 +70,7 @@ def test_hash():
         Field(name="output1", dtype=Float32),
         Field(name="output2", dtype=Float32),
     ],
-        transformation=OnDemandPandasTransformation(
+        feature_transformation=PandasTransformation(
             udf=udf1, udf_string="udf1 source code"
         ),
     )
@@ -70,7 +81,7 @@ def test_hash():
         Field(name="output1", dtype=Float32),
         Field(name="output2", dtype=Float32),
     ],
-        transformation=OnDemandPandasTransformation(
+        feature_transformation=PandasTransformation(
            udf=udf1, udf_string="udf1 source code"
         ),
     )
@@ -81,7 +92,7 @@ def test_hash():
         Field(name="output1", dtype=Float32),
         Field(name="output2", dtype=Float32),
     ],
-        transformation=OnDemandPandasTransformation(
+        feature_transformation=PandasTransformation(
             udf=udf2, udf_string="udf2 source code"
         ),
     )
@@ -92,7 +103,7 @@ def test_hash():
         Field(name="output1", dtype=Float32),
         Field(name="output2", dtype=Float32),
     ],
-        transformation=OnDemandPandasTransformation(
+        feature_transformation=PandasTransformation(
             udf=udf2, udf_string="udf2 source code"
         ),
         description="test",
     )
@@ -104,8 +115,9 @@ def test_hash():
         Field(name="output1", dtype=Float32),
         Field(name="output2", dtype=Float32),
     ],
-        udf=udf2,
-        udf_string="udf2 source code",
+        feature_transformation=PandasTransformation(
+            udf=udf2, udf_string="udf2 source code"
+        ),
         description="test",
     )
 
@@ -126,6 +138,137 @@ def test_hash():
     }
     assert len(s4) == 3
 
-    assert on_demand_feature_view_5.transformation == OnDemandPandasTransformation(
+    assert on_demand_feature_view_5.feature_transformation == PandasTransformation(
         udf2, "udf2 source code"
     )
+
+
+def test_python_native_transformation_mode():
+    file_source = FileSource(name="my-file-source", path="test.parquet")
+    feature_view = FeatureView(
+        name="my-feature-view",
+        entities=[],
+        schema=[
+            Field(name="feature1", dtype=Float32),
+            Field(name="feature2", dtype=Float32),
+        ],
+        source=file_source,
+    )
+    sources = [feature_view]
+
+    on_demand_feature_view_python_native = OnDemandFeatureView(
+        name="my-on-demand-feature-view",
+        sources=sources,
+        schema=[
+            Field(name="output1", dtype=Float32),
+            Field(name="output2", dtype=Float32),
+        ],
+        feature_transformation=PythonTransformation(
+            udf=python_native_udf, udf_string="python native udf source code"
+        ),
+        description="test",
+        mode="python",
+    )
+
+    on_demand_feature_view_python_native_err = OnDemandFeatureView(
+        name="my-on-demand-feature-view",
+        sources=sources,
+        schema=[
+            Field(name="output1", dtype=Float32),
+            Field(name="output2", dtype=Float32),
+        ],
+        feature_transformation=PandasTransformation(
+            udf=python_native_udf, udf_string="python native udf source code"
+        ),
+        description="test",
+        mode="python",
+    )
+
+    assert (
+        on_demand_feature_view_python_native.feature_transformation
+        == PythonTransformation(python_native_udf, "python native udf source code")
+    )
+
+    with pytest.raises(TypeError):
+        assert (
+            on_demand_feature_view_python_native_err.feature_transformation
+            == PythonTransformation(python_native_udf, "python native udf source code")
+        )
+
+    with pytest.raises(TypeError):
+        # Passing a dict to a PandasTransformation should fail
+        on_demand_feature_view_python_native_err.feature_transformation.transform(
+            {
+                "feature1": 0,
+                "feature2": 1,
+            }
+        )
+
+    assert on_demand_feature_view_python_native.get_transformed_features(
+        {
+            "feature1": 0,
+            "feature2": 1,
+        }
+    ) == {"feature1": 0, "feature2": 1, "output1": 100, "output2": 102}
+
+
+@pytest.mark.filterwarnings("ignore:udf and udf_string parameters are deprecated")
+def test_from_proto_backwards_compatible_udf():
+    file_source = FileSource(name="my-file-source", path="test.parquet")
+    feature_view = FeatureView(
+        name="my-feature-view",
+        entities=[],
+        schema=[
+            Field(name="feature1", dtype=Float32),
+            Field(name="feature2", dtype=Float32),
+        ],
+        source=file_source,
+    )
+    sources = [feature_view]
+    on_demand_feature_view = OnDemandFeatureView(
+        name="my-on-demand-feature-view",
+        sources=sources,
+        schema=[
+            Field(name="output1", dtype=Float32),
+            Field(name="output2", dtype=Float32),
+        ],
+        feature_transformation=PandasTransformation(
+            udf=udf1, udf_string="udf1 source code"
+        ),
+    )
+
+    # The proto should carry "udf1 source code" in
+    # feature_transformation.user_defined_function.body_text
+    proto = on_demand_feature_view.to_proto()
+    assert (
+        on_demand_feature_view.feature_transformation.udf_string
+        == proto.spec.feature_transformation.user_defined_function.body_text
+    )
+    # With the current code the legacy top-level user_defined_function is left empty
+    assert proto.spec.user_defined_function.body_text == ""
+    assert proto.spec.user_defined_function.body == b""
+    assert proto.spec.user_defined_function.name == ""
+
+    # Simulate an old registry entry: copy the feature_transformation values
+    # into the legacy user_defined_function field
+    proto.spec.user_defined_function.name = (
+        proto.spec.feature_transformation.user_defined_function.name
+    )
+    proto.spec.user_defined_function.body = (
+        proto.spec.feature_transformation.user_defined_function.body
+    )
+    proto.spec.user_defined_function.body_text = (
+        proto.spec.feature_transformation.user_defined_function.body_text
+    )
+
+    # Null out the feature_transformation part of the proto before deserializing
+    proto.spec.feature_transformation.user_defined_function.name = ""
+    proto.spec.feature_transformation.user_defined_function.body = b""
+    proto.spec.feature_transformation.user_defined_function.body_text = ""
+
+    # We now expect to get the same object back under feature_transformation
+    deserialized_view = OnDemandFeatureView.from_proto(proto)
+    assert (
+        deserialized_view.feature_transformation.udf_string
+        == on_demand_feature_view.feature_transformation.udf_string
+    )
diff --git a/sdk/python/tests/unit/test_stream_feature_view.py b/sdk/python/tests/unit/test_stream_feature_view.py
new file mode 100644
index 00000000000..b53f9a593ae
--- /dev/null
+++ b/sdk/python/tests/unit/test_stream_feature_view.py
@@ -0,0
+1,252 @@ +import copy +from datetime import timedelta + +import pytest + +from feast.aggregation import Aggregation +from feast.batch_feature_view import BatchFeatureView +from feast.data_format import AvroFormat +from feast.data_source import KafkaSource, PushSource +from feast.entity import Entity +from feast.field import Field +from feast.infra.offline_stores.file_source import FileSource +from feast.protos.feast.core.StreamFeatureView_pb2 import ( + StreamFeatureView as StreamFeatureViewProto, +) +from feast.stream_feature_view import StreamFeatureView, stream_feature_view +from feast.types import Float32 + + +def test_create_batch_feature_view(): + batch_source = FileSource(path="some path") + BatchFeatureView( + name="test batch feature view", + entities=[], + ttl=timedelta(days=30), + source=batch_source, + ) + + with pytest.raises(TypeError): + BatchFeatureView( + name="test batch feature view", entities=[], ttl=timedelta(days=30) + ) + + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + with pytest.raises(ValueError): + BatchFeatureView( + name="test batch feature view", + entities=[], + ttl=timedelta(days=30), + source=stream_source, + ) + + +def test_create_stream_feature_view(): + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + StreamFeatureView( + name="test kafka stream feature view", + entities=[], + ttl=timedelta(days=30), + source=stream_source, + aggregations=[], + ) + + push_source = PushSource( + name="push source", batch_source=FileSource(path="some path") + ) + StreamFeatureView( + name="test push source feature view", + entities=[], + ttl=timedelta(days=30), + source=push_source, + aggregations=[], + ) + + with pytest.raises(TypeError): + StreamFeatureView( + name="test batch feature view", + entities=[], + ttl=timedelta(days=30), + aggregations=[], + ) + + with pytest.raises(ValueError): + StreamFeatureView( + name="test batch feature view", + entities=[], + ttl=timedelta(days=30), + source=FileSource(path="some path"), + aggregations=[], + ) + + +def simple_udf(x: int): + return x + 3 + + +def test_stream_feature_view_serialization(): + entity = Entity(name="driver_entity", join_keys=["test_key"]) + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + + sfv = StreamFeatureView( + name="test kafka stream feature view", + entities=[entity], + ttl=timedelta(days=30), + owner="test@example.com", + online=True, + schema=[Field(name="dummy_field", dtype=Float32)], + description="desc", + aggregations=[ + Aggregation( + column="dummy_field", + function="max", + time_window=timedelta(days=1), + ) + ], + timestamp_field="event_timestamp", + mode="spark", + source=stream_source, + udf=simple_udf, + tags={}, + ) + + sfv_proto = sfv.to_proto() + + new_sfv = StreamFeatureView.from_proto(sfv_proto=sfv_proto) + assert new_sfv == sfv + assert ( + sfv_proto.spec.feature_transformation.user_defined_function.name == "simple_udf" + ) + + +def test_stream_feature_view_udfs(): + entity = Entity(name="driver_entity", join_keys=["test_key"]) + stream_source = KafkaSource( + name="kafka", + 
timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + + @stream_feature_view( + entities=[entity], + ttl=timedelta(days=30), + owner="test@example.com", + online=True, + schema=[Field(name="dummy_field", dtype=Float32)], + description="desc", + aggregations=[ + Aggregation( + column="dummy_field", + function="max", + time_window=timedelta(days=1), + ) + ], + timestamp_field="event_timestamp", + source=stream_source, + ) + def pandas_udf(pandas_df): + import pandas as pd + + assert type(pandas_df) == pd.DataFrame + df = pandas_df.transform(lambda x: x + 10, axis=1) + return df + + import pandas as pd + + df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]}) + sfv = pandas_udf + sfv_proto = sfv.to_proto() + new_sfv = StreamFeatureView.from_proto(sfv_proto) + new_df = new_sfv.udf(df) + + expected_df = pd.DataFrame({"A": [11, 12, 13], "B": [20, 30, 40]}) + + assert new_df.equals(expected_df) + + +def test_stream_feature_view_initialization_with_optional_fields_omitted(): + entity = Entity(name="driver_entity", join_keys=["test_key"]) + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + + sfv = StreamFeatureView( + name="test kafka stream feature view", + entities=[entity], + schema=[], + description="desc", + timestamp_field="event_timestamp", + source=stream_source, + tags={}, + ) + sfv_proto = sfv.to_proto() + + new_sfv = StreamFeatureView.from_proto(sfv_proto=sfv_proto) + assert new_sfv == sfv + + +def test_stream_feature_view_proto_type(): + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + sfv = StreamFeatureView( + name="test stream featureview proto class", + entities=[], + ttl=timedelta(days=30), + source=stream_source, + aggregations=[], + ) + assert sfv.proto_class is StreamFeatureViewProto + + +def test_stream_feature_view_copy(): + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + sfv = StreamFeatureView( + name="test stream featureview proto class", + entities=[], + ttl=timedelta(days=30), + source=stream_source, + aggregations=[], + ) + assert sfv == copy.copy(sfv) diff --git a/sdk/python/tests/unit/test_on_demand_substrait_transformation.py b/sdk/python/tests/unit/test_substrait_transformation.py similarity index 72% rename from sdk/python/tests/unit/test_on_demand_substrait_transformation.py rename to sdk/python/tests/unit/test_substrait_transformation.py index c9d30c5b7af..28ab68c70be 100644 --- a/sdk/python/tests/unit/test_on_demand_substrait_transformation.py +++ b/sdk/python/tests/unit/test_substrait_transformation.py @@ -60,6 +60,7 @@ def test_ibis_pandas_parity(): @on_demand_feature_view( sources=[driver_stats_fv], schema=[Field(name="conv_rate_plus_acc", dtype=Float64)], + mode="pandas", ) def pandas_view(inputs: pd.DataFrame) -> pd.DataFrame: df = pd.DataFrame() @@ -71,6 +72,7 @@ def pandas_view(inputs: pd.DataFrame) -> pd.DataFrame: @on_demand_feature_view( sources=[driver_stats_fv[["conv_rate", "acc_rate"]]], schema=[Field(name="conv_rate_plus_acc_substrait", dtype=Float64)], + 
mode="substrait", ) def substrait_view(inputs: Table) -> Table: return inputs.select( @@ -83,30 +85,50 @@ def substrait_view(inputs: Table) -> Table: [driver, driver_stats_source, driver_stats_fv, substrait_view, pandas_view] ) + store.materialize( + start_date=start_date, + end_date=end_date, + ) + entity_df = pd.DataFrame.from_dict( { # entity's join key -> entity values "driver_id": [1001, 1002, 1003], # "event_timestamp" (reserved key) -> timestamps "event_timestamp": [ - datetime(2021, 4, 12, 10, 59, 42), - datetime(2021, 4, 12, 8, 12, 10), - datetime(2021, 4, 12, 16, 40, 26), + start_date + timedelta(days=4), + start_date + timedelta(days=5), + start_date + timedelta(days=6), ], } ) + requested_features = [ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + "substrait_view:conv_rate_plus_acc_substrait", + "pandas_view:conv_rate_plus_acc", + ] + training_df = store.get_historical_features( - entity_df=entity_df, - features=[ - "driver_hourly_stats:conv_rate", - "driver_hourly_stats:acc_rate", - "driver_hourly_stats:avg_daily_trips", - "substrait_view:conv_rate_plus_acc_substrait", - "pandas_view:conv_rate_plus_acc", - ], - ).to_df() + entity_df=entity_df, features=requested_features + ) + + assert training_df.to_df()["conv_rate_plus_acc"].equals( + training_df.to_df()["conv_rate_plus_acc_substrait"] + ) + + assert training_df.to_arrow()["conv_rate_plus_acc"].equals( + training_df.to_arrow()["conv_rate_plus_acc_substrait"] + ) + + online_response = store.get_online_features( + features=requested_features, + entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}, {"driver_id": 1003}], + ) - assert training_df["conv_rate_plus_acc"].equals( - training_df["conv_rate_plus_acc_substrait"] + assert ( + online_response.to_dict()["conv_rate_plus_acc"] + == online_response.to_dict()["conv_rate_plus_acc_substrait"] ) diff --git a/setup.py b/setup.py index b32d03ed77c..ef3ba1d7843 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ "typeguard>=4.0.0", "fastapi>=0.68.0", "uvicorn[standard]>=0.14.0,<1", - "gunicorn", + "gunicorn; platform_system != 'Windows'", # https://github.com/dask/dask/issues/10996 "dask>=2021.1.0,<2024.3.0", "bowler", # Needed for automatic repo upgrades @@ -177,6 +177,7 @@ "pytest-timeout==1.4.2", "pytest-ordering~=0.6.0", "pytest-mock==1.10.4", + "pytest-env", "Sphinx>4.0.0,<7", "testcontainers>=3.5,<4", "firebase-admin>=5.2.0,<6", @@ -211,6 +212,7 @@ + HAZELCAST_REQUIRED + IBIS_REQUIRED + GRPCIO_REQUIRED + + DUCKDB_REQUIRED ) DOCS_REQUIRED = CI_REQUIRED diff --git a/ui/src/pages/feature-views/OnDemandFeatureViewOverviewTab.tsx b/ui/src/pages/feature-views/OnDemandFeatureViewOverviewTab.tsx index ee8e41bbf6c..aac3f6ac5bd 100644 --- a/ui/src/pages/feature-views/OnDemandFeatureViewOverviewTab.tsx +++ b/ui/src/pages/feature-views/OnDemandFeatureViewOverviewTab.tsx @@ -57,7 +57,7 @@ const OnDemandFeatureViewOverviewTab = ({ - {data?.spec?.userDefinedFunction?.bodyText} + {data?.spec?.featureTransformation?.userDefinedFunction?.bodyText}