diff --git a/docs/getting-started/concepts/feature-view.md b/docs/getting-started/concepts/feature-view.md index 0bb4f96a31..3f7f59547a 100644 --- a/docs/getting-started/concepts/feature-view.md +++ b/docs/getting-started/concepts/feature-view.md @@ -130,9 +130,9 @@ Feature names must be unique within a [feature view](feature-view.md#feature-vie On demand feature views allows users to use existing features and request time data (features only available at request time) to transform and create new features. Users define python transformation logic which is executed in both historical retrieval and online retrieval paths: ```python -# Define a request data source which encodes features / information only +# Define a request data source which encodes features / information only # available at request time (e.g. part of the user initiated HTTP request) -input_request = RequestDataSource( +input_request = RequestSource( name="vals_to_add", schema={ "val_to_add": ValueType.INT64, diff --git a/examples/java-demo/feature_repo/driver_repo.py b/examples/java-demo/feature_repo/driver_repo.py index 233593ff02..09db18f036 100644 --- a/examples/java-demo/feature_repo/driver_repo.py +++ b/examples/java-demo/feature_repo/driver_repo.py @@ -1,6 +1,6 @@ import pandas as pd from feast import Entity, Feature, FeatureView, FileSource, ValueType -from feast.data_source import RequestDataSource +from feast.data_source import RequestSource from feast.on_demand_feature_view import on_demand_feature_view from feast.request_feature_view import RequestFeatureView from google.protobuf.duration_pb2 import Duration @@ -28,13 +28,13 @@ # Define a request data source which encodes features / information only # available at request time (e.g. part of the user initiated HTTP request) -input_request = RequestDataSource( +input_request = RequestSource( name="vals_to_add", schema={"val_to_add": ValueType.INT64, "val_to_add_2": ValueType.INT64}, ) # Define an on demand feature view which can generate new features based on -# existing feature views and RequestDataSource features +# existing feature views and RequestSource features @on_demand_feature_view( inputs={ "driver_hourly_stats": driver_hourly_stats_view, @@ -55,7 +55,7 @@ def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame: # Define request feature view driver_age_request_fv = RequestFeatureView( name="driver_age", - request_data_source=RequestDataSource( + request_source=RequestSource( name="driver_age", schema={"driver_age": ValueType.INT64,} ), ) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index db9eabfb9f..f0f1d8ef52 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import enum import warnings from abc import ABC, abstractmethod @@ -144,7 +143,7 @@ def to_proto(self) -> DataSourceProto.KinesisOptions: DataSourceProto.SourceType.BATCH_SNOWFLAKE: "feast.infra.offline_stores.snowflake_source.SnowflakeSource", DataSourceProto.SourceType.STREAM_KAFKA: "feast.data_source.KafkaSource", DataSourceProto.SourceType.STREAM_KINESIS: "feast.data_source.KinesisSource", - DataSourceProto.SourceType.REQUEST_SOURCE: "feast.data_source.RequestDataSource", + DataSourceProto.SourceType.REQUEST_SOURCE: "feast.data_source.RequestSource", DataSourceProto.SourceType.PUSH_SOURCE: "feast.data_source.PushSource", } @@ -422,9 +421,9 @@ def get_table_query_string(self) -> str: raise NotImplementedError -class RequestDataSource(DataSource): +class RequestSource(DataSource): """ - RequestDataSource that can be used to provide input features for on demand transforms + RequestSource that can be used to provide input features for on demand transforms Args: name: Name of the request data source @@ -446,7 +445,7 @@ def __init__( tags: Optional[Dict[str, str]] = None, owner: Optional[str] = "", ): - """Creates a RequestDataSource object.""" + """Creates a RequestSource object.""" super().__init__(name=name, description=description, tags=tags, owner=owner) self.schema = schema @@ -464,7 +463,7 @@ def from_proto(data_source: DataSourceProto): schema = {} for key, val in schema_pb.items(): schema[key] = ValueType(val) - return RequestDataSource( + return RequestSource( name=data_source.name, schema=schema, description=data_source.description, @@ -496,6 +495,15 @@ def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: raise NotImplementedError +class RequestDataSource(RequestSource): + def __init__(self, *args, **kwargs): + warnings.warn( + "The 'RequestDataSource' class is deprecated and was renamed to RequestSource. Please use RequestSource instead. This class name will be removed in Feast 0.23.", + DeprecationWarning, + ) + super().__init__(*args, **kwargs) + + class KinesisSource(DataSource): def validate(self, config: RepoConfig): pass diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index b060d286fd..06b5ad57b9 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -679,7 +679,7 @@ def apply( data_sources_set_to_update.add(rfv.request_data_source) for odfv in odfvs_to_update: - for v in odfv.source_request_data_sources.values(): + for v in odfv.source_request_sources.values(): data_sources_set_to_update.add(v) data_sources_to_update = list(data_sources_set_to_update) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 51c4e9d78e..a4ea1859aa 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -9,7 +9,7 @@ RedshiftSource, SnowflakeSource, ) -from feast.data_source import DataSource, RequestDataSource +from feast.data_source import DataSource, RequestSource from feast.errors import RegistryInferenceFailure from feast.feature_view import FeatureView from feast.repo_config import RepoConfig @@ -78,7 +78,7 @@ def update_data_sources_with_inferred_event_timestamp_col( ERROR_MSG_PREFIX = "Unable to infer DataSource event_timestamp_column" for data_source in data_sources: - if isinstance(data_source, RequestDataSource): + if isinstance(data_source, RequestSource): continue if ( data_source.event_timestamp_column is None diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index 483d80892e..9ea1c055de 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -8,7 +8,7 @@ import pandas as pd from feast.base_feature_view import BaseFeatureView -from feast.data_source import RequestDataSource +from feast.data_source import RequestSource from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError from feast.feature import Feature from feast.feature_view import FeatureView @@ -45,8 +45,8 @@ class OnDemandFeatureView(BaseFeatureView): features: The list of features in the output of the on demand feature view. source_feature_view_projections: A map from input source names to actual input sources with type FeatureViewProjection. - source_request_data_sources: A map from input source names to the actual input - sources with type RequestDataSource. + source_request_sources: A map from input source names to the actual input + sources with type RequestSource. udf: The user defined transformation function, which must take pandas dataframes as inputs. description: A human-readable description. @@ -59,7 +59,7 @@ class OnDemandFeatureView(BaseFeatureView): name: str features: List[Feature] source_feature_view_projections: Dict[str, FeatureViewProjection] - source_request_data_sources: Dict[str, RequestDataSource] + source_request_sources: Dict[str, RequestSource] udf: MethodType description: str tags: Dict[str, str] @@ -71,11 +71,11 @@ def __init__( name: str, features: List[Feature], sources: Optional[ - Dict[str, Union[FeatureView, FeatureViewProjection, RequestDataSource]] + Dict[str, Union[FeatureView, FeatureViewProjection, RequestSource]] ] = None, udf: Optional[MethodType] = None, inputs: Optional[ - Dict[str, Union[FeatureView, FeatureViewProjection, RequestDataSource]] + Dict[str, Union[FeatureView, FeatureViewProjection, RequestSource]] ] = None, description: str = "", tags: Optional[Dict[str, str]] = None, @@ -124,10 +124,10 @@ def __init__( assert sources is not None self.source_feature_view_projections: Dict[str, FeatureViewProjection] = {} - self.source_request_data_sources: Dict[str, RequestDataSource] = {} + self.source_request_sources: Dict[str, RequestSource] = {} for source_name, odfv_source in sources.items(): - if isinstance(odfv_source, RequestDataSource): - self.source_request_data_sources[source_name] = odfv_source + if isinstance(odfv_source, RequestSource): + self.source_request_sources[source_name] = odfv_source elif isinstance(odfv_source, FeatureViewProjection): self.source_feature_view_projections[source_name] = odfv_source else: @@ -149,8 +149,7 @@ def __copy__(self): name=self.name, features=self.features, sources=dict( - **self.source_feature_view_projections, - **self.source_request_data_sources, + **self.source_feature_view_projections, **self.source_request_sources, ), udf=self.udf, description=self.description, @@ -167,7 +166,7 @@ def __eq__(self, other): if ( not self.source_feature_view_projections == other.source_feature_view_projections - or not self.source_request_data_sources == other.source_request_data_sources + or not self.source_request_sources == other.source_request_sources or not self.udf.__code__.co_code == other.udf.__code__.co_code ): return False @@ -194,12 +193,9 @@ def to_proto(self) -> OnDemandFeatureViewProto: sources[source_name] = OnDemandSource( feature_view_projection=fv_projection.to_proto() ) - for ( - source_name, - request_data_source, - ) in self.source_request_data_sources.items(): + for (source_name, request_sources,) in self.source_request_sources.items(): sources[source_name] = OnDemandSource( - request_data_source=request_data_source.to_proto() + request_data_source=request_sources.to_proto() ) spec = OnDemandFeatureViewSpec( @@ -241,7 +237,7 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): on_demand_source.feature_view_projection ) else: - sources[source_name] = RequestDataSource.from_proto( + sources[source_name] = RequestSource.from_proto( on_demand_source.request_data_source ) on_demand_feature_view_obj = cls( @@ -282,8 +278,8 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): def get_request_data_schema(self) -> Dict[str, ValueType]: schema: Dict[str, ValueType] = {} - for request_data_source in self.source_request_data_sources.values(): - schema.update(request_data_source.schema) + for request_source in self.source_request_sources.values(): + schema.update(request_source.schema) return schema def get_transformed_features_df( @@ -339,7 +335,7 @@ def infer_features(self): dtype=dtype ) df[f"{feature.name}"] = pd.Series(dtype=dtype) - for request_data in self.source_request_data_sources.values(): + for request_data in self.source_request_sources.values(): for feature_name, feature_type in request_data.schema.items(): dtype = feast_value_type_to_pandas_type(feature_type) df[f"{feature_name}"] = pd.Series(dtype=dtype) @@ -385,7 +381,7 @@ def get_requested_odfvs(feature_refs, project, registry): def on_demand_feature_view( - features: List[Feature], sources: Dict[str, Union[FeatureView, RequestDataSource]] + features: List[Feature], sources: Dict[str, Union[FeatureView, RequestSource]] ): """ Declare an on-demand feature view diff --git a/sdk/python/feast/request_feature_view.py b/sdk/python/feast/request_feature_view.py index 3be361347c..52b1fa56a9 100644 --- a/sdk/python/feast/request_feature_view.py +++ b/sdk/python/feast/request_feature_view.py @@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Type from feast.base_feature_view import BaseFeatureView -from feast.data_source import RequestDataSource +from feast.data_source import RequestSource from feast.feature import Feature from feast.feature_view_projection import FeatureViewProjection from feast.protos.feast.core.RequestFeatureView_pb2 import ( @@ -20,7 +20,7 @@ class RequestFeatureView(BaseFeatureView): Attributes: name: The unique name of the request feature view. - request_data_source: The request data source that specifies the schema and + request_source: The request source that specifies the schema and features of the request feature view. features: The list of features defined as part of this request feature view. description: A human-readable description. @@ -30,7 +30,7 @@ class RequestFeatureView(BaseFeatureView): """ name: str - request_data_source: RequestDataSource + request_source: RequestSource features: List[Feature] description: str tags: Dict[str, str] @@ -40,7 +40,7 @@ class RequestFeatureView(BaseFeatureView): def __init__( self, name: str, - request_data_source: RequestDataSource, + request_data_source: RequestSource, description: str = "", tags: Optional[Dict[str, str]] = None, owner: str = "", @@ -73,7 +73,7 @@ def __init__( tags=tags, owner=owner, ) - self.request_data_source = request_data_source + self.request_source = request_data_source @property def proto_class(self) -> Type[RequestFeatureViewProto]: @@ -88,7 +88,7 @@ def to_proto(self) -> RequestFeatureViewProto: """ spec = RequestFeatureViewSpec( name=self.name, - request_data_source=self.request_data_source.to_proto(), + request_data_source=self.request_source.to_proto(), description=self.description, tags=self.tags, owner=self.owner, @@ -110,7 +110,7 @@ def from_proto(cls, request_feature_view_proto: RequestFeatureViewProto): request_feature_view_obj = cls( name=request_feature_view_proto.spec.name, - request_data_source=RequestDataSource.from_proto( + request_data_source=RequestSource.from_proto( request_feature_view_proto.spec.request_data_source ), description=request_feature_view_proto.spec.description, @@ -127,8 +127,6 @@ def from_proto(cls, request_feature_view_proto: RequestFeatureViewProto): return request_feature_view_obj def __copy__(self): - fv = RequestFeatureView( - name=self.name, request_data_source=self.request_data_source - ) + fv = RequestFeatureView(name=self.name, request_data_source=self.request_source) fv.projection = copy.copy(self.projection) return fv diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 3029b2e774..3beee03d8f 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -35,7 +35,7 @@ ) from tests.integration.feature_repos.universal.feature_views import ( conv_rate_plus_100_feature_view, - create_conv_rate_request_data_source, + create_conv_rate_request_source, create_customer_daily_profile_feature_view, create_driver_hourly_stats_feature_view, create_field_mapping_feature_view, @@ -279,7 +279,7 @@ def construct_universal_feature_views( driver_odfv=conv_rate_plus_100_feature_view( { "driver": driver_hourly_stats, - "input_request": create_conv_rate_request_data_source(), + "input_request": create_conv_rate_request_source(), } ) if with_odfv diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 04443c38a0..fb5bd8e455 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -5,7 +5,7 @@ import pandas as pd from feast import Feature, FeatureView, OnDemandFeatureView, PushSource, ValueType -from feast.data_source import DataSource, RequestDataSource +from feast.data_source import DataSource, RequestSource def driver_feature_view( @@ -51,7 +51,7 @@ def conv_rate_plus_100(features_df: pd.DataFrame) -> pd.DataFrame: def conv_rate_plus_100_feature_view( - sources: Dict[str, Union[RequestDataSource, FeatureView]], + sources: Dict[str, Union[RequestSource, FeatureView]], infer_features: bool = False, features: Optional[List[Feature]] = None, ) -> OnDemandFeatureView: @@ -86,7 +86,7 @@ def similarity(features_df: pd.DataFrame) -> pd.DataFrame: def similarity_feature_view( - sources: Dict[str, Union[RequestDataSource, FeatureView]], + sources: Dict[str, Union[RequestSource, FeatureView]], infer_features: bool = False, features: Optional[List[Feature]] = None, ) -> OnDemandFeatureView: @@ -102,14 +102,12 @@ def similarity_feature_view( ) -def create_conv_rate_request_data_source(): - return RequestDataSource( - name="conv_rate_input", schema={"val_to_add": ValueType.INT32} - ) +def create_conv_rate_request_source(): + return RequestSource(name="conv_rate_input", schema={"val_to_add": ValueType.INT32}) -def create_similarity_request_data_source(): - return RequestDataSource( +def create_similarity_request_source(): + return RequestSource( name="similarity_input", schema={ "vector_double": ValueType.DOUBLE_LIST, diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 54af9f6ff2..6eaae3ec7d 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -13,7 +13,7 @@ SnowflakeSource, ValueType, ) -from feast.data_source import RequestDataSource +from feast.data_source import RequestSource from feast.errors import ( DataSourceNoNameException, RegistryInferenceFailure, @@ -164,7 +164,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so def test_on_demand_features_type_inference(): # Create Feature Views - date_request = RequestDataSource( + date_request = RequestSource( name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP} ) @@ -217,7 +217,7 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: def test_datasource_inference(): # Create Feature Views - date_request = RequestDataSource( + date_request = RequestSource( name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP} ) diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 88369ba76b..7fb30d25fb 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -24,7 +24,7 @@ from feast.entity import Entity from feast.feature import Feature from feast.feature_view import FeatureView -from feast.on_demand_feature_view import RequestDataSource, on_demand_feature_view +from feast.on_demand_feature_view import RequestSource, on_demand_feature_view from feast.protos.feast.types import Value_pb2 as ValueProto from feast.registry import Registry from feast.repo_config import RegistryConfig @@ -244,7 +244,7 @@ def test_modify_feature_views_success(test_registry): created_timestamp_column="timestamp", ) - request_source = RequestDataSource( + request_source = RequestSource( name="request_source", schema={"my_input_1": ValueType.INT32} ) diff --git a/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py b/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py index e8cc1552e2..c925ff7047 100644 --- a/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py +++ b/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py @@ -9,10 +9,10 @@ from tests.integration.feature_repos.universal.entities import customer, driver, item from tests.integration.feature_repos.universal.feature_views import ( conv_rate_plus_100_feature_view, - create_conv_rate_request_data_source, + create_conv_rate_request_source, create_driver_hourly_stats_feature_view, create_item_embeddings_feature_view, - create_similarity_request_data_source, + create_similarity_request_source, similarity_feature_view, ) @@ -26,9 +26,9 @@ def test_infer_odfv_features(environment, universal_data_sources, infer_features (entities, datasets, data_sources) = universal_data_sources driver_hourly_stats = create_driver_hourly_stats_feature_view(data_sources.driver) - request_data_source = create_conv_rate_request_data_source() + request_source = create_conv_rate_request_source() driver_odfv = conv_rate_plus_100_feature_view( - {"driver": driver_hourly_stats, "input_request": request_data_source}, + {"driver": driver_hourly_stats, "input_request": request_source}, infer_features=infer_features, ) @@ -60,7 +60,7 @@ def test_infer_odfv_list_features(environment, infer_features, tmp_path): ) items = create_item_embeddings_feature_view(fake_items_src) sim_odfv = similarity_feature_view( - {"items": items, "input_request": create_similarity_request_data_source()}, + {"items": items, "input_request": create_similarity_request_source()}, infer_features=infer_features, ) store = environment.feature_store @@ -78,9 +78,9 @@ def test_infer_odfv_features_with_error(environment, universal_data_sources): features = [Feature("conv_rate_plus_200", ValueType.DOUBLE)] driver_hourly_stats = create_driver_hourly_stats_feature_view(data_sources.driver) - request_data_source = create_conv_rate_request_data_source() + request_source = create_conv_rate_request_source() driver_odfv = conv_rate_plus_100_feature_view( - {"driver": driver_hourly_stats, "input_request": request_data_source}, + {"driver": driver_hourly_stats, "input_request": request_source}, features=features, ) diff --git a/sdk/python/tests/unit/test_data_sources.py b/sdk/python/tests/unit/test_data_sources.py index 28a12d44ad..6e8e44c0e3 100644 --- a/sdk/python/tests/unit/test_data_sources.py +++ b/sdk/python/tests/unit/test_data_sources.py @@ -1,5 +1,7 @@ +import pytest + from feast import ValueType -from feast.data_source import PushSource +from feast.data_source import PushSource, RequestDataSource, RequestSource from feast.infra.offline_stores.bigquery_source import BigQuerySource @@ -18,3 +20,15 @@ def test_push_with_batch(): assert push_source.name == push_source_unproto.name assert push_source.schema == push_source_unproto.schema assert push_source.batch_source.name == push_source_unproto.batch_source.name + + +def test_request_data_source_deprecation(): + with pytest.warns(DeprecationWarning): + request_data_source = RequestDataSource( + name="vals_to_add", + schema={"val_to_add": ValueType.INT64, "val_to_add_2": ValueType.INT64}, + ) + request_data_source_proto = request_data_source.to_proto() + returned_request_source = RequestSource.from_proto(request_data_source_proto) + assert returned_request_source.name == request_data_source.name + assert returned_request_source.schema == request_data_source.schema diff --git a/ui/feature_repo/features.py b/ui/feature_repo/features.py index 8d9f5c66f1..f06a7b76bf 100644 --- a/ui/feature_repo/features.py +++ b/ui/feature_repo/features.py @@ -8,7 +8,7 @@ FileSource, ValueType, ) -from feast.data_source import RequestDataSource +from feast.data_source import RequestSource from feast.request_feature_view import RequestFeatureView from feast.on_demand_feature_view import on_demand_feature_view import pandas as pd @@ -126,12 +126,12 @@ # Define a request data source which encodes features / information only # available at request time (e.g. part of the user initiated HTTP request) -input_request = RequestDataSource( +input_request = RequestSource( name="transaction", schema={"transaction_amt": ValueType.INT64}, ) # Define an on demand feature view which can generate new features based on -# existing feature views and RequestDataSource features +# existing feature views and RequestSource features @on_demand_feature_view( inputs={"credit_history": credit_history, "transaction": input_request,}, features=[