diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 969ca65862..f462f759d7 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -97,6 +97,7 @@ def python_type_to_feast_value_type( type_map = { "int": ValueType.INT64, "str": ValueType.STRING, + "string": ValueType.STRING, # pandas.StringDtype "float": ValueType.DOUBLE, "bytes": ValueType.BYTES, "float64": ValueType.DOUBLE, @@ -119,48 +120,50 @@ def python_type_to_feast_value_type( if type_name in type_map: return type_map[type_name] - if type_name == "ndarray" or isinstance(value, list): - if recurse: - - # Convert to list type - list_items = pd.core.series.Series(value) - - # This is the final type which we infer from the list - common_item_value_type = None - for item in list_items: - if isinstance(item, ProtoValue): - current_item_value_type: ValueType = _proto_value_to_value_type( - item - ) - else: - # Get the type from the current item, only one level deep - current_item_value_type = python_type_to_feast_value_type( - name=name, value=item, recurse=False - ) - # Validate whether the type stays consistent - if ( - common_item_value_type - and not common_item_value_type == current_item_value_type - ): - raise ValueError( - f"List value type for field {name} is inconsistent. " - f"{common_item_value_type} different from " - f"{current_item_value_type}." - ) - common_item_value_type = current_item_value_type - if common_item_value_type is None: - return ValueType.UNKNOWN - return ValueType[common_item_value_type.name + "_LIST"] - else: - assert value + if isinstance(value, np.ndarray) and str(value.dtype) in type_map: + item_type = type_map[str(value.dtype)] + return ValueType[item_type.name + "_LIST"] + + if isinstance(value, (list, np.ndarray)): + # if the value's type is "ndarray" and we couldn't infer from "value.dtype" + # this is most probably array of "object", + # so we need to iterate over objects and try to infer type of each item + if not recurse: raise ValueError( - f"Value type for field {name} is {value.dtype.__str__()} but " + f"Value type for field {name} is {type(value)} but " f"recursion is not allowed. Array types can only be one level " f"deep." ) - assert value - return type_map[value.dtype.__str__()] + # This is the final type which we infer from the list + common_item_value_type = None + for item in value: + if isinstance(item, ProtoValue): + current_item_value_type: ValueType = _proto_value_to_value_type(item) + else: + # Get the type from the current item, only one level deep + current_item_value_type = python_type_to_feast_value_type( + name=name, value=item, recurse=False + ) + # Validate whether the type stays consistent + if ( + common_item_value_type + and not common_item_value_type == current_item_value_type + ): + raise ValueError( + f"List value type for field {name} is inconsistent. " + f"{common_item_value_type} different from " + f"{current_item_value_type}." + ) + common_item_value_type = current_item_value_type + if common_item_value_type is None: + return ValueType.UNKNOWN + return ValueType[common_item_value_type.name + "_LIST"] + + raise ValueError( + f"Value with native type {type_name} " + f"cannot be converted into Feast value type" + ) def python_values_to_feast_value_type( diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 14aa1e13ad..ca5f56c435 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -3,7 +3,7 @@ from feast import Entity, Feature, RepoConfig, ValueType from feast.data_source import RequestDataSource -from feast.errors import RegistryInferenceFailure +from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError from feast.feature_view import FeatureView from feast.inference import ( update_data_sources_with_inferred_event_timestamp_col, @@ -86,7 +86,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1) ) -def test_modify_feature_views_success(): +def test_on_demand_features_type_inference(): # Create Feature Views date_request = RequestDataSource( name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP} @@ -94,11 +94,46 @@ def test_modify_feature_views_success(): @on_demand_feature_view( inputs={"date_request": date_request}, - features=[Feature("output", ValueType.UNIX_TIMESTAMP)], + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("string_output", ValueType.STRING), + ], ) def test_view(features_df: pd.DataFrame) -> pd.DataFrame: data = pd.DataFrame() data["output"] = features_df["some_date"] + data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) return data test_view.infer_features() + + @on_demand_feature_view( + inputs={"date_request": date_request}, + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("object_output", ValueType.STRING), + ], + ) + def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["object_output"] = features_df["some_date"].astype(str) + return data + + with pytest.raises(ValueError, match="Value with native type object"): + invalid_test_view.infer_features() + + @on_demand_feature_view( + inputs={"date_request": date_request}, + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("missing", ValueType.STRING), + ], + ) + def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + return data + + with pytest.raises(SpecifiedFeaturesNotPresentError): + test_view_with_missing_feature.infer_features()