From cd9dc81a581f97c570d23ca9e2207b15de6bbb42 Mon Sep 17 00:00:00 2001 From: kysersozelee Date: Wed, 21 Dec 2022 00:58:26 +0900 Subject: [PATCH 1/4] fix: Assertion condition when value is 0 (#3401) * fix: Add assertion condition when value is 0 Signed-off-by: zlatan.el * chore: Add comment about zero value validation Signed-off-by: zlatan.el * chore: Modify the comment Signed-off-by: zlatan.el * chore: Add the comment Signed-off-by: zlatan.el Signed-off-by: zlatan.el Co-authored-by: zlatan.el Signed-off-by: franciscojavierarceo --- sdk/python/feast/type_map.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 48da7c9acf..a91a6c141d 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -402,7 +402,12 @@ def _python_value_to_proto_value( valid_scalar_types, ) = PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE[feast_value_type] if valid_scalar_types: - assert type(sample) in valid_scalar_types + if sample == 0 or sample == 0.0: + # NumPy converts 0 to int. However, in the feature view definition, the type of column may be a float. + # So, if value is 0, type validation must pass if scalar_types are either int or float. + assert type(sample) in [np.int64, int, np.float64, float] + else: + assert type(sample) in valid_scalar_types if feast_value_type == ValueType.BOOL: # ProtoValue does not support conversion of np.bool_ so we need to convert it to support np.bool_. 
return [ From a7ce3b73403d469638a01e32750e03081133af34 Mon Sep 17 00:00:00 2001 From: franciscojavierarceo Date: Fri, 23 Dec 2022 13:22:53 -0700 Subject: [PATCH 2/4] updating the batch field so that if you want return the created date of a model you can just add it in the get_online_features feature argument Signed-off-by: franciscojavierarceo --- sdk/python/feast/infra/offline_stores/file.py | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 29897aef43..4431a7b649 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -267,7 +267,7 @@ def evaluate_historical_retrieval(): ) entity_df_with_features = _drop_columns( - df_to_join, timestamp_field, created_timestamp_column + df_to_join, features, timestamp_field, created_timestamp_column ) # Ensure that we delete dataframes to free up memory @@ -609,6 +609,11 @@ def _normalize_timestamp( not hasattr(created_timestamp_column_type, "tz") or created_timestamp_column_type.tz != pytz.UTC ): + if len(df_to_join[created_timestamp_column].shape) > 1: + # if you are querying for the created timestamp field, we have to deduplicate + df_to_join, dups = _df_column_uniquify(df_to_join) + df_to_join = df_to_join.drop(columns=dups) + df_to_join[created_timestamp_column] = df_to_join[ created_timestamp_column ].apply( @@ -701,14 +706,36 @@ def _drop_duplicates( def _drop_columns( df_to_join: dd.DataFrame, + features: List[str], timestamp_field: str, created_timestamp_column: str, ) -> dd.DataFrame: - entity_df_with_features = df_to_join.drop([timestamp_field], axis=1).persist() - - if created_timestamp_column: - entity_df_with_features = entity_df_with_features.drop( - [created_timestamp_column], axis=1 - ).persist() + entity_df_with_features = df_to_join + timestamp_columns = [ + timestamp_field, + created_timestamp_column, + ] + for column 
in timestamp_columns: + if column and column not in features: + entity_df_with_features = entity_df_with_features.drop( + [column], axis=1 + ).persist() return entity_df_with_features + + +def _df_column_uniquify(df: dd.DataFrame) -> [dd.DataFrame, List[str]]: + df_columns = df.columns + new_columns = [] + duplicate_cols = [] + for item in df_columns: + counter = 0 + newitem = item + while newitem in new_columns: + counter += 1 + newitem = "{}_{}".format(item, counter) + if counter > 0: + duplicate_cols.append(newitem) + new_columns.append(newitem) + df.columns = new_columns + return df, duplicate_cols From 5ea5165dabf1bd62d5853714d4707ba41220ae99 Mon Sep 17 00:00:00 2001 From: franciscojavierarceo Date: Fri, 23 Dec 2022 16:08:52 -0700 Subject: [PATCH 3/4] linted Signed-off-by: franciscojavierarceo --- sdk/python/feast/infra/offline_stores/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 4431a7b649..34db873ab0 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -724,7 +724,7 @@ def _drop_columns( return entity_df_with_features -def _df_column_uniquify(df: dd.DataFrame) -> [dd.DataFrame, List[str]]: +def _df_column_uniquify(df: dd.DataFrame) -> Tuple[dd.DataFrame, List[str]]: df_columns = df.columns new_columns = [] duplicate_cols = [] From 9eba3eaa3defc03b24e0992115927bc31105bc73 Mon Sep 17 00:00:00 2001 From: franciscojavierarceo Date: Tue, 27 Dec 2022 11:39:14 -0700 Subject: [PATCH 4/4] adding change to also support querying the event_timestamp Signed-off-by: franciscojavierarceo --- sdk/python/feast/infra/offline_stores/file.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 34db873ab0..15e614a5a3 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ 
b/sdk/python/feast/infra/offline_stores/file.py @@ -599,6 +599,11 @@ def _normalize_timestamp( created_timestamp_column_type = df_to_join_types[created_timestamp_column] if not hasattr(timestamp_field_type, "tz") or timestamp_field_type.tz != pytz.UTC: + # if you are querying for the event timestamp field, we have to deduplicate + if len(df_to_join[timestamp_field].shape) > 1: + df_to_join, dups = _df_column_uniquify(df_to_join) + df_to_join = df_to_join.drop(columns=dups) + # Make sure all timestamp fields are tz-aware. We default tz-naive fields to UTC df_to_join[timestamp_field] = df_to_join[timestamp_field].apply( lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc),