Skip to content

Commit

Permalink
Fix ValueType.UNIX_TIMESTAMP conversions (#2219)
Browse files Browse the repository at this point in the history
* Handle `np.datetime64` to `ValueType.UNIX_TIMESTAMP` conversion

Signed-off-by: Judah Rand <[email protected]>

* Add `datetime` feature to tests

Signed-off-by: Judah Rand <[email protected]>

* Fix `datetime` features in `type_map.py`

Signed-off-by: Judah Rand <[email protected]>
  • Loading branch information
judahrand authored Jan 26, 2022
1 parent 6f1174a commit ef1884f
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 12 deletions.
25 changes: 24 additions & 1 deletion sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def _type_err(item, dtype):
ValueType.UNIX_TIMESTAMP_LIST: (
Int64List,
"int64_list_val",
[np.int64, np.int32, int],
[np.datetime64, np.int64, np.int32, int, datetime, Timestamp],
),
ValueType.STRING_LIST: (StringList, "string_list_val", [np.str_, str]),
ValueType.BOOL_LIST: (BoolList, "bool_list_val", [np.bool_, bool]),
Expand Down Expand Up @@ -274,6 +274,24 @@ def _python_value_to_proto_value(
)
raise _type_err(first_invalid, valid_types[0])

if feast_value_type == ValueType.UNIX_TIMESTAMP_LIST:
converted_values = []
for value in values:
converted_sub_values = []
for sub_value in value:
if isinstance(sub_value, datetime):
converted_sub_values.append(int(sub_value.timestamp()))
elif isinstance(sub_value, Timestamp):
converted_sub_values.append(int(sub_value.ToSeconds()))
elif isinstance(sub_value, np.datetime64):
converted_sub_values.append(
sub_value.astype("datetime64[s]").astype("int")
)
else:
converted_sub_values.append(sub_value)
converted_values.append(converted_sub_values)
values = converted_values

return [
ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore
if value is not None
Expand All @@ -292,6 +310,11 @@ def _python_value_to_proto_value(
return [
ProtoValue(int64_val=int(value.ToSeconds())) for value in values
]
elif isinstance(sample, np.datetime64):
return [
ProtoValue(int64_val=value.astype("datetime64[s]").astype("int"))
for value in values
]
return [ProtoValue(int64_val=int(value)) for value in values]

if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE:
Expand Down
7 changes: 7 additions & 0 deletions sdk/python/tests/data/data_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ def get_feature_values_for_dtype(
"float": [1.0, None, 3.0, 4.0, 5.0],
"string": ["1", None, "3", "4", "5"],
"bool": [True, None, False, True, False],
"datetime": [
datetime(1980, 1, 1),
None,
datetime(1981, 1, 1),
datetime(1982, 1, 1),
datetime(1982, 1, 1),
],
}
non_list_val = dtype_map[dtype]
if is_list:
Expand Down
32 changes: 21 additions & 11 deletions sdk/python/tests/integration/registration/test_universal_types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List, Tuple, Union
Expand Down Expand Up @@ -28,6 +29,7 @@ def populate_test_configs(offline: bool):
(ValueType.INT64, "int64"),
(ValueType.STRING, "float"),
(ValueType.STRING, "bool"),
(ValueType.INT32, "datetime"),
]
configs: List[TypeTestConfig] = []
for test_repo_config in FULL_REPO_CONFIGS:
Expand Down Expand Up @@ -232,6 +234,7 @@ def test_feature_get_online_features_types_match(online_types_test_fixtures):
"float": float,
"string": str,
"bool": bool,
"datetime": int,
}
expected_dtype = feature_list_dtype_to_expected_online_response_value_type[
config.feature_dtype
Expand All @@ -258,6 +261,8 @@ def create_feature_view(
value_type = ValueType.FLOAT_LIST
elif feature_dtype == "bool":
value_type = ValueType.BOOL_LIST
elif feature_dtype == "datetime":
value_type = ValueType.UNIX_TIMESTAMP_LIST
else:
if feature_dtype == "int32":
value_type = ValueType.INT32
Expand All @@ -267,6 +272,8 @@ def create_feature_view(
value_type = ValueType.FLOAT
elif feature_dtype == "bool":
value_type = ValueType.BOOL
elif feature_dtype == "datetime":
value_type = ValueType.UNIX_TIMESTAMP

return driver_feature_view(data_source, name=name, value_type=value_type,)

Expand All @@ -281,6 +288,7 @@ def assert_expected_historical_feature_types(
"float": (pd.api.types.is_float_dtype,),
"string": (pd.api.types.is_string_dtype,),
"bool": (pd.api.types.is_bool_dtype, pd.api.types.is_object_dtype),
"datetime": (pd.api.types.is_datetime64_any_dtype,),
}
dtype_checkers = feature_dtype_to_expected_historical_feature_dtype[feature_dtype]
assert any(
Expand Down Expand Up @@ -309,6 +317,7 @@ def assert_feature_list_types(
bool,
np.bool_,
), # Can be `np.bool_` if from `np.array` rather that `list`
"datetime": np.datetime64,
}
expected_dtype = feature_list_dtype_to_expected_historical_feature_list_dtype[
feature_dtype
Expand All @@ -330,22 +339,23 @@ def assert_expected_arrow_types(
historical_features_arrow = historical_features.to_arrow()
print(historical_features_arrow)
feature_list_dtype_to_expected_historical_feature_arrow_type = {
"int32": "int64",
"int64": "int64",
"float": "double",
"string": "string",
"bool": "bool",
"int32": r"int64",
"int64": r"int64",
"float": r"double",
"string": r"string",
"bool": r"bool",
"datetime": r"timestamp\[.+\]",
}
arrow_type = feature_list_dtype_to_expected_historical_feature_arrow_type[
feature_dtype
]
if feature_is_list:
assert (
str(historical_features_arrow.schema.field_by_name("value").type)
== f"list<item: {arrow_type}>"
assert re.match(
f"list<item: {arrow_type}>",
str(historical_features_arrow.schema.field_by_name("value").type),
)
else:
assert (
str(historical_features_arrow.schema.field_by_name("value").type)
== arrow_type
assert re.match(
arrow_type,
str(historical_features_arrow.schema.field_by_name("value").type),
)

0 comments on commit ef1884f

Please sign in to comment.