Skip to content

Commit

Permalink
Fix integration test that is unstable due to incorrect materialization boundaries (#2095)
Browse files Browse the repository at this point in the history

* use timestamp from generated dataframe as split point

Signed-off-by: pyalex <[email protected]>

* use utc with redshift

Signed-off-by: pyalex <[email protected]>
  • Loading branch information
pyalex authored Dec 2, 2021
1 parent ce84d38 commit 73ec64d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
4 changes: 4 additions & 0 deletions sdk/python/feast/infra/offline_stores/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pyarrow as pa
from pydantic import StrictStr
from pydantic.typing import Literal
from pytz import utc

from feast import OnDemandFeatureView, RedshiftSource
from feast.data_source import DataSource
Expand Down Expand Up @@ -82,6 +83,9 @@ def pull_latest_from_table_or_query(
)
s3_resource = aws_utils.get_s3_resource(config.offline_store.region)

start_date = start_date.astimezone(tz=utc)
end_date = end_date.astimezone(tz=utc)

query = f"""
SELECT
{field_string}
Expand Down
17 changes: 12 additions & 5 deletions sdk/python/tests/integration/e2e/test_universal_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,21 @@
@pytest.mark.parametrize("infer_features", [True, False])
def test_e2e_consistency(environment, e2e_data_sources, infer_features):
fs = environment.feature_store
fs.config.project = fs.config.project + str(infer_features)
df, data_source = e2e_data_sources
fv = driver_feature_view(data_source=data_source, infer_features=infer_features)
fv = driver_feature_view(
name=f"test_consistency_{'with_inference' if infer_features else ''}",
data_source=data_source,
infer_features=infer_features,
)

entity = driver()
fs.apply([fv, entity])

run_offline_online_store_consistency_test(fs, fv)
# materialization is run in two steps and
# we use timestamp from generated dataframe as a split point
split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1)

run_offline_online_store_consistency_test(fs, fv, split_dt)


def check_offline_and_online_features(
Expand Down Expand Up @@ -80,7 +87,7 @@ def check_offline_and_online_features(


def run_offline_online_store_consistency_test(
fs: FeatureStore, fv: FeatureView
fs: FeatureStore, fv: FeatureView, split_dt: datetime
) -> None:
now = datetime.utcnow()

Expand All @@ -90,7 +97,7 @@ def run_offline_online_store_consistency_test(
# Run materialize()
# use both tz-naive & tz-aware timestamps to test that they're both correctly handled
start_date = (now - timedelta(hours=5)).replace(tzinfo=utc)
end_date = now - timedelta(hours=2)
end_date = split_dt
fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date)

# check result of materialize()
Expand Down

0 comments on commit 73ec64d

Please sign in to comment.