diff --git a/infra/scripts/test-docker-compose.sh b/infra/scripts/test-docker-compose.sh
index 884c299ff1..4cfe33b669 100755
--- a/infra/scripts/test-docker-compose.sh
+++ b/infra/scripts/test-docker-compose.sh
@@ -41,5 +41,11 @@ docker logs feast_jupyter_1
 # Wait for Jupyter Notebook Container to come online
 ${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${JUPYTER_DOCKER_CONTAINER_IP_ADDRESS}:8888 --timeout=300
 
+# Wait for Feast Core to come online
+docker exec feast_core_1 grpc-health-probe -addr :6565 -connect-timeout 300s
+
 # Run e2e tests for Redis
 docker exec feast_jupyter_1 bash -c 'cd feast/tests/e2e/ && pytest -s basic-ingest-redis-serving.py --core_url core:6565 --serving_url=online-serving:6566'
+
+# Run ingestion tests for FF Data Science scenarios
+docker exec feast_jupyter_1 bash -c 'cd feast/tests/ds_scenarios/ && pytest -s test-ingest.py --core_url core:6565 --serving_url=online-serving:6566'
diff --git a/tests/ds_scenarios/conftest.py b/tests/ds_scenarios/conftest.py
new file mode 100644
index 0000000000..a6f5c7cc90
--- /dev/null
+++ b/tests/ds_scenarios/conftest.py
@@ -0,0 +1,9 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    """Register the command-line options used by the ds_scenarios tests."""
+    parser.addoption("--core_url", action="store", default="localhost:6565")
+    parser.addoption("--serving_url", action="store", default="localhost:6566")
+    # Stored as a string; test-ingest.py compares it to "true" case-insensitively.
+    parser.addoption("--allow_dirty", action="store", default="False")
diff --git a/tests/ds_scenarios/ds_example_feature_data.py b/tests/ds_scenarios/ds_example_feature_data.py
new file mode 100644
index 0000000000..feb0bbf1e3
--- /dev/null
+++ b/tests/ds_scenarios/ds_example_feature_data.py
@@ -0,0 +1,228 @@
+"""
+Examples of anticipated daily feature ingestion load:
+
+Product Computer Vision features (20K/day - generated once a day), see create_product_image_features_df() below:
+- CV1: 4 x 64 float
+- CV2: 2 x 64 float
+- CV3: 256 x float
+- CV4: 8192 x float
+
+Product Text Attributes (20K/day - generated once a day), see create_product_text_attributes_df() below:
+- TX1-n: string or list of string
+
+Fraud features: customer counts for different windows of time (15M throughout day):
+- FR1-7: int
+"""
+
+import datetime
+import numpy as np
+import pandas as pd
+from feast import Feature, FeatureSet, Entity, ValueType
+from pytz import utc
+
+product = Entity('product_id', ValueType.INT64)
+
+
+PRODUCT_IMAGE_FEATURE_SET = FeatureSet(
+    'product_image_features',
+    entities=[Entity('product_id', ValueType.INT64)],
+    features=[
+        Feature('cv1', ValueType.DOUBLE_LIST),
+        Feature('cv2', ValueType.DOUBLE_LIST),
+        Feature('cv3', ValueType.DOUBLE_LIST),
+        Feature('cv4', ValueType.DOUBLE_LIST),
+    ]
+)
+
+
+PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET = FeatureSet(
+    'product_text_attributes',
+    entities=[Entity('product_id', ValueType.INT64)],
+    features=[
+        Feature('brand', ValueType.STRING),
+        Feature('brand-range', ValueType.STRING),
+        Feature('colours', ValueType.STRING_LIST),
+        Feature('footware', ValueType.STRING),
+        Feature('heel-height', ValueType.STRING),
+        Feature('heel-type', ValueType.STRING),
+        Feature('materials', ValueType.STRING_LIST),
+        Feature('sole-height', ValueType.STRING),
+    ]
+)
+
+
+FRAUD_COUNTS_FEATURE_SET = FeatureSet(
+    'fraud_count_features',
+    entities=[Entity('customer_id', ValueType.INT64)],
+    features=[
+        Feature('window_count1', ValueType.INT64),
+        Feature('window_count2', ValueType.INT64),
+        Feature('window_count3', ValueType.INT64),
+        Feature('window_count4', ValueType.INT64),
+        Feature('window_count5', ValueType.INT64),
+        Feature('window_count6', ValueType.INT64),
+        Feature('window_count7', ValueType.INT64),
+    ]
+)
+
+
+PRODUCT_ATTRIBUTES = {
+    'brand': [
+        'Ash',
+        'PRADA',
+        'Birkenstock',
+        'Valentino Garavani',
+        'LEMAIRE',
+        'R . M . Williams',
+        'Eres',
+        'Moschino',
+        'GIUSEPPE JUNIOR',
+        'Montelpare Tradition',
+        'Moa Master Of Arts',
+        "L ' Autre Chose",
+        'See by ChloƩ',
+        'Y - 3',
+        'Nike',
+    ],
+    'brand-range': [
+        'Gizeh',
+        'Gain',
+        'Stingray',
+        'Majolica',
+        'Glory',
+        'Revekka',
+        'Diana Strass',
+        'Blackout',
+        'Air Jordan 1 High',
+        'Authentic',
+        'Galore',
+        'Montecarlo Mondial',
+    ],
+    'colours': [
+        ['black', 'white'],
+        ['silver'],
+        ['red'],
+        ['light green'],
+        ['bright orange'],
+        ['tan brown'],
+        ['yellow', 'green'],
+        ['bright blue'],
+        ['orange'],
+        ['cinnamon brown'],
+        ['hot - pink'],
+        ['silver grey'],
+        ['navy'],
+    ],
+    'footware': [
+        'mules',
+        'sandals',
+        'flip - flops',
+        'sliders',
+        'ballerina shoes',
+        'mule',
+        'slingbacks',
+        'derby shoes',
+        'school shoes',
+        'wedge shoes',
+        'runner',
+    ],
+    'heel-height': ['{}mm'.format(x) for x in range(0, 101, 5)],
+    'heel-type': [
+        'sculpted',
+        'chunky',
+        'screw',
+        'stacked',
+        'discrete',
+        'slender',
+        'collapsible',
+        'platform',
+    ],
+    'materials': [
+        ['raffia'],
+        ['rubber', 'polyester'],
+        ['nylon'],
+        ['silk'],
+        ['patent sheepskin'],
+        ['mesh'],
+    ],
+    'sole-height': ['{}mm'.format(x) for x in range(0, 101, 5)],
+}
+
+
+def _random_choice(options):
+    """Return one uniformly random element of the sequence *options*.
+
+    ``np.random.choice`` converts its argument to an ndarray first, which
+    raises for ragged nested lists such as the 'colours' and 'materials'
+    attributes on NumPy >= 1.24 (and for any input that is not 1-D once
+    converted). Picking a random index avoids the conversion and returns the
+    original Python object unchanged.
+    """
+    return options[np.random.randint(len(options))]
+
+
+def create_cv1():
+    """Return a 4 x 64 array: 25 randomly placed columns are random, the rest zero."""
+    value = np.random.random((4, 64))
+    value[:, 25:] = 0
+    # Shuffling the transposed view permutes the columns of ``value`` in place.
+    np.random.shuffle(value.T)
+    return value
+
+
+def create_cv2():
+    """Return a 2 x 64 array: 15 randomly placed columns are random, the rest zero."""
+    value = np.random.random((2, 64))
+    value[:, 15:] = 0
+    # Shuffling the transposed view permutes the columns of ``value`` in place.
+    np.random.shuffle(value.T)
+    return value
+
+
+def create_cv3():
+    """Return a unit-norm float32 vector of 256 normally distributed values."""
+    value = np.random.randn(256).astype(np.float32)
+    return value / np.linalg.norm(value)
+
+
+def create_cv4():
+    """Return a unit-norm vector of 8192 normally distributed values."""
+    value = np.random.randn(8192)
+    return value / np.linalg.norm(value)
+
+
+def create_product_image_features_df(initial_product_id=1, n=20000):
+    """Build a DataFrame of n rows of random image features (cv1-cv4).
+
+    Product ids run from initial_product_id to initial_product_id + n - 1 and
+    every row shares the same UTC timestamp taken at call time.
+    """
+    dt = datetime.datetime.now(datetime.timezone.utc)
+    return pd.DataFrame(
+        {
+            'datetime': dt,
+            'product_id': list(range(initial_product_id, initial_product_id + n)),
+            'cv1': [create_cv1().flatten() for _ in range(n)],
+            'cv2': [create_cv2().flatten() for _ in range(n)],
+            'cv3': [create_cv3() for _ in range(n)],
+            'cv4': [create_cv4() for _ in range(n)],
+        })
+
+
+def create_product_text_attributes_df(initial_product_id=1, n=20000):
+    """Build a DataFrame of n rows of random product text attributes.
+
+    Each attribute column is drawn independently from PRODUCT_ATTRIBUTES;
+    product ids and the shared UTC timestamp follow the same scheme as
+    create_product_image_features_df().
+    """
+    dt = datetime.datetime.now(datetime.timezone.utc)
+    columns = {
+        'datetime': dt,
+        'product_id': list(range(initial_product_id, initial_product_id + n)),
+    }
+    # PRODUCT_ATTRIBUTES keys match the feature names of
+    # PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET, so one column is generated per key.
+    for name, options in PRODUCT_ATTRIBUTES.items():
+        columns[name] = [_random_choice(options) for _ in range(n)]
+    return pd.DataFrame(columns)
diff --git a/tests/ds_scenarios/test-ingest.py b/tests/ds_scenarios/test-ingest.py
new file mode 100644
index 0000000000..f9a7c85521
--- /dev/null
+++ b/tests/ds_scenarios/test-ingest.py
@@ -0,0 +1,64 @@
+import uuid
+
+import pytest
+from feast.client import Client
+
+from ds_example_feature_data import (
+    PRODUCT_IMAGE_FEATURE_SET, create_product_image_features_df,
+    PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET, create_product_text_attributes_df,
+)
+
+PROJECT_NAME = 'ds_' + uuid.uuid4().hex.upper()[0:6]
+
+
+@pytest.fixture(scope='module')
+def core_url(pytestconfig):
+    """Address of Feast Core, taken from the --core_url option."""
+    return pytestconfig.getoption("core_url")
+
+
+@pytest.fixture(scope='module')
+def serving_url(pytestconfig):
+    """Address of Feast Serving, taken from the --serving_url option."""
+    return pytestconfig.getoption("serving_url")
+
+
+@pytest.fixture(scope='module')
+def allow_dirty(pytestconfig):
+    """True when --allow_dirty was given as "true" (case-insensitive)."""
+    return pytestconfig.getoption("allow_dirty").lower() == "true"
+
+
+@pytest.fixture(scope='module')
+def client(core_url, serving_url, allow_dirty):
+    """Feast client bound to a newly created, randomly named project.
+
+    Raises:
+        Exception: if allow_dirty is false and the project already has
+            feature sets registered.
+    """
+    # Get client for core and serving
+    client = Client(core_url=core_url, serving_url=serving_url)
+    client.create_project(PROJECT_NAME)
+
+    # Ensure Feast core is active, but empty
+    if not allow_dirty:
+        feature_sets = client.list_feature_sets(project=PROJECT_NAME)
+        if len(feature_sets) > 0:
+            raise Exception(
+                "Feast cannot have existing feature sets registered. Exiting tests."
+            )
+
+    return client
+
+
+@pytest.mark.timeout(600)
+@pytest.mark.parametrize("data_frame_generator,feature_set", [
+    (create_product_image_features_df, PRODUCT_IMAGE_FEATURE_SET),
+    (create_product_text_attributes_df, PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET),
+])
+def test_ingestion(client, data_frame_generator, feature_set):
+    """Register the feature set, then ingest one freshly generated data frame."""
+    client.apply(feature_set)
+    data_frame = data_frame_generator()
+    client.ingest(feature_set, data_frame)