From aad0a478b0dfc851944f7c4a714071ab30bcdbfb Mon Sep 17 00:00:00 2001 From: Terence Date: Fri, 2 Oct 2020 23:11:35 +0800 Subject: [PATCH 01/34] Cleanup cli Signed-off-by: Terence --- sdk/python/feast/cli.py | 327 +++------------------------------------- 1 file changed, 18 insertions(+), 309 deletions(-) diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 1c774ea89f..f041b4c12f 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -15,7 +15,7 @@ import json import logging import sys -from typing import Dict, List +from typing import Dict import click import pkg_resources @@ -23,10 +23,7 @@ from feast.client import Client from feast.config import Config -from feast.contrib.job_controller.client import Client as JCClient -from feast.core.IngestionJob_pb2 import IngestionJobStatus -from feast.entity import EntityV2 -from feast.feature_set import FeatureSet, FeatureSetRef +from feast.entity import Entity from feast.feature_table import FeatureTable from feast.loaders.yaml import yaml_loader @@ -143,9 +140,7 @@ def entity_create(filename, project): Create or update an entity """ - entities = [ - EntityV2.from_dict(entity_dict) for entity_dict in yaml_loader(filename) - ] + entities = [Entity.from_dict(entity_dict) for entity_dict in yaml_loader(filename)] feast_client = Client() # type: Client feast_client.apply_entity(entities, project) @@ -217,6 +212,21 @@ def feature_table(): pass +def _get_labels_dict(label_str: str) -> Dict[str, str]: + """ + Converts CLI input labels string to dictionary format if provided string is valid. + """ + labels_dict: Dict[str, str] = {} + labels_kv = label_str.split(",") + if label_str == "": + return labels_dict + if len(labels_kv) % 2 == 1: + raise ValueError("Uneven key-value label pairs were entered") + for k, v in zip(labels_kv[0::2], labels_kv[1::2]): + labels_dict[k] = v + return labels_dict + + @feature_table.command("apply") @click.option( "--filename", @@ -291,170 +301,6 @@ def feature_table_list(project: str, labels: str): print(tabulate(table, headers=["NAME", "ENTITIES"], tablefmt="plain")) -@cli.group(name="features") -def feature(): - """ - Manage feature - """ - pass - - -def _convert_entity_string_to_list(entities_str: str) -> List[str]: - """ - Converts CLI input entities string to list format if provided string is valid. 
- """ - if entities_str == "": - return [] - return entities_str.split(",") - - -@feature.command(name="list") -@click.option( - "--project", - "-p", - help="Project that feature belongs to", - type=click.STRING, - default="*", -) -@click.option( - "--entities", - "-n", - help="Entities to filter for features", - type=click.STRING, - default="", -) -@click.option( - "--labels", - "-l", - help="Labels to filter for features", - type=click.STRING, - default="", -) -def feature_list(project: str, entities: str, labels: str): - """ - List all features - """ - feast_client = Client() # type: Client - - entities_list = _convert_entity_string_to_list(entities) - labels_dict: Dict[str, str] = _get_labels_dict(labels) - - table = [] - for feature_ref, feature in feast_client.list_features_by_ref( - project=project, entities=entities_list, labels=labels_dict - ).items(): - table.append([feature.name, feature.dtype, repr(feature_ref)]) - - from tabulate import tabulate - - print(tabulate(table, headers=["NAME", "DTYPE", "REFERENCE"], tablefmt="plain")) - - -@cli.group(name="feature-sets") -def feature_set(): - """ - Create and manage feature sets - """ - pass - - -def _get_labels_dict(label_str: str) -> Dict[str, str]: - """ - Converts CLI input labels string to dictionary format if provided string is valid. - """ - labels_dict: Dict[str, str] = {} - labels_kv = label_str.split(",") - if label_str == "": - return labels_dict - if len(labels_kv) % 2 == 1: - raise ValueError("Uneven key-value label pairs were entered") - for k, v in zip(labels_kv[0::2], labels_kv[1::2]): - labels_dict[k] = v - return labels_dict - - -@feature_set.command(name="list") -@click.option( - "--project", - "-p", - help="Project that feature set belongs to", - type=click.STRING, - default="*", -) -@click.option( - "--name", - "-n", - help="Filters feature sets by name. Wildcards (*) may be included to match multiple feature sets", - type=click.STRING, - default="*", -) -@click.option( - "--labels", - "-l", - help="Labels to filter for feature sets", - type=click.STRING, - default="", -) -def feature_set_list(project: str, name: str, labels: str): - """ - List all feature sets - """ - feast_client = Client() # type: Client - - labels_dict = _get_labels_dict(labels) - - table = [] - for fs in feast_client.list_feature_sets( - project=project, name=name, labels=labels_dict - ): - table.append([fs.name, repr(fs)]) - - from tabulate import tabulate - - print(tabulate(table, headers=["NAME", "REFERENCE"], tablefmt="plain")) - - -@feature_set.command("apply") -# TODO: add project option to overwrite project setting. 
-@click.option( - "--filename", - "-f", - help="Path to a feature set configuration file that will be applied", - type=click.Path(exists=True), -) -def feature_set_create(filename): - """ - Create or update a feature set - """ - - feature_sets = [FeatureSet.from_dict(fs_dict) for fs_dict in yaml_loader(filename)] - feast_client = Client() # type: Client - feast_client.apply(feature_sets) - - -@feature_set.command("describe") -@click.argument("name", type=click.STRING) -@click.option( - "--project", - "-p", - help="Project that feature set belongs to", - type=click.STRING, - default="default", -) -def feature_set_describe(name: str, project: str): - """ - Describe a feature set - """ - feast_client = Client() # type: Client - fs = feast_client.get_feature_set(name=name, project=project) - - if not fs: - print(f'Feature set with name "{name}" could not be found') - return - - print(yaml.dump(yaml.safe_load(str(fs)), default_flow_style=False, sort_keys=False)) - - @cli.group(name="projects") def project(): """ @@ -499,142 +345,5 @@ def project_list(): print(tabulate(table, headers=["NAME"], tablefmt="plain")) -@cli.group(name="ingest-jobs") -def ingest_job(): - """ - Manage ingestion jobs - """ - pass - - -@ingest_job.command("list") -@click.option("--job-id", "-i", help="Show only ingestion jobs with the given job id") -@click.option( - "--feature-set-ref", - "-f", - help="Show only ingestion job targeting the feature set with the given reference", -) -@click.option( - "--store-name", - "-s", - help="List only ingestion job that ingest into feast store with given name", -) -# TODO: types -def ingest_job_list(job_id, feature_set_ref, store_name): - """ - List ingestion jobs - """ - # parse feature set reference - if feature_set_ref is not None: - feature_set_ref = FeatureSetRef.from_str(feature_set_ref) - - # pull & render ingestion jobs as a table - feast_client = JCClient() - table = [] - for ingest_job in feast_client.list_ingest_jobs( - job_id=job_id, feature_set_ref=feature_set_ref, store_name=store_name - ): - table.append([ingest_job.id, IngestionJobStatus.Name(ingest_job.status)]) - - from tabulate import tabulate - - print(tabulate(table, headers=["ID", "STATUS"], tablefmt="plain")) - - -@ingest_job.command("describe") -@click.argument("job_id") -def ingest_job_describe(job_id: str): - """ - Describe the ingestion job with the given id. - """ - # find ingestion job for id - feast_client = JCClient() - jobs = feast_client.list_ingest_jobs(job_id=job_id) - if len(jobs) < 1: - print(f"Ingestion Job with id {job_id} could not be found") - sys.exit(1) - job = jobs[0] - - # pretty render ingestion job as yaml - print( - yaml.dump(yaml.safe_load(str(job)), default_flow_style=False, sort_keys=False) - ) - - -@ingest_job.command("stop") -@click.option( - "--wait", "-w", is_flag=True, help="Wait for the ingestion job to fully stop." -) -@click.option( - "--timeout", - "-t", - default=600, - help="Timeout in seconds to wait for the job to stop.", -) -@click.argument("job_id") -def ingest_job_stop(wait: bool, timeout: int, job_id: str): - """ - Stop ingestion job for id. 
- """ - # find ingestion job for id - feast_client = JCClient() - jobs = feast_client.list_ingest_jobs(job_id=job_id) - if len(jobs) < 1: - print(f"Ingestion Job with id {job_id} could not be found") - sys.exit(1) - job = jobs[0] - - feast_client.stop_ingest_job(job) - - # wait for ingestion job to stop - if wait: - job.wait(IngestionJobStatus.ABORTED, timeout=timeout) - - -@ingest_job.command("restart") -@click.argument("job_id") -def ingest_job_restart(job_id: str): - """ - Restart job for id. - Waits for the job to fully restart. - """ - # find ingestion job for id - feast_client = JCClient() - jobs = feast_client.list_ingest_jobs(job_id=job_id) - if len(jobs) < 1: - print(f"Ingestion Job with id {job_id} could not be found") - sys.exit(1) - job = jobs[0] - - feast_client.restart_ingest_job(job) - - -@cli.command() -@click.option( - "--name", "-n", help="Feature set name to ingest data into", required=True -) -@click.option( - "--filename", - "-f", - help="Path to file to be ingested", - type=click.Path(exists=True), - required=True, -) -@click.option( - "--file-type", - "-t", - type=click.Choice(["CSV"], case_sensitive=False), - help="Type of file to ingest. Defaults to CSV.", -) -def ingest(name, filename, file_type): - """ - Ingest feature data into a feature set - """ - - feast_client = Client() # type: Client - feature_set = feast_client.get_feature_set(name=name) - feature_set.ingest_file(file_path=filename) - - if __name__ == "__main__": cli() From 53d40cf0dafdca8c9476b68e48124b4e89f740e0 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 12:06:28 +0800 Subject: [PATCH 02/34] Cleanup Python SDK Signed-off-by: Terence --- sdk/python/feast/__init__.py | 21 +- sdk/python/feast/client.py | 737 ++--------- sdk/python/feast/contrib/__init__.py | 0 .../feast/contrib/job_controller/__init__.py | 0 .../feast/contrib/job_controller/client.py | 145 --- .../feast/contrib/job_controller/job.py | 122 -- sdk/python/feast/entity.py | 37 +- sdk/python/feast/feature.py | 171 +-- sdk/python/feast/feature_set.py | 1078 ----------------- sdk/python/feast/feature_table.py | 8 +- sdk/python/feast/feature_v2.py | 94 -- sdk/python/feast/job.py | 210 ---- sdk/python/feast/loaders/ingest.py | 170 +-- sdk/python/feast/source.py | 121 -- sdk/python/feast/type_map.py | 84 -- sdk/python/feast/value_type.py | 2 +- 16 files changed, 212 insertions(+), 2788 deletions(-) delete mode 100644 sdk/python/feast/contrib/__init__.py delete mode 100644 sdk/python/feast/contrib/job_controller/__init__.py delete mode 100644 sdk/python/feast/contrib/job_controller/client.py delete mode 100644 sdk/python/feast/contrib/job_controller/job.py delete mode 100644 sdk/python/feast/feature_set.py delete mode 100644 sdk/python/feast/feature_v2.py delete mode 100644 sdk/python/feast/job.py delete mode 100644 sdk/python/feast/source.py diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index 8342de4c9b..298b8ac975 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -1,10 +1,17 @@ from pkg_resources import DistributionNotFound, get_distribution from .client import Client +from .data_source import ( + BigQueryOptions, + DataSource, + FileOptions, + KafkaOptions, + KinesisOptions, + SourceType, +) from .entity import Entity from .feature import Feature -from .feature_set import FeatureSet -from .source import KafkaSource, Source +from .feature_table import FeatureTable from .value_type import ValueType try: @@ -16,9 +23,13 @@ __all__ = [ "Client", "Entity", + "DataSource", + 
"BigQueryOptions", + "FileOptions", + "KafkaOptions", + "KinesisOptions", "Feature", - "FeatureSet", - "Source", - "KafkaSource", + "FeatureTable", + "SourceType", "ValueType", ] diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 713776f1f5..bc21a6f2e4 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -18,14 +18,12 @@ import shutil import tempfile import time -import uuid from math import ceil -from typing import Any, Dict, List, Optional, Tuple, Union, cast +from typing import Dict, List, Optional, Tuple, Union import grpc import pandas as pd import pyarrow as pa -from google.protobuf.timestamp_pb2 import Timestamp from pyarrow import parquet as pq from feast.config import Config @@ -44,8 +42,6 @@ from feast.core.CoreService_pb2 import ( ApplyEntityRequest, ApplyEntityResponse, - ApplyFeatureSetRequest, - ApplyFeatureSetResponse, ApplyFeatureTableRequest, ApplyFeatureTableResponse, ArchiveProjectRequest, @@ -55,49 +51,28 @@ GetEntityRequest, GetEntityResponse, GetFeastCoreVersionRequest, - GetFeatureSetRequest, - GetFeatureSetResponse, - GetFeatureStatisticsRequest, GetFeatureTableRequest, GetFeatureTableResponse, ListEntitiesRequest, ListEntitiesResponse, - ListFeatureSetsRequest, - ListFeatureSetsResponse, - ListFeaturesRequest, - ListFeaturesResponse, ListFeatureTablesRequest, ListFeatureTablesResponse, ListProjectsRequest, ListProjectsResponse, ) from feast.core.CoreService_pb2_grpc import CoreServiceStub -from feast.core.FeatureSet_pb2 import FeatureSetStatus -from feast.entity import EntityV2 -from feast.feature import Feature, FeatureRef -from feast.feature_set import FeatureSet +from feast.data_source import SourceType +from feast.entity import Entity from feast.feature_table import FeatureTable from feast.grpc import auth as feast_auth from feast.grpc.grpc import create_grpc_channel -from feast.job import RetrievalJob -from feast.loaders.abstract_producer import get_producer -from feast.loaders.file import export_source_to_staging_location -from feast.loaders.ingest import KAFKA_CHUNK_PRODUCTION_TIMEOUT, get_feature_row_chunks -from feast.online_response import OnlineResponse -from feast.serving.ServingService_pb2 import ( - DataFormat, - DatasetSource, - FeastServingType, - FeatureReference, - GetBatchFeaturesRequest, - GetFeastServingInfoRequest, - GetFeastServingInfoResponse, - GetOnlineFeaturesRequest, +from feast.loaders.ingest import ( + BATCH_INGESTION_PRODUCTION_TIMEOUT, + check_field_mappings, ) +from feast.serving.ServingService_pb2 import GetFeastServingInfoRequest from feast.serving.ServingService_pb2_grpc import ServingServiceStub -from feast.type_map import _python_value_to_proto_value, python_type_to_feast_value_type -from feast.types.Value_pb2 import Value as Value -from tensorflow_metadata.proto.v0 import statistics_pb2 +from feast.staging.storage_client import get_staging_client _logger = logging.getLogger(__name__) @@ -368,9 +343,7 @@ def archive_project(self, project): if self._project == project: self._project = FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY] - def apply_entity( - self, entities: Union[List[EntityV2], EntityV2], project: str = None - ): + def apply_entity(self, entities: Union[List[Entity], Entity], project: str = None): """ Idempotently registers entities with Feast Core. Either a single entity or a list can be provided. 
@@ -380,11 +353,11 @@ def apply_entity( Examples: >>> from feast import Client - >>> from feast.entity import EntityV2 + >>> from feast.entity import Entity >>> from feast.value_type import ValueType >>> >>> feast_client = Client(core_url="localhost:6565") - >>> entity = EntityV2( + >>> entity = Entity( >>> name="driver_entity", >>> description="Driver entity for car rides", >>> value_type=ValueType.STRING, @@ -401,12 +374,12 @@ def apply_entity( if not isinstance(entities, list): entities = [entities] for entity in entities: - if isinstance(entity, EntityV2): + if isinstance(entity, Entity): self._apply_entity(project, entity) # type: ignore continue raise ValueError(f"Could not determine entity type to apply {entity}") - def _apply_entity(self, project: str, entity: EntityV2): + def _apply_entity(self, project: str, entity: Entity): """ Registers a single entity with Feast @@ -428,14 +401,14 @@ def _apply_entity(self, project: str, entity: EntityV2): raise grpc.RpcError(e.details()) # Extract the returned entity - applied_entity = EntityV2.from_proto(apply_entity_response.entity) + applied_entity = Entity.from_proto(apply_entity_response.entity) # Deep copy from the returned entity to the local entity entity._update_from_entity(applied_entity) def list_entities( self, project: str = None, labels: Dict[str, str] = dict() - ) -> List[EntityV2]: + ) -> List[Entity]: """ Retrieve a list of entities from Feast Core @@ -460,12 +433,12 @@ def list_entities( # Extract entities and return entities = [] for entity_proto in entity_protos.entities: - entity = EntityV2.from_proto(entity_proto) + entity = Entity.from_proto(entity_proto) entity._client = self entities.append(entity) return entities - def get_entity(self, name: str, project: str = None) -> Union[EntityV2, None]: + def get_entity(self, name: str, project: str = None) -> Union[Entity, None]: """ Retrieves an entity. @@ -488,7 +461,7 @@ def get_entity(self, name: str, project: str = None) -> Union[EntityV2, None]: ) # type: GetEntityResponse except grpc.RpcError as e: raise grpc.RpcError(e.details()) - entity = EntityV2.from_proto(get_entity_response.entity) + entity = Entity.from_proto(get_entity_response.entity) return entity @@ -605,370 +578,21 @@ def get_feature_table( raise grpc.RpcError(e.details()) return FeatureTable.from_proto(get_feature_table_response.table) - def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]): - """ - Idempotently registers feature set(s) with Feast Core. Either a single - feature set or a list can be provided. 
- - Args: - feature_sets: List of feature sets that will be registered - """ - if not isinstance(feature_sets, list): - feature_sets = [feature_sets] - for feature_set in feature_sets: - if isinstance(feature_set, FeatureSet): - self._apply_feature_set(feature_set) - continue - raise ValueError( - f"Could not determine feature set type to apply {feature_set}" - ) - - def _apply_feature_set(self, feature_set: FeatureSet): - """ - Registers a single feature set with Feast - - Args: - feature_set: Feature set that will be registered - """ - - feature_set.is_valid() - feature_set_proto = feature_set.to_proto() - if len(feature_set_proto.spec.project) == 0: - if self.project is not None: - feature_set_proto.spec.project = self.project - - # Convert the feature set to a request and send to Feast Core - try: - apply_fs_response = self._core_service.ApplyFeatureSet( - ApplyFeatureSetRequest(feature_set=feature_set_proto), - timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), - metadata=self._get_grpc_metadata(), - ) # type: ApplyFeatureSetResponse - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - - # Extract the returned feature set - applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set) - - # If the feature set has changed, update the local copy - if apply_fs_response.status == ApplyFeatureSetResponse.Status.CREATED: - print(f'Feature set created: "{applied_fs.name}"') - - if apply_fs_response.status == ApplyFeatureSetResponse.Status.UPDATED: - print(f'Feature set updated: "{applied_fs.name}"') - - # If no change has been applied, do nothing - if apply_fs_response.status == ApplyFeatureSetResponse.Status.NO_CHANGE: - print(f"No change detected or applied: {feature_set.name}") - - # Deep copy from the returned feature set to the local feature set - feature_set._update_from_feature_set(applied_fs) - - def list_feature_sets( - self, project: str = None, name: str = None, labels: Dict[str, str] = dict() - ) -> List[FeatureSet]: - """ - Retrieve a list of feature sets from Feast Core - - Args: - project: Filter feature sets based on project name - name: Filter feature sets based on feature set name - - Returns: - List of feature sets - """ - - if project is None: - if self.project is not None: - project = self.project - else: - project = "*" - - if name is None: - name = "*" - - filter = ListFeatureSetsRequest.Filter( - project=project, feature_set_name=name, labels=labels - ) - - # Get latest feature sets from Feast Core - feature_set_protos = self._core_service.ListFeatureSets( - ListFeatureSetsRequest(filter=filter), metadata=self._get_grpc_metadata(), - ) # type: ListFeatureSetsResponse - - # Extract feature sets and return - feature_sets = [] - for feature_set_proto in feature_set_protos.feature_sets: - feature_set = FeatureSet.from_proto(feature_set_proto) - feature_set._client = self - feature_sets.append(feature_set) - return feature_sets - - def get_feature_set( - self, name: str, project: str = None - ) -> Union[FeatureSet, None]: - """ - Retrieves a feature set. 
- - Args: - project: Feast project that this feature set belongs to - name: Name of feature set - - Returns: - Returns either the specified feature set, or raises an exception if - none is found - """ - - if project is None: - if self.project is not None: - project = self.project - else: - raise ValueError("No project has been configured.") - - try: - get_feature_set_response = self._core_service.GetFeatureSet( - GetFeatureSetRequest(project=project, name=name.strip()), - metadata=self._get_grpc_metadata(), - ) # type: GetFeatureSetResponse - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - return FeatureSet.from_proto(get_feature_set_response.feature_set) - - def list_features_by_ref( - self, - project: str = None, - entities: List[str] = list(), - labels: Dict[str, str] = dict(), - ) -> Dict[FeatureRef, Feature]: - """ - Returns a list of features based on filters provided. - - Args: - project: Feast project that these features belongs to - entities: Feast entity that these features are associated with - labels: Feast labels that these features are associated with - - Returns: - Dictionary of - - Examples: - >>> from feast import Client - >>> - >>> feast_client = Client(core_url="localhost:6565") - >>> features = list_features_by_ref(project="test_project", entities=["driver_id"], labels={"key1":"val1","key2":"val2"}) - >>> print(features) - """ - if project is None: - if self.project is not None: - project = self.project - else: - project = "default" - - filter = ListFeaturesRequest.Filter( - project=project, entities=entities, labels=labels - ) - - feature_protos = self._core_service.ListFeatures( - ListFeaturesRequest(filter=filter), metadata=self._get_grpc_metadata(), - ) # type: ListFeaturesResponse - - features_dict = {} - for ref_str, feature_proto in feature_protos.features.items(): - feature_ref = FeatureRef.from_str(ref_str, ignore_project=True) - feature = Feature.from_proto(feature_proto) - features_dict[feature_ref] = feature - - return features_dict - - def get_historical_features( - self, - feature_refs: List[str], - entity_rows: Union[pd.DataFrame, str], - compute_statistics: bool = False, - project: str = None, - ) -> RetrievalJob: - """ - Retrieves historical features from a Feast Serving deployment. - - Args: - feature_refs: List of feature references that will be returned for each entity. - Each feature reference should have the following format: - "feature_set:feature" where "feature_set" & "feature" refer to - the feature and feature set names respectively. - Only the feature name is required. - entity_rows (Union[pd.DataFrame, str]): - Pandas dataframe containing entities and a 'datetime' column. - Each entity in a feature set must be present as a column in this - dataframe. The datetime column must contain timestamps in - datetime64 format. - compute_statistics (bool): - Indicates whether Feast should compute statistics over the retrieved dataset. - project: Specifies the project which contain the FeatureSets - which the requested features belong to. - - Returns: - feast.job.RetrievalJob: - Returns a retrival job object that can be used to monitor retrieval - progress asynchronously, and can be used to materialize the - results. 
- - Examples: - >>> from feast import Client - >>> from datetime import datetime - >>> - >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") - >>> feature_refs = ["my_project/bookings_7d", "booking_14d"] - >>> entity_rows = pd.DataFrame( - >>> { - >>> "datetime": [pd.datetime.now() for _ in range(3)], - >>> "customer": [1001, 1002, 1003], - >>> } - >>> ) - >>> feature_retrieval_job = feast_client.get_historical_features( - >>> feature_refs, entity_rows, project="my_project") - >>> df = feature_retrieval_job.to_dataframe() - >>> print(df) - """ - - # Retrieve serving information to determine store type and - # staging location - serving_info = self._serving_service.GetFeastServingInfo( - GetFeastServingInfoRequest(), - timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), - metadata=self._get_grpc_metadata(), - ) # type: GetFeastServingInfoResponse - - if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH: - raise Exception( - f'You are connected to a store "{self.serving_url}" which ' - f"does not support batch retrieval " - ) - - if isinstance(entity_rows, pd.DataFrame): - # Pandas DataFrame detected - - # Remove timezone from datetime column - if isinstance( - entity_rows["datetime"].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype - ): - entity_rows["datetime"] = pd.DatetimeIndex( - entity_rows["datetime"] - ).tz_localize(None) - elif isinstance(entity_rows, str): - # String based source - if not entity_rows.endswith((".avro", "*")): - raise Exception( - "Only .avro and wildcard paths are accepted as entity_rows" - ) - else: - raise Exception( - f"Only pandas.DataFrame and str types are allowed" - f" as entity_rows, but got {type(entity_rows)}." - ) - - # Export and upload entity row DataFrame to staging location - # provided by Feast - staged_files = export_source_to_staging_location( - entity_rows, serving_info.job_staging_location - ) # type: List[str] - request = GetBatchFeaturesRequest( - features=_build_feature_references( - feature_ref_strs=feature_refs, - project=project if project is not None else self.project, - ), - dataset_source=DatasetSource( - file_source=DatasetSource.FileSource( - file_uris=staged_files, data_format=DataFormat.DATA_FORMAT_AVRO - ) - ), - compute_statistics=compute_statistics, - ) - - # Retrieve Feast Job object to manage life cycle of retrieval - try: - response = self._serving_service.GetBatchFeatures( - request, metadata=self._get_grpc_metadata() - ) - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - - return RetrievalJob( - response.job, - self._serving_service, - auth_metadata_plugin=self._auth_metadata, - ) - - def get_online_features( - self, - feature_refs: List[str], - entity_rows: List[Dict[str, Any]], - project: Optional[str] = None, - omit_entities: bool = False, - ) -> OnlineResponse: - """ - Retrieves the latest online feature data from Feast Serving - - Args: - feature_refs: List of feature references that will be returned for each entity. - Each feature reference should have the following format: - "feature_set:feature" where "feature_set" & "feature" refer to - the feature and feature set names respectively. - Only the feature name is required. - entity_rows: A list of dictionaries where each key is an entity and each value is - feast.types.Value or Python native form. - project: Optionally specify the the project override. If specified, uses given project for retrieval. - Overrides the projects specified in Feature References if also are specified. 
- omit_entities: If true will omit entity values in the returned feature data. - Returns: - GetOnlineFeaturesResponse containing the feature data in records. - Each EntityRow provided will yield one record, which contains - data fields with data value and field status metadata (if included). - - Examples: - >>> from feast import Client - >>> - >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") - >>> feature_refs = ["daily_transactions"] - >>> entity_rows = [{"customer_id": 0},{"customer_id": 1}] - >>> - >>> online_response = feast_client.get_online_features( - >>> feature_refs, entity_rows, project="my_project") - >>> online_response_dict = online_response.to_dict() - >>> print(online_response_dict) - {'daily_transactions': [1.1,1.2], 'customer_id': [0,1]} - """ - - try: - response = self._serving_service.GetOnlineFeatures( - GetOnlineFeaturesRequest( - omit_entities_in_response=omit_entities, - features=_build_feature_references(feature_ref_strs=feature_refs), - entity_rows=_infer_online_entity_rows(entity_rows), - project=project if project is not None else self.project, - ), - metadata=self._get_grpc_metadata(), - ) - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - - response = OnlineResponse(response) - return response - def ingest( self, - feature_set: Union[str, FeatureSet], + feature_table: Union[str, FeatureTable], source: Union[pd.DataFrame, str], + project: str = None, chunk_size: int = 10000, max_workers: int = max(CPU_COUNT - 1, 1), - disable_progress_bar: bool = False, - timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT, - ) -> str: + timeout: int = BATCH_INGESTION_PRODUCTION_TIMEOUT, + ) -> None: """ - Loads feature data into Feast for a specific feature set. + Batch load feature data into batch source of a specific feature table. Args: - feature_set (typing.Union[str, feast.feature_set.FeatureSet]): - Feature set object or the string name of the feature set + feature_table (typing.Union[str, feast.feature_table.FeatureTable]): + Feature table object or the string name of the feature table source (typing.Union[pd.DataFrame, str]): Either a file path or Pandas Dataframe to ingest into Feast @@ -977,27 +601,22 @@ def ingest( * csv * json + project: Feast project to locate FeatureTable + chunk_size (int): Amount of rows to load and ingest at a time. max_workers (int): Number of worker processes to use to encode values. - disable_progress_bar (bool): - Disable printing of progress statistics. - timeout (int): Timeout in seconds to wait for completion. 
- Returns: - str: - ingestion id for this dataset - Examples: >>> from feast import Client >>> >>> client = Client(core_url="localhost:6565") - >>> fs_df = pd.DataFrame( + >>> ft_df = pd.DataFrame( >>> { >>> "datetime": [pd.datetime.now()], >>> "driver": [1001], @@ -1005,169 +624,105 @@ def ingest( >>> } >>> ) >>> client.set_project("project1") - >>> client.ingest("driver", fs_df) >>> - >>> driver_fs = client.get_feature_set(name="driver", project="project1") - >>> client.ingest(driver_fs, fs_df) + >>> driver_ft = client.get_feature_table(name="driver") + >>> client.ingest(driver_ft, ft_df) """ - if isinstance(feature_set, FeatureSet): - name = feature_set.name - project = feature_set.project - elif isinstance(feature_set, str): - if self.project is not None: - project = self.project - else: - project = "default" - name = feature_set - else: - raise Exception("Feature set name must be provided") + if project is None: + project = self.project + if isinstance(feature_table, FeatureTable): + name = feature_table.name # Read table and get row count - dir_path, dest_path = _read_table_from_source(source, chunk_size, max_workers) - - pq_file = pq.ParquetFile(dest_path) - - row_count = pq_file.metadata.num_rows + dir_path, dest_path, column_names = _read_table_from_source( + source, chunk_size, max_workers + ) current_time = time.time() - - print("Waiting for feature set to be ready for ingestion...") + print("Waiting for feature table to be ready for ingestion...") while True: if timeout is not None and time.time() - current_time >= timeout: - raise TimeoutError("Timed out waiting for feature set to be ready") - fetched_feature_set: Optional[FeatureSet] = self.get_feature_set( + raise TimeoutError("Timed out waiting for feature table to be ready") + fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( name, project ) - if ( - fetched_feature_set is not None - and fetched_feature_set.status == FeatureSetStatus.STATUS_READY - ): - feature_set = fetched_feature_set + if fetched_feature_table is not None: + feature_table = fetched_feature_table break time.sleep(3) if timeout is not None: timeout = timeout - int(time.time() - current_time) - try: - # Kafka configs - brokers = feature_set.get_kafka_source_brokers() - topic = feature_set.get_kafka_source_topic() - producer = get_producer(brokers, row_count, disable_progress_bar) - - # Loop optimization declarations - produce = producer.produce - flush = producer.flush - ingestion_id = _generate_ingestion_id(feature_set) - - # Transform and push data to Kafka - if feature_set.source.source_type == "Kafka": - for chunk in get_feature_row_chunks( - file=dest_path, - row_groups=list(range(pq_file.num_row_groups)), - fs=feature_set, - ingestion_id=ingestion_id, - max_workers=max_workers, - ): - - # Push FeatureRow one chunk at a time to kafka - for serialized_row in chunk: - produce(topic=topic, value=serialized_row) - - # Force a flush after each chunk - flush(timeout=timeout) - - # Remove chunk from memory - del chunk - - else: - raise Exception( - f"Could not determine source type for feature set " - f'"{feature_set.name}" with source type ' - f'"{feature_set.source.source_type}"' - ) - - # Print ingestion statistics - producer.print_results() - finally: - # Remove parquet file(s) that were created earlier - print("Removing temporary file(s)...") - shutil.rmtree(dir_path) - - return ingestion_id + # Check 1) Only parquet file format for FeatureTable batch source is supported + if ( + feature_table.batch_source + and 
SourceType(feature_table.batch_source.type).name == "BATCH_FILE" + and "".join( + feature_table.batch_source.file_options.file_format.split() + ).lower() + != "parquet" + ): + raise Exception( + f"No suitable batch source found for FeatureTable, {name}." + f"Only BATCH_FILE source with parquet format is supported for batch ingestion." + ) - def get_statistics( - self, - feature_set_id: str, - store: str, - features: List[str] = [], - ingestion_ids: Optional[List[str]] = None, - start_date: Optional[datetime.datetime] = None, - end_date: Optional[datetime.datetime] = None, - force_refresh: bool = False, - project: Optional[str] = None, - ) -> statistics_pb2.DatasetFeatureStatisticsList: - """ - Retrieves the feature featureStatistics computed over the data in the batch - stores. + # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table + check_field_mappings( + column_names, name, feature_table.batch_source.field_mapping + ) - Args: - feature_set_id: Feature set id to retrieve batch featureStatistics for. If project - is not provided, the default ("default") will be used. - store: Name of the store to retrieve feature featureStatistics over. This - store must be a historical store. - features: Optional list of feature names to filter from the results. - ingestion_ids: Optional list of dataset Ids by which to filter data - before retrieving featureStatistics. Cannot be used with start_date - and end_date. - If multiple dataset ids are provided, unaggregatable featureStatistics - will be dropped. - start_date: Optional start date over which to filter statistical data. - Data from this date will be included. - Cannot be used with dataset_ids. If the provided period spans - multiple days, unaggregatable featureStatistics will be dropped. - end_date: Optional end date over which to filter statistical data. - Data from this data will not be included. - Cannot be used with dataset_ids. If the provided period spans - multiple days, unaggregatable featureStatistics will be dropped. - force_refresh: Setting this flag to true will force a recalculation - of featureStatistics and overwrite results currently in the cache, if any. - project: Manual override for default project. + batch_source_type = SourceType(feature_table.batch_source.type).name - Returns: - Returns a tensorflow DatasetFeatureStatisticsList containing TFDV featureStatistics. - """ + try: + if batch_source_type == "BATCH_FILE": + from urllib.parse import urlparse - if ingestion_ids is not None and ( - start_date is not None or end_date is not None - ): - raise ValueError( - "Only one of dataset_id or [start_date, end_date] can be provided." 
- ) + file_url = feature_table.batch_source.file_options.file_url[:-1] + uri = urlparse(file_url) + staging_client = get_staging_client(uri.scheme) - if project != "" and "/" not in feature_set_id: - feature_set_id = f"{project}/{feature_set_id}" + file_name = dest_path.split("/")[-1] + date_today = datetime.datetime.today().strftime("%Y-%m-%d") - request = GetFeatureStatisticsRequest( - feature_set_id=feature_set_id, - features=features, - store=store, - force_refresh=force_refresh, - ) - if ingestion_ids is not None: - request.ingestion_ids.extend(ingestion_ids) - else: - if start_date is not None: - request.start_date.CopyFrom( - Timestamp(seconds=int(start_date.timestamp())) + staging_client.upload_file( + dest_path, + uri.hostname, + str(uri.path).strip("/") + "/" + f"date={date_today}/" + file_name, ) - if end_date is not None: - request.end_date.CopyFrom(Timestamp(seconds=int(end_date.timestamp()))) + if batch_source_type == "BATCH_BIGQUERY": + from google.cloud import bigquery + + bq_table_ref = feature_table.batch_source.bigquery_options.table_ref + gcp_project, dataset_table = bq_table_ref.split(":") + dataset, table = dataset_table.split(".") + + client = bigquery.Client(project=gcp_project) + + table_ref = client.dataset(dataset).table(table) + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.PARQUET + + # Check for date partitioning column in FeatureTable spec + if feature_table.batch_source.date_partition_column: + time_partitioning_obj = bigquery.table.TimePartitioning( + field=feature_table.batch_source.date_partition_column + ) + job_config.time_partitioning = time_partitioning_obj + with open(dest_path, "rb") as source_file: + client.load_table_from_file( + source_file, table_ref, job_config=job_config + ) + finally: + # Remove parquet file(s) that were created earlier + print("Removing temporary file(s)...") + shutil.rmtree(dir_path) - return self._core_service.GetFeatureStatistics( - request - ).dataset_feature_statistics_list + print( + f"Data has been successfully ingested into FeatureTable {batch_source_type} batch source." + ) def _get_grpc_metadata(self): """ @@ -1181,85 +736,9 @@ def _get_grpc_metadata(self): return () -def _infer_online_entity_rows( - entity_rows: List[Dict[str, Any]], -) -> List[GetOnlineFeaturesRequest.EntityRow]: - """ - Builds a list of EntityRow protos from Python native type format passed by user. - - Args: - entity_rows: A list of dictionaries where each key is an entity and each value is - feast.types.Value or Python native form. - - Returns: - A list of EntityRow protos parsed from args. - """ - entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows) - entity_row_list = [] - entity_type_map = dict() - - for entity in entity_rows_dicts: - fields = {} - for key, value in entity.items(): - # Allow for feast.types.Value - if isinstance(value, Value): - proto_value = value - else: - # Infer the specific type for this row - current_dtype = python_type_to_feast_value_type(name=key, value=value) - - if key not in entity_type_map: - entity_type_map[key] = current_dtype - else: - if current_dtype != entity_type_map[key]: - raise TypeError( - f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. 
" - ) - proto_value = _python_value_to_proto_value(current_dtype, value) - fields[key] = proto_value - entity_row_list.append(GetOnlineFeaturesRequest.EntityRow(fields=fields)) - return entity_row_list - - -def _build_feature_references( - feature_ref_strs: List[str], project: Optional[str] = None -) -> List[FeatureReference]: - """ - Builds a list of FeatureReference protos from string feature set references - - Args: - feature_ref_strs: List of string feature references - project: Optionally specifies the project in the parsed feature references. - - Returns: - A list of FeatureReference protos parsed from args. - """ - feature_refs = [FeatureRef.from_str(ref_str) for ref_str in feature_ref_strs] - feature_ref_protos = [ref.to_proto() for ref in feature_refs] - # apply project if specified - if project is not None: - for feature_ref_proto in feature_ref_protos: - feature_ref_proto.project = project - return feature_ref_protos - - -def _generate_ingestion_id(feature_set: FeatureSet) -> str: - """ - Generates a UUID from the feature set name, version, and the current time. - - Args: - feature_set: Feature set of the dataset to be ingested. - - Returns: - UUID unique to current time and the feature set provided. - """ - uuid_str = f"{feature_set.name}_{int(time.time())}" - return str(uuid.uuid3(uuid.NAMESPACE_DNS, uuid_str)) - - def _read_table_from_source( source: Union[pd.DataFrame, str], chunk_size: int, max_workers: int -) -> Tuple[str, str]: +) -> Tuple[str, str, List[str]]: """ Infers a data source type (path or Pandas DataFrame) and reads it in as a PyArrow Table. @@ -1283,9 +762,9 @@ def _read_table_from_source( Amount of rows to load and ingest at a time. Returns: - Tuple[str, str]: - Tuple containing parent directory path and destination path to - parquet file. + Tuple[str, str, List[str]]: + Tuple containing parent directory path, destination path to + parquet file and column names of pyarrow table. 
""" # Pandas DataFrame detected @@ -1320,7 +799,9 @@ def _read_table_from_source( row_group_size = min(ceil(table.num_rows / max_workers), chunk_size) pq.write_table(table=table, where=dest_path, row_group_size=row_group_size) + column_names = table.column_names + # Remove table from memory del table - return dir_path, dest_path + return dir_path, dest_path, column_names diff --git a/sdk/python/feast/contrib/__init__.py b/sdk/python/feast/contrib/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/feast/contrib/job_controller/__init__.py b/sdk/python/feast/contrib/job_controller/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/feast/contrib/job_controller/client.py b/sdk/python/feast/contrib/job_controller/client.py deleted file mode 100644 index 9a9ffbcc84..0000000000 --- a/sdk/python/feast/contrib/job_controller/client.py +++ /dev/null @@ -1,145 +0,0 @@ -from typing import Optional - -import grpc - -from feast.config import Config -from feast.constants import ( - CONFIG_CORE_ENABLE_SSL_KEY, - CONFIG_CORE_SERVER_SSL_CERT_KEY, - CONFIG_ENABLE_AUTH_KEY, - CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY, - CONFIG_JOB_CONTROLLER_SERVER_KEY, -) -from feast.contrib.job_controller.job import IngestJob -from feast.core.CoreService_pb2 import ( - ListIngestionJobsRequest, - RestartIngestionJobRequest, - StopIngestionJobRequest, -) -from feast.core.CoreService_pb2_grpc import JobControllerServiceStub -from feast.feature_set import FeatureSetRef -from feast.grpc import auth as feast_auth -from feast.grpc.grpc import create_grpc_channel - - -class Client: - """ - JobController Client: used internally to manage Ingestion Jobs - """ - - def __init__(self, options=None, **kwargs): - """ - JobControllerClient should be initialized with - jobcontroller_url: Feast JobController address - - :param options: Configuration options to initialize client with - :param kwargs: options in kwargs style - """ - if options is None: - options = dict() - self._config = Config(options={**options, **kwargs}) - - self._jobcontroller_service_stub: Optional[JobControllerServiceStub] = None - self._auth_metadata: Optional[grpc.AuthMetadataPlugin] = None - - # Configure Auth Metadata Plugin if auth is enabled - if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY): - self._auth_metadata = feast_auth.get_auth_metadata_plugin(self._config) - - @property - def _jobcontroller_service(self): - if not self._jobcontroller_service_stub: - channel = create_grpc_channel( - url=self._config.get(CONFIG_JOB_CONTROLLER_SERVER_KEY), - enable_ssl=self._config.getboolean(CONFIG_CORE_ENABLE_SSL_KEY), - enable_auth=self._config.getboolean(CONFIG_ENABLE_AUTH_KEY), - ssl_server_cert_path=self._config.get(CONFIG_CORE_SERVER_SSL_CERT_KEY), - auth_metadata_plugin=self._auth_metadata, - timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY), - ) - self._jobcontroller_service_stub = JobControllerServiceStub(channel) - - return self._jobcontroller_service_stub - - def list_ingest_jobs( - self, - job_id: str = None, - feature_set_ref: FeatureSetRef = None, - store_name: str = None, - ): - """ - List the ingestion jobs currently registered in Feast, with optional filters. - Provides detailed metadata about each ingestion job. 
- - Args: - job_id: Select specific ingestion job with the given job_id - feature_set_ref: Filter ingestion jobs by target feature set (via reference) - store_name: Filter ingestion jobs by target feast store's name - - Returns: - List of IngestJobs matching the given filters - """ - # construct list request - feature_set_ref_proto = None - if feature_set_ref: - feature_set_ref_proto = feature_set_ref.to_proto() - list_filter = ListIngestionJobsRequest.Filter( - id=job_id, - feature_set_reference=feature_set_ref_proto, - store_name=store_name, - ) - request = ListIngestionJobsRequest(filter=list_filter) - # make list request & unpack response - response = self._jobcontroller_service.ListIngestionJobs(request, metadata=self._get_grpc_metadata(),) # type: ignore - ingest_jobs = [ - IngestJob(proto, self._jobcontroller_service, auth_metadata_plugin=self._auth_metadata) for proto in response.jobs # type: ignore - ] - - return ingest_jobs - - def restart_ingest_job(self, job: IngestJob): - """ - Restart ingestion job currently registered in Feast. - NOTE: Data might be lost during the restart for some job runners. - Does not support stopping a job in a transitional (ie pending, suspending, aborting), - terminal state (ie suspended or aborted) or unknown status - - Args: - job: IngestJob to restart - """ - request = RestartIngestionJobRequest(id=job.id) - try: - self._jobcontroller_service.RestartIngestionJob( - request, metadata=self._get_grpc_metadata(), - ) # type: ignore - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - - def stop_ingest_job(self, job: IngestJob): - """ - Stop ingestion job currently resgistered in Feast - Does nothing if the target job if already in a terminal state (ie suspended or aborted). - Does not support stopping a job in a transitional (ie pending, suspending, aborting) - or in a unknown status - - Args: - job: IngestJob to restart - """ - request = StopIngestionJobRequest(id=job.id) - try: - self._jobcontroller_service.StopIngestionJob( - request, metadata=self._get_grpc_metadata(), - ) # type: ignore - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - - def _get_grpc_metadata(self): - """ - Returns a metadata tuple to attach to gRPC requests. This is primarily - used when authentication is enabled but SSL/TLS is disabled. - - Returns: Tuple of metadata to attach to each gRPC call - """ - if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata: - return self._auth_metadata.get_signed_meta() - return () diff --git a/sdk/python/feast/contrib/job_controller/job.py b/sdk/python/feast/contrib/job_controller/job.py deleted file mode 100644 index 8f2800cba6..0000000000 --- a/sdk/python/feast/contrib/job_controller/job.py +++ /dev/null @@ -1,122 +0,0 @@ -from typing import List - -import grpc -from google.protobuf.json_format import MessageToJson - -from feast import Source -from feast.core.CoreService_pb2 import ListIngestionJobsRequest -from feast.core.CoreService_pb2_grpc import JobControllerServiceStub -from feast.core.IngestionJob_pb2 import IngestionJob as IngestJobProto -from feast.core.IngestionJob_pb2 import IngestionJobStatus -from feast.core.Store_pb2 import Store -from feast.feature_set import FeatureSetRef -from feast.wait import wait_retry_backoff - - -class IngestJob: - """ - Defines a job for feature ingestion in feast. 
- """ - - def __init__( - self, - job_proto: IngestJobProto, - core_stub: JobControllerServiceStub, - auth_metadata_plugin: grpc.AuthMetadataPlugin = None, - ): - """ - Construct a native ingest job from its protobuf version. - - Args: - job_proto: Job proto object to construct from. - core_stub: stub for Feast CoreService - auth_metadata_plugin: plugin to fetch auth metadata - """ - self.proto = job_proto - self.core_svc = core_stub - self.auth_metadata = auth_metadata_plugin - - def reload(self): - """ - Update this IngestJob with the latest info from Feast - """ - # pull latest proto from feast core - response = self.core_svc.ListIngestionJobs( - ListIngestionJobsRequest( - filter=ListIngestionJobsRequest.Filter(id=self.id) - ), - metadata=self.auth_metadata.get_signed_meta() if self.auth_metadata else (), - ) - self.proto = response.jobs[0] - - @property - def id(self) -> str: - """ - Getter for IngestJob's job id. - """ - return self.proto.id - - @property - def external_id(self) -> str: - """ - Getter for IngestJob's external job id. - """ - self.reload() - return self.proto.external_id - - @property - def status(self) -> IngestionJobStatus: # type: ignore - """ - Getter for IngestJob's status - """ - self.reload() - return self.proto.status - - @property - def feature_sets(self) -> List[FeatureSetRef]: - """ - Getter for the IngestJob's feature sets - """ - # convert featureset protos to native objects - return [ - FeatureSetRef.from_proto(fs) for fs in self.proto.feature_set_references - ] - - @property - def source(self) -> Source: - """ - Getter for the IngestJob's data source. - """ - return Source.from_proto(self.proto.source) - - @property - def stores(self) -> List[Store]: - """ - Getter for the IngestJob's target feast store. - """ - return list(self.proto.stores) - - def wait(self, status: IngestionJobStatus, timeout_secs: int = 300): # type: ignore - """ - Wait for this IngestJob to transtion to the given status. - Raises TimeoutError if the wait operation times out. - - Args: - status: The IngestionJobStatus to wait for. - timeout_secs: Maximum seconds to wait before timing out. 
- """ - # poll & wait for job status to transition - wait_retry_backoff( - retry_fn=(lambda: (None, self.status == status)), # type: ignore - timeout_secs=timeout_secs, - timeout_msg="Wait for IngestJob's status to transition timed out", - ) - - def __str__(self): - # render the contents of ingest job as human readable string - self.reload() - return str(MessageToJson(self.proto)) - - def __repr__(self): - # render the ingest job as human readable string - return f"IngestJob<{self.id}>" diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index caa8b22f78..a6e79437af 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -22,42 +22,11 @@ from feast.core.Entity_pb2 import Entity as EntityV2Proto from feast.core.Entity_pb2 import EntityMeta as EntityMetaProto from feast.core.Entity_pb2 import EntitySpecV2 as EntitySpecProto -from feast.core.FeatureSet_pb2 import EntitySpec as EntityProto -from feast.field import Field from feast.loaders import yaml as feast_yaml -from feast.types import Value_pb2 as ValueTypeProto from feast.value_type import ValueType -class Entity(Field): - """Entity field type""" - - def to_proto(self) -> EntityProto: - """ - Converts Entity to its Protocol Buffer representation - - Returns: - Returns EntitySpec object - """ - value_type = ValueTypeProto.ValueType.Enum.Value(self.dtype.name) - return EntityProto(name=self.name, value_type=value_type,) - - @classmethod - def from_proto(cls, entity_proto: EntityProto): - """ - Creates a Feast Entity object from its Protocol Buffer representation - - Args: - entity_proto: EntitySpec protobuf object - - Returns: - Entity object - """ - entity = cls(name=entity_proto.name, dtype=ValueType(entity_proto.value_type)) - return entity - - -class EntityV2: +class Entity: """ Represents a collection of entities and associated metadata. """ @@ -81,8 +50,8 @@ def __init__( self._last_updated_timestamp: Optional[Timestamp] = None def __eq__(self, other): - if not isinstance(other, EntityV2): - raise TypeError("Comparisons should only involve EntityV2 class objects.") + if not isinstance(other, Entity): + raise TypeError("Comparisons should only involve Entity class objects.") if isinstance(self.value_type, int): self.value_type = ValueType(self.value_type).name diff --git a/sdk/python/feast/feature.py b/sdk/python/feast/feature.py index 054bf5ecc5..4627598d12 100644 --- a/sdk/python/feast/feature.py +++ b/sdk/python/feast/feature.py @@ -1,4 +1,4 @@ -# Copyright 2019 The Feast Authors +# Copyright 2020 The Feast Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,140 +12,83 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from feast.core.FeatureSet_pb2 import FeatureSpec as FeatureProto -from feast.field import Field -from feast.serving.ServingService_pb2 import FeatureReference as FeatureRefProto +from typing import MutableMapping, Optional + +from feast.core.Feature_pb2 import FeatureSpecV2 as FeatureSpecProto from feast.types import Value_pb2 as ValueTypeProto from feast.value_type import ValueType -class Feature(Field): +class Feature: """Feature field type""" - def to_proto(self) -> FeatureProto: - """Converts Feature object to its Protocol Buffer representation""" - value_type = ValueTypeProto.ValueType.Enum.Value(self.dtype.name) - return FeatureProto( - name=self.name, - value_type=value_type, - labels=self.labels, - presence=self.presence, - group_presence=self.group_presence, - shape=self.shape, - value_count=self.value_count, - domain=self.domain, - int_domain=self.int_domain, - float_domain=self.float_domain, - string_domain=self.string_domain, - bool_domain=self.bool_domain, - struct_domain=self.struct_domain, - natural_language_domain=self.natural_language_domain, - image_domain=self.image_domain, - mid_domain=self.mid_domain, - url_domain=self.url_domain, - time_domain=self.time_domain, - time_of_day_domain=self.time_of_day_domain, - ) + def __init__( + self, + name: str, + dtype: ValueType, + labels: Optional[MutableMapping[str, str]] = None, + ): + self._name = name + if not isinstance(dtype, ValueType): + raise ValueError("dtype is not a valid ValueType") + self._dtype = dtype + if labels is None: + self._labels = dict() # type: MutableMapping + else: + self._labels = labels - @classmethod - def from_proto(cls, feature_proto: FeatureProto): + def __eq__(self, other): + if ( + self.name != other.name + or self.dtype != other.dtype + or self.labels != other.labels + ): + return False + return True + + @property + def name(self): """ - - Args: - feature_proto: FeatureSpec protobuf object - - Returns: - Feature object + Getter for name of this field """ - feature = cls( - name=feature_proto.name, - dtype=ValueType(feature_proto.value_type), - labels=feature_proto.labels, - ) - feature.update_presence_constraints(feature_proto) - feature.update_shape_type(feature_proto) - feature.update_domain_info(feature_proto) - return feature + return self._name - -class FeatureRef: - """ Feature Reference represents a reference to a specific feature. """ - - def __init__(self, name: str, feature_set: str = None): - self.proto = FeatureRefProto(name=name, feature_set=feature_set) - - @classmethod - def from_proto(cls, proto: FeatureRefProto): + @property + def dtype(self) -> ValueType: """ - Construct a feature reference from the given FeatureReference proto - - Arg: - proto: Protobuf FeatureReference to construct from - - Returns: - FeatureRef that refers to the given feature + Getter for data type of this field """ - return cls(name=proto.name, feature_set=proto.feature_set) + return self._dtype - @classmethod - def from_str(cls, feature_ref_str: str, ignore_project: bool = False): + @property + def labels(self) -> MutableMapping[str, str]: """ - Parse the given string feature reference into FeatureRef model - String feature reference should be in the format feature_set:feature. - Where "feature_set" and "name" are the feature_set name and feature name - respectively. 
- - Args: - feature_ref_str: String representation of the feature reference - ignore_project: Ignore projects in given string feature reference - instead throwing an error - - Returns: - FeatureRef that refers to the given feature + Getter for labels of this field """ - proto = FeatureRefProto() - if "/" in feature_ref_str: - if ignore_project: - _, feature_ref_str = feature_ref_str.split("/") - else: - raise ValueError(f"Unsupported feature reference: {feature_ref_str}") + return self._labels - # parse feature set name if specified - if ":" in feature_ref_str: - proto.feature_set, feature_ref_str = feature_ref_str.split(":") + def to_proto(self) -> FeatureSpecProto: + """Converts Feature object to its Protocol Buffer representation""" + value_type = ValueTypeProto.ValueType.Enum.Value(self.dtype.name) - proto.name = feature_ref_str - return cls.from_proto(proto) + return FeatureSpecProto( + name=self.name, value_type=value_type, labels=self.labels, + ) - def to_proto(self) -> FeatureRefProto: + @classmethod + def from_proto(cls, feature_proto: FeatureSpecProto): """ - Convert and return this feature set reference to protobuf. + Args: + feature_proto: FeatureSpecV2 protobuf object Returns: - Protobuf respresentation of this feature set reference. + Feature object """ - return self.proto - - def __repr__(self): - # return string representation of the reference - # [project/][feature_set:]name - # in protov3 unset string and int fields default to "" and 0 - ref_str = "" - if len(self.proto.project) > 0: - ref_str += self.proto.project + "/" - if len(self.proto.feature_set) > 0: - ref_str += self.proto.feature_set + ":" - ref_str += self.proto.name - return ref_str - def __str__(self): - # human readable string of the reference - return f"FeatureRef<{self.__repr__()}>" - - def __eq__(self, other): - # compare with other feature set - return hash(self) == hash(other) + feature = cls( + name=feature_proto.name, + dtype=ValueType(feature_proto.value_type), + labels=feature_proto.labels, + ) - def __hash__(self): - # hash this reference - return hash(repr(self)) + return feature diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py deleted file mode 100644 index fd2e17a2eb..0000000000 --- a/sdk/python/feast/feature_set.py +++ /dev/null @@ -1,1078 +0,0 @@ -# Copyright 2019 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import warnings -from collections import OrderedDict -from typing import Dict, List, MutableMapping, Optional - -import pandas as pd -import pyarrow as pa -import yaml -from google.protobuf import json_format -from google.protobuf.duration_pb2 import Duration -from google.protobuf.json_format import MessageToDict, MessageToJson -from google.protobuf.message import Message -from google.protobuf.timestamp_pb2 import Timestamp -from pandas.api.types import is_datetime64_ns_dtype -from pyarrow.lib import TimestampType - -from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto -from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto -from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto -from feast.core.FeatureSetReference_pb2 import ( - FeatureSetReference as FeatureSetReferenceProto, -) -from feast.entity import Entity -from feast.feature import Feature, Field -from feast.loaders import yaml as feast_yaml -from feast.source import Source -from feast.type_map import ( - DATETIME_COLUMN, - pa_to_feast_value_type, - python_type_to_feast_value_type, -) -from tensorflow_metadata.proto.v0 import schema_pb2 - - -class FeatureSet: - """ - Represents a collection of features and associated metadata. - """ - - def __init__( - self, - name: str, - project: str = None, - features: List[Feature] = None, - entities: List[Entity] = None, - source: Source = None, - max_age: Optional[Duration] = None, - labels: Optional[MutableMapping[str, str]] = None, - ): - self._name = name - self._project = project - self._fields = OrderedDict() # type: Dict[str, Field] - if features is not None: - self.features: Optional[List[Feature]] = features - if entities is not None: - self.entities = entities - if source is None: - self._source = None - else: - self._source = source - if labels is None: - self._labels = OrderedDict() # type: MutableMapping[str, str] - else: - self._labels = labels - self._max_age = max_age - self._status = None - self._created_timestamp: Optional[Timestamp] = None - - def __eq__(self, other): - if not isinstance(other, FeatureSet): - return NotImplemented - - for key in self.fields.keys(): - if key not in other.fields.keys() or self.fields[key] != other.fields[key]: - return False - - if self.fields[key] != other.fields[key]: - return False - - if ( - self.labels != other.labels - or self.name != other.name - or self.project != other.project - or self.max_age != other.max_age - ): - return False - - if self.source != other.source: - return False - return True - - def __str__(self): - return str(MessageToJson(self.to_proto())) - - def __repr__(self): - return FeatureSetRef.from_feature_set(self).__repr__() - - @property - def fields(self) -> Dict[str, Field]: - """ - Returns a dict of fields from this feature set - """ - return self._fields - - @property - def features(self) -> List[Feature]: - """ - Returns a list of features from this feature set - """ - return [field for field in self._fields.values() if isinstance(field, Feature)] - - @features.setter - def features(self, features: List[Feature]): - """ - Sets the active features within this feature set - - Args: - features: List of feature objects - """ - for feature in features: - if not isinstance(feature, Feature): - raise Exception("object type is not a Feature: " + str(type(feature))) - - for key in list(self._fields.keys()): - if isinstance(self._fields[key], Feature): - del self._fields[key] - - if features is not None: - self._add_fields(features) - - @property - def 
entities(self) -> List[Entity]: - """ - Returns list of entities from this feature set - """ - return [field for field in self._fields.values() if isinstance(field, Entity)] - - @entities.setter - def entities(self, entities: List[Entity]): - """ - Sets the active entities within this feature set - - Args: - entities: List of entities objects - """ - for entity in entities: - if not isinstance(entity, Entity): - raise Exception("object type is not na Entity: " + str(type(entity))) - - for key in list(self._fields.keys()): - if isinstance(self._fields[key], Entity): - del self._fields[key] - - if entities is not None: - self._add_fields(entities) - - @property - def name(self): - """ - Returns the name of this feature set - """ - return self._name - - @name.setter - def name(self, name): - """ - Sets the name of this feature set - """ - self._name = name - - @property - def project(self): - """ - Returns the project that this feature set belongs to - """ - return self._project - - @project.setter - def project(self, project): - """ - Sets the project that this feature set belongs to - """ - self._project = project - - @property - def source(self): - """ - Returns the source of this feature set - """ - return self._source - - @source.setter - def source(self, source: Source): - """ - Sets the source of this feature set - """ - self._source = source - - @property - def max_age(self): - """ - Returns the maximum age of this feature set. This is the total maximum - amount of staleness that will be allowed during feature retrieval for - each specific feature row that is looked up. - """ - return self._max_age - - @max_age.setter - def max_age(self, max_age): - """ - Set the maximum age for this feature set - """ - self._max_age = max_age - - @property - def labels(self): - """ - Returns the labels of this feature set. This is the user defined metadata - defined as a dictionary. - """ - return self._labels - - @labels.setter - def labels(self, labels: MutableMapping[str, str]): - """ - Set the labels for this feature set - """ - self._labels = labels - - @property - def status(self): - """ - Returns the status of this feature set - """ - return self._status - - @status.setter - def status(self, status): - """ - Sets the status of this feature set - """ - self._status = status - - @property - def created_timestamp(self): - """ - Returns the created_timestamp of this feature set - """ - return self._created_timestamp - - @created_timestamp.setter - def created_timestamp(self, created_timestamp): - """ - Sets the status of this feature set - """ - self._created_timestamp = created_timestamp - - def set_label(self, key: str, value: str): - """ - Sets the label value for a given key - """ - self.labels[key] = value - - def remove_label(self, key: str): - """ - Removes a label based on key - """ - del self.labels[key] - - def add(self, resource): - """ - Adds a resource (Feature, Entity) to this Feature Set. 
- Does not register the updated Feature Set with Feast Core - - Args: - resource: A resource can be either a Feature or an Entity object - """ - if resource.name in self._fields.keys(): - raise ValueError( - 'could not add field "' - + resource.name - + '" since it already exists in feature set "' - + self._name - + '"' - ) - - if issubclass(type(resource), Field): - return self._set_field(resource) - - raise ValueError("Could not identify the resource being added") - - def _set_field(self, field: Field): - self._fields[field.name] = field - return - - def drop(self, name: str): - """ - Removes a Feature or Entity from a Feature Set. This does not apply - any changes to Feast Core until the apply() method is called. - - Args: - name: Name of Feature or Entity to be removed - """ - del self._fields[name] - - def _add_fields(self, fields): - """ - Adds multiple Fields to a Feature Set - - Args: - fields: List of Field (Feature or Entity) Objects - """ - for field in fields: - self.add(field) - - def infer_fields_from_df( - self, - df: pd.DataFrame, - entities: Optional[List[Entity]] = None, - features: Optional[List[Feature]] = None, - replace_existing_features: bool = False, - replace_existing_entities: bool = False, - discard_unused_fields: bool = False, - rows_to_sample: int = 100, - ): - """ - Adds fields (Features or Entities) to a feature set based on the schema - of a Datatframe. Only Pandas dataframes are supported. All columns are - detected as features, so setting at least one entity manually is - advised. - - Args: - df: Pandas dataframe to read schema from - entities: List of entities that will be set manually and not - inferred. These will take precedence over any existing entities - or entities found in the dataframe. - features: List of features that will be set manually and not - inferred. These will take precedence over any existing feature - or features found in the dataframe. - replace_existing_features: If true, will replace - existing features in this feature set with features found in - dataframe. If false, will skip conflicting features. - replace_existing_entities: If true, will replace existing entities - in this feature set with features found in dataframe. If false, - will skip conflicting entities. - discard_unused_fields: Boolean flag. Setting this to True will - discard any existing fields that are not found in the dataset or - provided by the user - rows_to_sample: Number of rows to sample to infer types. 
All rows - must have consistent types, even values within list types must - be homogeneous - """ - - if entities is None: - entities = list() - if features is None: - features = list() - - # Validate whether the datetime column exists with the right name - if DATETIME_COLUMN not in df: - raise Exception("No column 'datetime'") - - # Validate the data type for the datetime column - if not is_datetime64_ns_dtype(df.dtypes[DATETIME_COLUMN]): - raise Exception( - "Column 'datetime' does not have the correct type: datetime64[ns]" - ) - - # Create dictionary of fields that will not be inferred (manually set) - provided_fields = OrderedDict() - fields = _create_field_list(entities, features) - - for field in fields: - if not isinstance(field, Field): - raise Exception(f"Invalid field object type provided {type(field)}") - if field.name not in provided_fields: - provided_fields[field.name] = field - else: - raise Exception(f"Duplicate field name detected {field.name}.") - - new_fields = self._fields.copy() - output_log = "" - - # Add in provided fields - for name, field in provided_fields.items(): - if name in new_fields.keys(): - upsert_message = "created" - else: - upsert_message = "updated (replacing an existing field)" - - output_log += ( - f"{type(field).__name__} {field.name}" - f"({field.dtype}) manually {upsert_message}.\n" - ) - new_fields[name] = field - - # Iterate over all of the columns and create features - for column in df.columns: - column = column.strip() - - # Skip datetime column - if DATETIME_COLUMN in column: - continue - - # Skip user provided fields - if column in provided_fields.keys(): - continue - - # Only overwrite conflicting fields if replacement is allowed - if column in new_fields: - if ( - isinstance(self._fields[column], Feature) - and not replace_existing_features - ): - continue - - if ( - isinstance(self._fields[column], Entity) - and not replace_existing_entities - ): - continue - - # Store this field as a feature - new_fields[column] = Feature( - name=column, - dtype=_infer_pd_column_type(column, df[column], rows_to_sample), - ) - - output_log += f"{type(new_fields[column]).__name__} {new_fields[column].name} ({new_fields[column].dtype}) added from dataframe.\n" - - # Discard unused fields from feature set - if discard_unused_fields: - keys_to_remove = [] - for key in new_fields.keys(): - if not (key in df.columns or key in provided_fields.keys()): - output_log += f"{type(new_fields[key]).__name__} {new_fields[key].name} ({new_fields[key].dtype}) removed because it is unused.\n" - keys_to_remove.append(key) - for key in keys_to_remove: - del new_fields[key] - - # Update feature set - self._fields = new_fields - print(output_log) - - def infer_fields_from_pa( - self, - table: pa.lib.Table, - entities: Optional[List[Entity]] = None, - features: Optional[List[Feature]] = None, - replace_existing_features: bool = False, - replace_existing_entities: bool = False, - discard_unused_fields: bool = False, - ) -> None: - """ - Adds fields (Features or Entities) to a feature set based on the schema - of a PyArrow table. Only PyArrow tables are supported. All columns are - detected as features, so setting at least one entity manually is - advised. - - - Args: - table (pyarrow.lib.Table): - PyArrow table to read schema from. - - entities (Optional[List[Entity]]): - List of entities that will be set manually and not inferred. - These will take precedence over any existing entities or - entities found in the PyArrow table. 
- - features (Optional[List[Feature]]): - List of features that will be set manually and not inferred. - These will take precedence over any existing feature or features - found in the PyArrow table. - - replace_existing_features (bool): - Boolean flag. If true, will replace existing features in this - feature set with features found in dataframe. If false, will - skip conflicting features. - - replace_existing_entities (bool): - Boolean flag. If true, will replace existing entities in this - feature set with features found in dataframe. If false, will - skip conflicting entities. - - discard_unused_fields (bool): - Boolean flag. Setting this to True will discard any existing - fields that are not found in the dataset or provided by the - user. - - Returns: - None: - None - """ - if entities is None: - entities = list() - if features is None: - features = list() - - # Validate whether the datetime column exists with the right name - if DATETIME_COLUMN not in table.column_names: - raise Exception("No column 'datetime'") - - # Validate the date type for the datetime column - if not isinstance(table.column(DATETIME_COLUMN).type, TimestampType): - raise Exception( - "Column 'datetime' does not have the correct type: datetime64[ms]" - ) - - # Create dictionary of fields that will not be inferred (manually set) - provided_fields = OrderedDict() - fields = _create_field_list(entities, features) - - for field in fields: - if not isinstance(field, Field): - raise Exception(f"Invalid field object type provided {type(field)}") - if field.name not in provided_fields: - provided_fields[field.name] = field - else: - raise Exception(f"Duplicate field name detected {field.name}.") - - new_fields = self._fields.copy() - output_log = "" - - # Add in provided fields - for name, field in provided_fields.items(): - if name in new_fields.keys(): - upsert_message = "created" - else: - upsert_message = "updated (replacing an existing field)" - - output_log += ( - f"{type(field).__name__} {field.name}" - f"({field.dtype}) manually {upsert_message}.\n" - ) - new_fields[name] = field - - # Iterate over all of the column names and create features - for column in table.column_names: - column = column.strip() - - # Skip datetime column - if DATETIME_COLUMN in column: - continue - - # Skip user provided fields - if column in provided_fields.keys(): - continue - - # Only overwrite conflicting fields if replacement is allowed - if column in new_fields: - if ( - isinstance(self._fields[column], Feature) - and not replace_existing_features - ): - continue - - if ( - isinstance(self._fields[column], Entity) - and not replace_existing_entities - ): - continue - - # Store this fields as a feature - # TODO: (Minor) Change the parameter name from dtype to patype - new_fields[column] = Feature( - name=column, dtype=self._infer_pa_column_type(table.column(column)) - ) - - output_log += f"{type(new_fields[column]).__name__} {new_fields[column].name} ({new_fields[column].dtype}) added from PyArrow Table.\n" - - # Discard unused fields from feature set - if discard_unused_fields: - keys_to_remove = [] - for key in new_fields.keys(): - if not (key in table.column_names or key in provided_fields.keys()): - output_log += f"{type(new_fields[key]).__name__} {new_fields[key].name} ({new_fields[key].dtype}) removed because it is unused.\n" - keys_to_remove.append(key) - for key in keys_to_remove: - del new_fields[key] - - # Update feature set - self._fields = new_fields - print(output_log) - - def _infer_pd_column_type(self, column, series, 
rows_to_sample): - dtype = None - sample_count = 0 - - # Loop over all rows for this column to infer types - for key, value in series.iteritems(): - sample_count += 1 - # Stop sampling at the row limit - if sample_count > rows_to_sample: - continue - - # Infer the specific type for this row - current_dtype = python_type_to_feast_value_type(name=column, value=value) - - # Make sure the type is consistent for column - if dtype: - if dtype != current_dtype: - raise ValueError( - f"Type mismatch detected in column {column}. Both " - f"the types {current_dtype} and {dtype} " - f"have been found." - ) - else: - # Store dtype in field to type map if it isnt already - dtype = current_dtype - - return dtype - - def _infer_pa_column_type(self, column: pa.lib.ChunkedArray): - """ - Infers the PyArrow column type. - - :param column: Column from a PyArrow table - :type column: pa.lib.ChunkedArray - :return: - :rtype: - """ - # Validates the column to ensure that value types are consistent - column.validate() - return pa_to_feast_value_type(column) - - def _update_from_feature_set(self, feature_set): - """ - Deep replaces one feature set with another - - Args: - feature_set: Feature set to use as a source of configuration - """ - - self.name = feature_set.name - self.project = feature_set.project - self.source = feature_set.source - self.max_age = feature_set.max_age - self.features = feature_set.features - self.entities = feature_set.entities - self.source = feature_set.source - self.status = feature_set.status - self.created_timestamp = feature_set.created_timestamp - - def get_kafka_source_brokers(self) -> str: - """ - Get the broker list for the source in this feature set - """ - if self.source and self.source.source_type == "Kafka": - return self.source.brokers - raise Exception("Source type could not be identified") - - def get_kafka_source_topic(self) -> str: - """ - Get the topic that this feature set has been configured to use as source - """ - if self.source and self.source.source_type == "Kafka": - return self.source.topic - raise Exception("Source type could not be identified") - - def is_valid(self): - """ - Validates the state of a feature set locally. Raises an exception - if feature set is invalid. - """ - - if not self.name: - raise ValueError("No name found in feature set.") - - if len(self.entities) == 0: - raise ValueError("No entities found in feature set {self.name}") - - def import_tfx_schema(self, schema: schema_pb2.Schema): - """ - Updates presence_constraints, shape_type and domain_info for all fields - (features and entities) in the FeatureSet from schema in the Tensorflow metadata. - - Args: - schema: Schema from Tensorflow metadata - - Returns: - None - - """ - _make_tfx_schema_domain_info_inline(schema) - for feature_from_tfx_schema in schema.feature: - if feature_from_tfx_schema.name in self._fields.keys(): - field = self._fields[feature_from_tfx_schema.name] - field.update_presence_constraints(feature_from_tfx_schema) - field.update_shape_type(feature_from_tfx_schema) - field.update_domain_info(feature_from_tfx_schema) - else: - warnings.warn( - f"The provided schema contains feature name '{feature_from_tfx_schema.name}' " - f"that does not exist in the FeatureSet '{self.name}' in Feast" - ) - - def export_tfx_schema(self) -> schema_pb2.Schema: - """ - Create a Tensorflow metadata schema from a FeatureSet. - - Returns: - Tensorflow metadata schema. 
- - """ - schema = schema_pb2.Schema() - - # List of attributes to copy from fields in the FeatureSet to feature in - # Tensorflow metadata schema where the attribute name is the same. - attributes_to_copy_from_field_to_feature = [ - "name", - "presence", - "group_presence", - "shape", - "value_count", - "domain", - "int_domain", - "float_domain", - "string_domain", - "bool_domain", - "struct_domain", - "_natural_language_domain", - "image_domain", - "mid_domain", - "url_domain", - "time_domain", - "time_of_day_domain", - ] - - for _, field in self._fields.items(): - if isinstance(field, Entity): - continue - feature = schema_pb2.Feature() - for attr in attributes_to_copy_from_field_to_feature: - if getattr(field, attr) is None: - # This corresponds to an unset member in the proto Oneof field. - continue - if issubclass(type(getattr(feature, attr)), Message): - # Proto message field to copy is an "embedded" field, so MergeFrom() - # method must be used. - getattr(feature, attr).MergeFrom(getattr(field, attr)) - elif issubclass(type(getattr(feature, attr)), (int, str, bool)): - # Proto message field is a simple Python type, so setattr() - # can be used. - setattr(feature, attr, getattr(field, attr)) - else: - warnings.warn( - f"Attribute '{attr}' cannot be copied from Field " - f"'{field.name}' in FeatureSet '{self.name}' to a " - f"Feature in the Tensorflow metadata schema, because" - f"the type is neither a Protobuf message or Python " - f"int, str and bool" - ) - # "type" attr is handled separately because the attribute name is different - # ("dtype" in field and "type" in Feature) and "type" in Feature is only - # a subset of "dtype". - feature.type = field.dtype.to_tfx_schema_feature_type() - schema.feature.append(feature) - - return schema - - @classmethod - def from_yaml(cls, yml: str): - """ - Creates a feature set from a YAML string body or a file path - - Args: - yml: Either a file path containing a yaml file or a YAML string - - Returns: - Returns a FeatureSet object based on the YAML file - """ - - return cls.from_dict(feast_yaml.yaml_loader(yml, load_single=True)) - - @classmethod - def from_dict(cls, fs_dict): - """ - Creates a feature set from a dict - - Args: - fs_dict: A dict representation of a feature set - - Returns: - Returns a FeatureSet object based on the feature set dict - """ - - feature_set_proto = json_format.ParseDict( - fs_dict, FeatureSetProto(), ignore_unknown_fields=True - ) - return cls.from_proto(feature_set_proto) - - @classmethod - def from_proto(cls, feature_set_proto: FeatureSetProto): - """ - Creates a feature set from a protobuf representation of a feature set - - Args: - feature_set_proto: A protobuf representation of a feature set - - Returns: - Returns a FeatureSet object based on the feature set protobuf - """ - - feature_set = cls( - name=feature_set_proto.spec.name, - features=[ - Feature.from_proto(feature) - for feature in feature_set_proto.spec.features - ], - entities=[ - Entity.from_proto(entity) for entity in feature_set_proto.spec.entities - ], - max_age=( - None - if feature_set_proto.spec.max_age.seconds == 0 - and feature_set_proto.spec.max_age.nanos == 0 - else feature_set_proto.spec.max_age - ), - labels=feature_set_proto.spec.labels, - source=( - None - if feature_set_proto.spec.source.type == 0 - else Source.from_proto(feature_set_proto.spec.source) - ), - project=None - if len(feature_set_proto.spec.project) == 0 - else feature_set_proto.spec.project, - ) - feature_set._status = feature_set_proto.meta.status # type: ignore - 
feature_set._created_timestamp = feature_set_proto.meta.created_timestamp - return feature_set - - def to_proto(self) -> FeatureSetProto: - """ - Converts a feature set object to its protobuf representation - - Returns: - FeatureSetProto protobuf - """ - - meta = FeatureSetMetaProto( - created_timestamp=self.created_timestamp, status=self.status - ) - - spec = FeatureSetSpecProto( - name=self.name, - project=self.project, - max_age=self.max_age, - labels=self.labels, - source=self.source.to_proto() if self.source is not None else None, - features=[ - field.to_proto() - for field in self._fields.values() - if type(field) == Feature - ], - entities=[ - field.to_proto() - for field in self._fields.values() - if type(field) == Entity - ], - ) - - return FeatureSetProto(spec=spec, meta=meta) - - def to_dict(self) -> Dict: - """ - Converts feature set to dict - - :return: Dictionary object representation of feature set - """ - feature_set_dict = MessageToDict(self.to_proto()) - - # Remove meta when empty for more readable exports - if feature_set_dict["meta"] == {}: - del feature_set_dict["meta"] - - return feature_set_dict - - def to_yaml(self): - """ - Converts a feature set to a YAML string. - - :return: Feature set string returned in YAML format - """ - feature_set_dict = self.to_dict() - return yaml.dump(feature_set_dict, allow_unicode=True, sort_keys=False) - - -class FeatureSetRef: - """ - Represents a reference to a featureset - """ - - def __init__(self, project: str = None, name: str = None): - self.proto = FeatureSetReferenceProto(project=project, name=name) - - @property - def project(self) -> str: - """ - Get the project of feature set referenced by this reference - """ - return self.proto.project - - @property - def name(self) -> str: - """ - Get the name of feature set referenced by this reference - """ - return self.proto.name - - @classmethod - def from_proto(cls, feature_set_ref_proto: FeatureSetReferenceProto): - return cls( - project=feature_set_ref_proto.project, name=feature_set_ref_proto.name, - ) - - @classmethod - def from_feature_set(cls, feature_set: FeatureSet): - """ - Construct a feature set reference that refers to the given feature set. - - Args: - feature_set: Feature set to create reference from. - - Returns: - FeatureSetRef that refers to the given feature set - """ - return cls(feature_set.project, feature_set.name) - - @classmethod - def from_str(cls, ref_str: str): - """ - Parse a feature reference from string representation. - (as defined by __repr__()) - - Args: - ref_str: string representation of the reference. - - Returns: - FeatureSetRef constructed from the string - """ - project = "" - if "/" in ref_str: - project, ref_str = ref_str.split("/") - - return cls(project, ref_str) - - def to_proto(self) -> FeatureSetReferenceProto: - """ - Convert and return this feature set reference to protobuf. - - Returns: - Protobuf version of this feature set reference. 
- """ - return self.proto - - def __str__(self): - # human readable string of the reference - return f"FeatureSetRef<{self.__repr__()}>" - - def __repr__(self): - # return string representation of the reference - # [project/]name - # in protov3 unset string and int fields default to "" and 0 - ref_str = "" - if len(self.proto.project) > 0: - ref_str += self.proto.project + "/" - ref_str += self.proto.name - return ref_str - - def __eq__(self, other): - # compare with other feature set - return hash(self) == hash(other) - - def __hash__(self): - # hash this reference - return hash(repr(self)) - - -def _make_tfx_schema_domain_info_inline(schema: schema_pb2.Schema) -> None: - """ - Copy top level domain info defined at schema level into inline definition. - One use case is when importing domain info from Tensorflow metadata schema - into Feast features. Feast features do not have access to schema level information - so the domain info needs to be inline. - - Args: - schema: Tensorflow metadata schema - - Returns: None - """ - # Reference to domains defined at schema level - domain_ref_to_string_domain = {d.name: d for d in schema.string_domain} - domain_ref_to_float_domain = {d.name: d for d in schema.float_domain} - domain_ref_to_int_domain = {d.name: d for d in schema.int_domain} - - # With the reference, it is safe to remove the domains defined at schema level - del schema.string_domain[:] - del schema.float_domain[:] - del schema.int_domain[:] - - for feature in schema.feature: - domain_info_case = feature.WhichOneof("domain_info") - if domain_info_case == "domain": - domain_ref = feature.domain - if domain_ref in domain_ref_to_string_domain: - feature.string_domain.MergeFrom(domain_ref_to_string_domain[domain_ref]) - elif domain_ref in domain_ref_to_float_domain: - feature.float_domain.MergeFrom(domain_ref_to_float_domain[domain_ref]) - elif domain_ref in domain_ref_to_int_domain: - feature.int_domain.MergeFrom(domain_ref_to_int_domain[domain_ref]) - - -def _infer_pd_column_type(column, series, rows_to_sample): - dtype = None - sample_count = 0 - - # Loop over all rows for this column to infer types - for key, value in series.iteritems(): - sample_count += 1 - # Stop sampling at the row limit - if sample_count > rows_to_sample: - continue - - # Infer the specific type for this row - current_dtype = python_type_to_feast_value_type(name=column, value=value) - - # Make sure the type is consistent for column - if dtype: - if dtype != current_dtype: - raise ValueError( - f"Type mismatch detected in column {column}. Both " - f"the types {current_dtype} and {dtype} " - f"have been found." 
- ) - else: - # Store dtype in field to type map if it isnt already - dtype = current_dtype - - return dtype - - -def _create_field_list(entities: List[Entity], features: List[Feature]) -> List[Field]: - """ - Convert entities and features List to Field List - - Args: - entities: List of Entity Objects - features: List of Features Objects - - - Returns: - List[Field]: - List of field from entities and features combined - """ - fields: List[Field] = [] - - for entity in entities: - if isinstance(entity, Field): - fields.append(entity) - - for feature in features: - if isinstance(feature, Field): - fields.append(feature) - - return fields diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index 6e73df78c3..ebe69e7fad 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -31,7 +31,7 @@ KinesisOptions, SourceType, ) -from feast.feature_v2 import FeatureV2 +from feast.feature import Feature from feast.loaders import yaml as feast_yaml @@ -44,7 +44,7 @@ def __init__( self, name: str, entities: Union[str, List[str]], - features: Union[FeatureV2, List[FeatureV2]], + features: Union[Feature, List[Feature]], batch_source: Optional[DataSource] = None, stream_source: Optional[DataSource] = None, max_age: Optional[Duration] = None, @@ -320,7 +320,7 @@ def from_proto(cls, feature_table_proto: FeatureTableProto): name=feature_table_proto.spec.name, entities=[entity for entity in feature_table_proto.spec.entities], features=[ - FeatureV2.from_proto(feature).to_proto() + Feature.from_proto(feature).to_proto() for feature in feature_table_proto.spec.features ], labels=feature_table_proto.spec.labels, @@ -420,7 +420,7 @@ def _update_from_feature_table(self, feature_table): Deep replaces one feature table with another Args: - feature_table: Feature set to use as a source of configuration + feature_table: Feature table to use as a source of configuration """ self.name = feature_table.name diff --git a/sdk/python/feast/feature_v2.py b/sdk/python/feast/feature_v2.py deleted file mode 100644 index f3aecf3a4f..0000000000 --- a/sdk/python/feast/feature_v2.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2020 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
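For context on the sdk/python/feast/feature_table.py change above, a sketch of how a FeatureTable would now be declared against the renamed Feature class (illustrative names; assumes no batch or stream source is attached at construction time):

from feast.feature import Feature
from feast.feature_table import FeatureTable
from feast.value_type import ValueType

# Illustrative table: the entity and feature names are made up for this sketch.
driver_trips = FeatureTable(
    name="driver_trips",
    entities=["driver_id"],
    features=[Feature(name="trips_today", dtype=ValueType.INT32)],
)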
- -from typing import MutableMapping, Optional - -from feast.core.Feature_pb2 import FeatureSpecV2 as FeatureSpecProto -from feast.types import Value_pb2 as ValueTypeProto -from feast.value_type import ValueType - - -class FeatureV2: - """FeatureV2 field type""" - - def __init__( - self, - name: str, - dtype: ValueType, - labels: Optional[MutableMapping[str, str]] = None, - ): - self._name = name - if not isinstance(dtype, ValueType): - raise ValueError("dtype is not a valid ValueType") - self._dtype = dtype - if labels is None: - self._labels = dict() # type: MutableMapping - else: - self._labels = labels - - def __eq__(self, other): - if ( - self.name != other.name - or self.dtype != other.dtype - or self.labels != other.labels - ): - return False - return True - - @property - def name(self): - """ - Getter for name of this field - """ - return self._name - - @property - def dtype(self) -> ValueType: - """ - Getter for data type of this field - """ - return self._dtype - - @property - def labels(self) -> MutableMapping[str, str]: - """ - Getter for labels of this field - """ - return self._labels - - def to_proto(self) -> FeatureSpecProto: - """Converts FeatureV2 object to its Protocol Buffer representation""" - value_type = ValueTypeProto.ValueType.Enum.Value(self.dtype.name) - - return FeatureSpecProto( - name=self.name, value_type=value_type, labels=self.labels, - ) - - @classmethod - def from_proto(cls, feature_proto: FeatureSpecProto): - """ - Args: - feature_proto: FeatureSpecV2 protobuf object - - Returns: - FeatureV2 object - """ - - feature = cls( - name=feature_proto.name, - dtype=ValueType(feature_proto.value_type), - labels=feature_proto.labels, - ) - - return feature diff --git a/sdk/python/feast/job.py b/sdk/python/feast/job.py deleted file mode 100644 index ff684d9cbe..0000000000 --- a/sdk/python/feast/job.py +++ /dev/null @@ -1,210 +0,0 @@ -from typing import List -from urllib.parse import urlparse - -import fastavro -import grpc -import pandas as pd - -from feast.constants import CONFIG_TIMEOUT_KEY -from feast.constants import FEAST_DEFAULT_OPTIONS as defaults -from feast.serving.ServingService_pb2 import ( - DATA_FORMAT_AVRO, - JOB_STATUS_DONE, - GetJobRequest, -) -from feast.serving.ServingService_pb2 import Job as JobProto -from feast.serving.ServingService_pb2_grpc import ServingServiceStub -from feast.staging.storage_client import get_staging_client -from feast.wait import wait_retry_backoff -from tensorflow_metadata.proto.v0 import statistics_pb2 - -# Maximum no of seconds to wait until the retrieval jobs status is DONE in Feast -# Currently set to the maximum query execution time limit in BigQuery -DEFAULT_TIMEOUT_SEC: int = 21600 - -# Maximum no of seconds to wait before reloading the job status in Feast -MAX_WAIT_INTERVAL_SEC: int = 60 - - -class RetrievalJob: - """ - A class representing a job for feature retrieval in Feast. 
- """ - - def __init__( - self, - job_proto: JobProto, - serving_stub: ServingServiceStub, - auth_metadata_plugin: grpc.AuthMetadataPlugin = None, - ): - """ - Args: - job_proto: Job proto object (wrapped by this job object) - serving_stub: Stub for Feast serving service - auth_metadata_plugin: plugin to fetch auth metadata - """ - self.job_proto = job_proto - self.serving_stub = serving_stub - self.auth_metadata = auth_metadata_plugin - - @property - def id(self): - """ - Getter for the Job Id - """ - return self.job_proto.id - - @property - def status(self): - """ - Getter for the Job status from Feast Core - """ - return self.job_proto.status - - def reload(self): - """ - Reload the latest job status - Returns: None - """ - self.job_proto = self.serving_stub.GetJob( - GetJobRequest(job=self.job_proto), - metadata=self.auth_metadata.get_signed_meta() if self.auth_metadata else (), - ).job - - def get_avro_files(self, timeout_sec: int = int(defaults[CONFIG_TIMEOUT_KEY])): - """ - Wait until job is done to get the file uri to Avro result files on - Google Cloud Storage. - - Args: - timeout_sec (int): - Max no of seconds to wait until job is done. If "timeout_sec" - is exceeded, an exception will be raised. - - Returns: - str: Google Cloud Storage file uris of the returned Avro files. - """ - - def try_retrieve(): - self.reload() - return None, self.status == JOB_STATUS_DONE - - wait_retry_backoff( - retry_fn=try_retrieve, - timeout_secs=timeout_sec, - timeout_msg="Timeout exceeded while waiting for result. Please retry " - "this method or use a longer timeout value.", - ) - - if self.job_proto.error: - raise Exception(self.job_proto.error) - - if self.job_proto.data_format != DATA_FORMAT_AVRO: - raise Exception( - "Feast only supports Avro data format for now. Please check " - "your Feast Serving deployment." - ) - - return [urlparse(uri) for uri in self.job_proto.file_uris] - - def result(self, timeout_sec: int = int(defaults[CONFIG_TIMEOUT_KEY])): - """ - Wait until job is done to get an iterable rows of result. The row can - only represent an Avro row in Feast 0.3. - - Args: - timeout_sec (int): - Max no of seconds to wait until job is done. If "timeout_sec" - is exceeded, an exception will be raised. - - Returns: - Iterable of Avro rows. - """ - uris = self.get_avro_files(timeout_sec) - for file_uri in uris: - file_obj = get_staging_client(file_uri.scheme).download_file(file_uri) - file_obj.seek(0) - avro_reader = fastavro.reader(file_obj) - - for record in avro_reader: - yield record - - def to_dataframe( - self, timeout_sec: int = int(defaults[CONFIG_TIMEOUT_KEY]) - ) -> pd.DataFrame: - """ - Wait until a job is done to get an iterable rows of result. This method - will return the response as a DataFrame. - - Args: - timeout_sec (int): - Max no of seconds to wait until job is done. If "timeout_sec" - is exceeded, an exception will be raised. - - Returns: - pd.DataFrame: - Pandas DataFrame of the feature values. - """ - records = [r for r in self.result(timeout_sec=timeout_sec)] - return pd.DataFrame.from_records(records) - - def to_chunked_dataframe( - self, - max_chunk_size: int = -1, - timeout_sec: int = int(defaults[CONFIG_TIMEOUT_KEY]), - ) -> pd.DataFrame: - """ - Wait until a job is done to get an iterable rows of result. This method - will split the response into chunked DataFrame of a specified size to - to be yielded to the instance calling it. - - Args: - max_chunk_size (int): - Maximum number of rows that the DataFrame should contain. 
- - timeout_sec (int): - Max no of seconds to wait until job is done. If "timeout_sec" - is exceeded, an exception will be raised. - - Returns: - pd.DataFrame: - Pandas DataFrame of the feature values. - """ - - # Object is Avro row type object, refer to self.result function for this type - records: List[dict] = [] - - # Max chunk size defined by user - for result in self.result(timeout_sec=timeout_sec): - records.append(result) - if len(records) == max_chunk_size: - df = pd.DataFrame.from_records(records) - records.clear() # Empty records array - yield df - - # Handle for last chunk that is < max_chunk_size - if records: - yield pd.DataFrame.from_records(records) - - def __iter__(self): - return iter(self.result()) - - def statistics( - self, timeout_sec: int = int(defaults[CONFIG_TIMEOUT_KEY]) - ) -> statistics_pb2.DatasetFeatureStatisticsList: - """ - Get statistics computed over the retrieved data set. Statistics will only be computed for - columns that are part of Feast, and not the columns that were provided. - - Args: - timeout_sec (int): - Max no of seconds to wait until job is done. If "timeout_sec" - is exceeded, an exception will be raised. - - Returns: - DatasetFeatureStatisticsList containing statistics of Feast features over the retrieved dataset. - """ - self.get_avro_files(timeout_sec) # wait for job completion - if self.job_proto.error: - raise Exception(self.job_proto.error) - return self.job_proto.dataset_feature_statistics_list diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 1a56d04819..0d1c3e5e31 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -1,165 +1,39 @@ -import logging -from concurrent.futures import ProcessPoolExecutor -from functools import partial -from typing import Iterable, List - -import pandas as pd -from pyarrow import parquet as pq - -from feast.constants import DATETIME_COLUMN -from feast.feature_set import FeatureSet -from feast.type_map import ( - pa_column_to_proto_column, - pa_column_to_timestamp_proto_column, -) -from feast.types import Field_pb2 as FieldProto -from feast.types.FeatureRow_pb2 import FeatureRow - -_logger = logging.getLogger(__name__) +from typing import Dict, List GRPC_CONNECTION_TIMEOUT_DEFAULT = 3 # type: int GRPC_CONNECTION_TIMEOUT_APPLY = 300 # type: int FEAST_SERVING_URL_ENV_KEY = "FEAST_SERVING_URL" # type: str FEAST_CORE_URL_ENV_KEY = "FEAST_CORE_URL" # type: str BATCH_FEATURE_REQUEST_WAIT_TIME_SECONDS = 300 -KAFKA_CHUNK_PRODUCTION_TIMEOUT = 120 # type: int +BATCH_INGESTION_PRODUCTION_TIMEOUT = 120 # type: int -def _encode_pa_tables( - file: str, feature_set: str, fields: dict, ingestion_id: str, row_group_idx: int -) -> List[bytes]: +def check_field_mappings( + column_names: List[str], + feature_table_name: str, + feature_table_field_mappings: Dict[str, str], +) -> None: """ - Helper function to encode a PyArrow table(s) read from parquet file(s) into - FeatureRows. - - This function accepts a list of file directory pointing to many parquet - files. All parquet files must have the same schema. - - Each parquet file will be read into as a table and encoded into FeatureRows - using a pool of max_workers workers. - - Args: - file (str): - File directory of all the parquet file to encode. - Parquet file must have more than one row group. - - feature_set (str): - Feature set reference in the format f"{project}/{name}". - - fields (dict[str, enum.Enum.ValueType]): - A mapping of field names to their value types. 
- - ingestion_id (str): - UUID unique to this ingestion job. + Checks that all specified field mappings in FeatureTable can be found in + column names of specified ingestion source. - row_group_idx(int): - Row group index to read and encode into byte like FeatureRow - protobuf objects. - - Returns: - List[bytes]: - List of byte encoded FeatureRows from the parquet file. + Args: + column_names: Column names in provided ingestion source + feature_table_name: Name of FeatureTable + feature_table_field_mappings: Field mappings of FeatureTable """ - pq_file = pq.ParquetFile(file) - # Read parquet file as a PyArrow table - table = pq_file.read_row_group(row_group_idx) - - # Add datetime column - datetime_col = pa_column_to_timestamp_proto_column(table.column(DATETIME_COLUMN)) - - # Preprocess the columns by converting all its values to Proto values - proto_columns = { - field_name: pa_column_to_proto_column(dtype, table.column(field_name)) - for field_name, dtype in fields.items() - } - - # List to store result - feature_rows: List[bytes] = [] - # Loop optimization declaration(s) - field = FieldProto.Field - proto_items = proto_columns.items() - append = feature_rows.append - - # Iterate through the rows - for row_idx in range(table.num_rows): - feature_row = FeatureRow( - event_timestamp=datetime_col[row_idx], - feature_set=feature_set, - ingestion_id=ingestion_id, + if "datetime" not in column_names: + raise ValueError( + f'Provided data source does not contain entity "datetime" in columns {column_names}' ) - # Loop optimization declaration - ext = feature_row.fields.extend - - # Insert field from each column - for k, v in proto_items: - ext([field(name=k, value=v[row_idx])]) - - # Append FeatureRow in byte string form - append(feature_row.SerializeToString()) - return feature_rows + specified_field_mappings = [v for k, v in feature_table_field_mappings.items()] + is_valid = all(col_name in column_names for col_name in specified_field_mappings) -def get_feature_row_chunks( - file: str, - row_groups: List[int], - fs: FeatureSet, - ingestion_id: str, - max_workers: int, -) -> Iterable[List[bytes]]: - """ - Iterator function to encode a PyArrow table read from a parquet file to - FeatureRow(s). - - Args: - file (str): - File directory of the parquet file. The parquet file must have more - than one row group. - - row_groups (List[int]): - Specific row group indexes to be read and transformed in the parquet - file. - - fs (feast.feature_set.FeatureSet): - FeatureSet describing parquet files. - - ingestion_id (str): - UUID unique to this ingestion job. - - max_workers (int): - Maximum number of workers to spawn. - - Returns: - Iterable[List[bytes]]: - Iterable list of byte encoded FeatureRow(s). - """ - - feature_set = f"{fs.project}/{fs.name}" - - field_map = {field.name: field.dtype for field in fs.fields.values()} - func = partial(_encode_pa_tables, file, feature_set, field_map, ingestion_id) - - with ProcessPoolExecutor(max_workers) as pool: - for chunk in pool.map(func, row_groups): - yield chunk - return - - -def validate_dataframe(dataframe: pd.DataFrame, feature_set: FeatureSet): - if "datetime" not in dataframe.columns: - raise ValueError( - f'Dataframe does not contain entity "datetime" in columns {dataframe.columns}' + if not is_valid: + raise Exception( + f"Provided data source does not contain all field mappings previously " + f"defined for FeatureTable, {feature_table_name}." 
) - - for entity in feature_set.entities: - if entity.name not in dataframe.columns: - raise ValueError( - f"Dataframe does not contain entity {entity.name} in columns {dataframe.columns}" - ) - - for feature in feature_set.features: - if feature.name not in dataframe.columns: - raise ValueError( - f"Dataframe does not contain feature {feature.name} in columns {dataframe.columns}" - ) diff --git a/sdk/python/feast/source.py b/sdk/python/feast/source.py deleted file mode 100644 index 8e388376b3..0000000000 --- a/sdk/python/feast/source.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright 2019 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from feast.core.Source_pb2 import KafkaSourceConfig as KafkaSourceConfigProto -from feast.core.Source_pb2 import Source as SourceProto -from feast.core.Source_pb2 import SourceType as SourceTypeProto - - -class Source: - """ - Source is the top level class that represents a data source for finding - feature data. Source must be extended with specific implementations to - be useful - """ - - def __eq__(self, other): - return True - - @property - def source_type(self) -> str: - """ - The type of source. If not implemented, this will return "None" - """ - return "None" - - def to_proto(self): - """ - Converts this source object to its protobuf representation. - """ - return None - - @classmethod - def from_proto(cls, source_proto: SourceProto): - """ - Creates a source from a protobuf representation. This will instantiate - and return a specific source type, depending on the protobuf that is - passed in. - - Args: - source_proto: SourceProto python object - - Returns: - Source object - """ - if source_proto.type == SourceTypeProto.KAFKA: - return KafkaSource( - brokers=source_proto.kafka_source_config.bootstrap_servers, - topic=source_proto.kafka_source_config.topic, - ) - - return cls() - - -class KafkaSource(Source): - """ - Kafka feature set source type. - """ - - def __init__(self, brokers: str = "", topic: str = ""): - """ - - Args: - brokers: Comma separated list of Kafka brokers/bootstrap server - addresses, for example: my-host:9092,other-host:9092 - topic: Kafka topic to find feature rows for this feature set - """ - self._source_type = "Kafka" - self._brokers = brokers - self._topic = topic - - def __eq__(self, other): - if ( - self.brokers != other.brokers - or self.topic != other.topic - or self.source_type != other.source_type - ): - return False - return True - - @property - def brokers(self) -> str: - """ - Returns the list of broker addresses for this Kafka source - """ - return self._brokers - - @property - def topic(self) -> str: - """ - Returns the topic for this feature set - """ - return self._topic - - @property - def source_type(self) -> str: - """ - Returns the type of source. 
For a Kafka source this will always return - "kafka" - """ - return self._source_type - - def to_proto(self) -> SourceProto: - """ - Converts this Source into its protobuf representation - """ - return SourceProto( - type=SourceTypeProto.KAFKA, - kafka_source_config=KafkaSourceConfigProto( - bootstrap_servers=self.brokers, topic=self.topic - ), - ) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 82ac90bbd1..611e50dfb2 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -22,9 +22,6 @@ from google.protobuf.timestamp_pb2 import Timestamp from pyarrow.lib import TimestampType -from feast.constants import DATETIME_COLUMN -from feast.types import FeatureRow_pb2 as FeatureRowProto -from feast.types import Field_pb2 as FieldProto from feast.types.Value_pb2 import ( BoolList, BytesList, @@ -163,87 +160,6 @@ def python_type_to_feast_value_type( return type_map[value.dtype.__str__()] -def convert_df_to_feature_rows(dataframe: pd.DataFrame, feature_set): - """ - Returns a function that converts a Pandas Series to a Feast FeatureRow - for a given Feature Set and Pandas Dataframe - - Args: - dataframe: Dataframe that will be converted - feature_set: Feature set used as schema for conversion - - Returns: - Function that will do conversion - """ - - def convert_series_to_proto_values(row: pd.Series): - """ - Converts a Pandas Series to a Feast FeatureRow - - Args: - row: pd.Series The row that should be converted - - Returns: - Feast FeatureRow - """ - - feature_row = FeatureRowProto.FeatureRow( - event_timestamp=_pd_datetime_to_timestamp_proto( - dataframe[DATETIME_COLUMN].dtype, row[DATETIME_COLUMN] - ), - feature_set=feature_set.project + "/" + feature_set.name, - ) - - for field_name, field in feature_set.fields.items(): - feature_row.fields.extend( - [ - FieldProto.Field( - name=field.name, - value=_python_value_to_proto_value( - field.dtype, row[field.name] - ), - ) - ] - ) - return feature_row - - return convert_series_to_proto_values - - -def convert_dict_to_proto_values( - row: dict, df_datetime_dtype: pd.DataFrame.dtypes, feature_set -) -> FeatureRowProto.FeatureRow: - """ - Encode a dictionary describing a feature row into a FeatureRows object. - - Args: - row: Dictionary describing a feature row. - df_datetime_dtype: Pandas dtype of datetime column. - feature_set: Feature set describing feature row. - - Returns: - FeatureRow - """ - - feature_row = FeatureRowProto.FeatureRow( - event_timestamp=_pd_datetime_to_timestamp_proto( - df_datetime_dtype, row[DATETIME_COLUMN] - ), - feature_set=f"{feature_set.project}/{feature_set.name}", - ) - - for field_name, field in feature_set.fields.items(): - feature_row.fields.extend( - [ - FieldProto.Field( - name=field.name, - value=_python_value_to_proto_value(field.dtype, row[field.name]), - ) - ] - ) - return feature_row - - def _pd_datetime_to_timestamp_proto(dtype, value) -> Timestamp: """ Converts a Pandas datetime to a Timestamp Proto diff --git a/sdk/python/feast/value_type.py b/sdk/python/feast/value_type.py index aaf3de1822..eba16015d3 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -19,7 +19,7 @@ class ValueType(enum.Enum): """ - Feature value type. Used to define data types in Feature Sets. + Feature value type. Used to define data types in Feature Tables. 
""" UNKNOWN = 0 From 6ed731ce9ef4cd5bd04a3cc8cc67fa8a876d81bc Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 12:07:23 +0800 Subject: [PATCH 03/34] Cleanup python unit tests Signed-off-by: Terence --- sdk/python/tests/feast_core_server.py | 104 +-- sdk/python/tests/feast_serving_server.py | 26 +- sdk/python/tests/test_client.py | 865 +++++++---------------- sdk/python/tests/test_entity.py | 10 +- sdk/python/tests/test_feature.py | 23 - sdk/python/tests/test_feature_set.py | 394 ----------- sdk/python/tests/test_feature_table.py | 6 +- sdk/python/tests/test_job.py | 143 ---- 8 files changed, 322 insertions(+), 1249 deletions(-) delete mode 100644 sdk/python/tests/test_feature.py delete mode 100644 sdk/python/tests/test_feature_set.py delete mode 100644 sdk/python/tests/test_job.py diff --git a/sdk/python/tests/feast_core_server.py b/sdk/python/tests/feast_core_server.py index 677ecb84ec..f66830d7a4 100644 --- a/sdk/python/tests/feast_core_server.py +++ b/sdk/python/tests/feast_core_server.py @@ -7,16 +7,20 @@ from feast.core import CoreService_pb2_grpc as Core from feast.core.CoreService_pb2 import ( - ApplyFeatureSetRequest, - ApplyFeatureSetResponse, + ApplyEntityRequest, + ApplyEntityResponse, + ApplyFeatureTableRequest, + ApplyFeatureTableResponse, GetFeastCoreVersionResponse, - ListFeatureSetsRequest, - ListFeatureSetsResponse, + ListEntitiesRequest, + ListEntitiesResponse, + ListFeatureTablesRequest, + ListFeatureTablesResponse, ) -from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto -from feast.core.FeatureSet_pb2 import FeatureSetMeta, FeatureSetStatus -from feast.core.Source_pb2 import KafkaSourceConfig as KafkaSourceConfigProto -from feast.core.Source_pb2 import SourceType as SourceTypeProto +from feast.core.Entity_pb2 import Entity as EntityProto +from feast.core.Entity_pb2 import EntityMeta +from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto +from feast.core.FeatureTable_pb2 import FeatureTableMeta _logger = logging.getLogger(__name__) @@ -56,58 +60,62 @@ def intercept_service(self, continuation, handler_call_details): class CoreServicer(Core.CoreServiceServicer): def __init__(self): - self._feature_sets = dict() + self._feature_tables = dict() + self._entities = dict() def GetFeastCoreVersion(self, request, context): - return GetFeastCoreVersionResponse(version="0.3.2") - - def ListFeatureSets(self, request: ListFeatureSetsRequest, context): - - filtered_feature_set_response = [ - fs - for fs in list(self._feature_sets.values()) - if ( - not request.filter.feature_set_name - or request.filter.feature_set_name == "*" - or fs.spec.name == request.filter.feature_set_name - ) - ] - - return ListFeatureSetsResponse(feature_sets=filtered_feature_set_response) - - def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): - feature_set = request.feature_set - - if feature_set.spec.source.type == SourceTypeProto.INVALID: - feature_set.spec.source.kafka_source_config.CopyFrom( - KafkaSourceConfigProto(bootstrap_servers="server.com", topic="topic1") - ) - feature_set.spec.source.type = SourceTypeProto.KAFKA - - feature_set_meta = FeatureSetMeta( - status=FeatureSetStatus.STATUS_READY, - created_timestamp=Timestamp(seconds=10), - ) - applied_feature_set = FeatureSetProto( - spec=feature_set.spec, meta=feature_set_meta + return GetFeastCoreVersionResponse(version="0.10.0") + + def ListFeatureTables(self, request: ListFeatureTablesRequest, context): + + filtered_feature_table_response = list(self._feature_tables.values()) + + 
return ListFeatureTablesResponse(tables=filtered_feature_table_response) + + def ApplyFeatureTable(self, request: ApplyFeatureTableRequest, context): + feature_table_spec = request.table_spec + + feature_table_meta = FeatureTableMeta(created_timestamp=Timestamp(seconds=10),) + applied_feature_table = FeatureTableProto( + spec=feature_table_spec, meta=feature_table_meta ) - self._feature_sets[feature_set.spec.name] = applied_feature_set + self._feature_tables[feature_table_spec.name] = applied_feature_table _logger.info( - "registered feature set " - + feature_set.spec.name + "registered feature table " + + feature_table_spec.name + " with " - + str(len(feature_set.spec.entities)) + + str(len(feature_table_spec.entities)) + " entities and " - + str(len(feature_set.spec.features)) + + str(len(feature_table_spec.features)) + " features" ) - return ApplyFeatureSetResponse( - feature_set=applied_feature_set, - status=ApplyFeatureSetResponse.Status.CREATED, + return ApplyFeatureTableResponse(table=applied_feature_table,) + + def ListEntities(self, request: ListEntitiesRequest, context): + + filtered_entities_response = list(self._entities.values()) + + return ListEntitiesResponse(entities=filtered_entities_response) + + def ApplyEntity(self, request: ApplyEntityRequest, context): + entity_spec = request.spec + + entity_meta = EntityMeta(created_timestamp=Timestamp(seconds=10),) + applied_entity = EntityProto(spec=entity_spec, meta=entity_meta) + self._entities[entity_spec.name] = applied_entity + + _logger.info( + "registered entity " + + entity_spec.name + + " with " + + str(entity_spec.value_type) + + " value" ) + return ApplyEntityResponse(entity=applied_entity,) + def serve(): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) diff --git a/sdk/python/tests/feast_serving_server.py b/sdk/python/tests/feast_serving_server.py index aba6713275..50ce551405 100644 --- a/sdk/python/tests/feast_serving_server.py +++ b/sdk/python/tests/feast_serving_server.py @@ -5,8 +5,8 @@ import grpc -from feast.core import FeatureSet_pb2 as FeatureSetProto -from feast.core.CoreService_pb2 import ListFeatureSetsResponse +from feast.core import FeatureTable_pb2 as FeatureTableProto +from feast.core.CoreService_pb2 import ListFeatureTablesResponse from feast.core.CoreService_pb2_grpc import CoreServiceStub from feast.serving import ServingService_pb2_grpc as Serving from feast.serving.ServingService_pb2 import GetFeastServingInfoResponse @@ -19,9 +19,9 @@ def __init__(self, core_url: str = None): if core_url: self.__core_channel = None self.__connect_core(core_url) - self._feature_sets = ( + self._feature_tables = ( dict() - ) # type: Dict[str, FeatureSetProto.FeatureSetSpec] + ) # type: Dict[str, FeatureTableProto.FeatureTable] def __connect_core(self, core_url: str): if not core_url: @@ -40,18 +40,18 @@ def __connect_core(self, core_url: str): else: self._core_service_stub = CoreServiceStub(self.__core_channel) - def __get_feature_sets_from_core(self): - # Get updated list of feature sets - feature_sets = ( - self._core_service_stub.ListFeatureSets - ) # type: ListFeatureSetsResponse + def __get_feature_tables_from_core(self): + # Get updated list of feature tables + feature_tables = ( + self._core_service_stub.ListFeatureTables + ) # type: ListFeatureTablesResponse - # Store each feature set locally - for feature_set in list(feature_sets.feature_sets): - self._feature_sets[feature_set.name] = feature_set + # Store each feature table locally + for feature_table in list(feature_tables.tables): + 
self._feature_tables[feature_table.name] = feature_table def GetFeastServingVersion(self, request, context): - return GetFeastServingInfoResponse(version="0.3.2") + return GetFeastServingInfoResponse(version="0.10.0") def serve(): diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index be8bc78679..c152d6d400 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -13,58 +13,38 @@ # limitations under the License. import pkgutil import socket -import tempfile from concurrent import futures -from datetime import datetime from unittest import mock -import dataframes import grpc -import pandas as pd -import pandavro import pytest from google.protobuf.duration_pb2 import Duration from mock import MagicMock, patch from pytest_lazyfixture import lazy_fixture -from pytz import timezone from feast.client import Client -from feast.contrib.job_controller.client import Client as JCClient -from feast.contrib.job_controller.job import IngestJob from feast.core import CoreService_pb2_grpc as Core from feast.core.CoreService_pb2 import ( + GetEntityResponse, GetFeastCoreVersionResponse, - GetFeatureSetResponse, - ListFeatureSetsResponse, - ListFeaturesResponse, - ListIngestionJobsResponse, + GetFeatureTableResponse, + ListEntitiesResponse, + ListFeatureTablesResponse, ) -from feast.core.FeatureSet_pb2 import EntitySpec as EntitySpecProto -from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto -from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto -from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto -from feast.core.FeatureSet_pb2 import FeatureSetStatus as FeatureSetStatusProto -from feast.core.FeatureSet_pb2 import FeatureSpec as FeatureSpecProto -from feast.core.IngestionJob_pb2 import IngestionJob as IngestJobProto -from feast.core.IngestionJob_pb2 import IngestionJobStatus -from feast.core.Source_pb2 import KafkaSourceConfig, Source, SourceType -from feast.core.Store_pb2 import Store +from feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.core.Entity_pb2 import Entity as EntityProto +from feast.core.Entity_pb2 import EntityMeta as EntityMetaProto +from feast.core.Entity_pb2 import EntitySpecV2 as EntitySpecProto +from feast.core.Feature_pb2 import FeatureSpecV2 as FeatureSpecProto +from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto +from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto +from feast.core.FeatureTable_pb2 import FeatureTableSpec as FeatureTableSpecProto +from feast.data_source import DataSource, FileOptions, KafkaOptions, SourceType from feast.entity import Entity from feast.feature import Feature -from feast.feature_set import FeatureSet, FeatureSetRef +from feast.feature_table import FeatureTable from feast.serving import ServingService_pb2_grpc as Serving -from feast.serving.ServingService_pb2 import DataFormat, FeastServingType -from feast.serving.ServingService_pb2 import FeatureReference as FeatureRefProto -from feast.serving.ServingService_pb2 import ( - GetBatchFeaturesResponse, - GetFeastServingInfoResponse, - GetJobResponse, - GetOnlineFeaturesRequest, - GetOnlineFeaturesResponse, -) -from feast.serving.ServingService_pb2 import Job as BatchRetrievalJob -from feast.serving.ServingService_pb2 import JobStatus, JobType -from feast.source import KafkaSource +from feast.serving.ServingService_pb2 import GetFeastServingInfoResponse from feast.types import Value_pb2 as ValueProto from 
feast.value_type import ValueType from feast_core_server import ( @@ -76,7 +56,6 @@ CORE_URL = "core.feast.example.com" SERVING_URL = "serving.example.com" -jobcontroller_URL = "jobcontroller.feast.example.com" _PRIVATE_KEY_RESOURCE_PATH = "data/localhost.key" _CERTIFICATE_CHAIN_RESOURCE_PATH = "data/localhost.pem" _ROOT_CERTIFICATE_RESOURCE_PATH = "data/localhost.crt" @@ -114,11 +93,6 @@ def mock_client(self): client._serving_url = SERVING_URL return client - @pytest.fixture - def mock_jobcontroller_client(self): - client = JCClient(jobcontroller_url=jobcontroller_URL) - return client - @pytest.fixture def mock_client_with_auth(self): client = Client( @@ -324,83 +298,138 @@ def test_version(self, mocked_client, mocker): ], ) def test_get_online_features(self, mocked_client, auth_metadata, mocker): - ROW_COUNT = 300 + assert 1 == 1 - mocked_client._serving_service_stub = Serving.ServingServiceStub( + @pytest.mark.parametrize( + "mocked_client", + [ + lazy_fixture("mock_client"), + lazy_fixture("mock_client_with_auth"), + lazy_fixture("secure_mock_client"), + lazy_fixture("secure_mock_client_with_auth"), + ], + ) + def test_get_historical_features(self, mocked_client, mocker): + assert 1 == 1 + + @pytest.mark.parametrize( + "mocked_client", + [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], + ) + def test_get_entity(self, mocked_client, mocker): + mocked_client._core_service_stub = Core.CoreServiceStub( grpc.insecure_channel("") ) - def int_val(x): - return ValueProto.Value(int64_val=x) + entity_proto = EntityProto( + spec=EntitySpecProto( + name="driver_car_id", + description="Car driver id", + value_type=ValueProto.ValueType.STRING, + labels={"key1": "val1", "key2": "val2"}, + ), + meta=EntityMetaProto(), + ) - request = GetOnlineFeaturesRequest(project="driver_project") - request.features.extend( - [ - FeatureRefProto(feature_set="driver", name="age"), - FeatureRefProto(name="rating"), - FeatureRefProto(name="null_value"), - ] + mocker.patch.object( + mocked_client._core_service_stub, + "GetEntity", + return_value=GetEntityResponse(entity=entity_proto), ) - recieve_response = GetOnlineFeaturesResponse() - entity_rows = [] - for row_number in range(1, ROW_COUNT + 1): - request.entity_rows.append( - GetOnlineFeaturesRequest.EntityRow( - fields={"driver_id": int_val(row_number)} - ) + mocked_client.set_project("my_project") + entity = mocked_client.get_entity("my_entity") + + assert ( + entity.name == "driver_car_id" + and entity.description == "Car driver id" + and entity.value_type == ValueType(ValueProto.ValueType.STRING).name + and "key1" in entity.labels + and entity.labels["key1"] == "val1" + ) + + @pytest.mark.parametrize( + "mocked_client", + [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], + ) + def test_list_entities(self, mocked_client, mocker): + mocker.patch.object( + mocked_client, + "_core_service_stub", + return_value=Core.CoreServiceStub(grpc.insecure_channel("")), + ) + + entity_1_proto = EntityProto( + spec=EntitySpecProto( + name="driver_car_id", + description="Car driver id", + value_type=ValueProto.ValueType.INT64, + labels={"key1": "val1", "key2": "val2"}, ) - entity_rows.append({"driver_id": int_val(row_number)}) - field_values = GetOnlineFeaturesResponse.FieldValues( - fields={ - "driver_id": int_val(row_number), - "driver:age": int_val(1), - "rating": int_val(9), - "null_value": ValueProto.Value(), - }, - statuses={ - "driver_id": GetOnlineFeaturesResponse.FieldStatus.PRESENT, - "driver:age": 
GetOnlineFeaturesResponse.FieldStatus.PRESENT, - "rating": GetOnlineFeaturesResponse.FieldStatus.PRESENT, - "null_value": GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE, - }, + ) + entity_2_proto = EntityProto( + spec=EntitySpecProto( + name="driver_ride_id", + description="Ride driver id", + value_type=ValueProto.ValueType.STRING, + labels={"key3": "val3", "key4": "val4"}, ) - recieve_response.field_values.append(field_values) + ) mocker.patch.object( - mocked_client._serving_service_stub, - "GetOnlineFeatures", - return_value=recieve_response, + mocked_client._core_service_stub, + "ListEntities", + return_value=ListEntitiesResponse( + entities=[entity_1_proto, entity_2_proto] + ), ) - got_response = mocked_client.get_online_features( - entity_rows=entity_rows, - feature_refs=["driver:age", "rating", "null_value"], - project="driver_project", - ) # type: GetOnlineFeaturesResponse - mocked_client._serving_service_stub.GetOnlineFeatures.assert_called_with( - request, metadata=auth_metadata + + entities = mocked_client.list_entities(labels={"key1": "val1"}) + assert len(entities) == 2 + + entity = entities[1] + assert ( + entity.name == "driver_ride_id" + and entity.description == "Ride driver id" + and entity.value_type == ValueType(ValueProto.ValueType.STRING).name + and "key3" in entity.labels + and entity.labels["key3"] == "val3" + and "key4" in entity.labels + and entity.labels["key4"] == "val4" ) - got_fields = got_response.field_values[0].fields - got_statuses = got_response.field_values[0].statuses + @pytest.mark.parametrize( + "test_client", [lazy_fixture("client"), lazy_fixture("secure_client")], + ) + def test_apply_entity_success(self, test_client): + + test_client.set_project("project1") + entity = Entity( + name="driver_car_id", + description="Car driver id", + value_type=ValueType.STRING, + labels={"team": "matchmaking"}, + ) + + # Register Entity with Core + test_client.apply_entity(entity) + + entities = test_client.list_entities() + + entity = entities[0] assert ( - got_fields["driver_id"] == int_val(1) - and got_statuses["driver_id"] - == GetOnlineFeaturesResponse.FieldStatus.PRESENT - and got_fields["driver:age"] == int_val(1) - and got_statuses["driver:age"] - == GetOnlineFeaturesResponse.FieldStatus.PRESENT - and got_fields["rating"] == int_val(9) - and got_statuses["rating"] == GetOnlineFeaturesResponse.FieldStatus.PRESENT - and got_fields["null_value"] == ValueProto.Value() - and got_statuses["null_value"] - == GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE + len(entities) == 1 + and entity.name == "driver_car_id" + and entity.value_type == ValueType(ValueProto.ValueType.STRING).name + and entity.description == "Car driver id" + and "team" in entity.labels + and entity.labels["team"] == "matchmaking" ) @pytest.mark.parametrize( "mocked_client", [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], ) - def test_get_feature_set(self, mocked_client, mocker): + def test_get_feature_table(self, mocked_client, mocker): mocked_client._core_service_stub = Core.CoreServiceStub( grpc.insecure_channel("") ) @@ -409,11 +438,11 @@ def test_get_feature_set(self, mocked_client, mocker): mocker.patch.object( mocked_client._core_service_stub, - "GetFeatureSet", - return_value=GetFeatureSetResponse( - feature_set=FeatureSetProto( - spec=FeatureSetSpecProto( - name="my_feature_set", + "GetFeatureTable", + return_value=GetFeatureTableResponse( + table=FeatureTableProto( + spec=FeatureTableSpecProto( + name="my_feature_table", max_age=Duration(seconds=3600), 
labels={"key1": "val1", "key2": "val2"}, features=[ @@ -426,54 +455,63 @@ def test_get_feature_set(self, mocked_client, mocker): value_type=ValueProto.ValueType.FLOAT, ), ], - entities=[ - EntitySpecProto( - name="my_entity_1", - value_type=ValueProto.ValueType.INT64, - ) - ], - source=Source( - type=SourceType.KAFKA, - kafka_source_config=KafkaSourceConfig( - bootstrap_servers="localhost:9092", topic="topic" + entities=["my_entity_1"], + batch_source=DataSourceProto( + type=SourceType(1).name, + field_mapping={ + "ride_distance": "ride_distance", + "ride_duration": "ride_duration", + }, + file_options=DataSourceProto.FileOptions( + file_format="avro", file_url="data/test.avro" ), + timestamp_column="ts_col", + date_partition_column="date_partition_col", ), ), - meta=FeatureSetMetaProto(), + meta=FeatureTableMetaProto(), ) ), ) mocked_client.set_project("my_project") - feature_set = mocked_client.get_feature_set("my_feature_set") + feature_table = mocked_client.get_feature_table("my_feature_table") assert ( - feature_set.name == "my_feature_set" - and "key1" in feature_set.labels - and feature_set.labels["key1"] == "val1" - and "key2" in feature_set.labels - and feature_set.labels["key2"] == "val2" - and feature_set.fields["my_feature_1"].name == "my_feature_1" - and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT - and feature_set.fields["my_entity_1"].name == "my_entity_1" - and feature_set.fields["my_entity_1"].dtype == ValueType.INT64 - and len(feature_set.features) == 2 - and len(feature_set.entities) == 1 + feature_table.name == "my_feature_table" + and "key1" in feature_table.labels + and feature_table.labels["key1"] == "val1" + and "key2" in feature_table.labels + and feature_table.labels["key2"] == "val2" + and len(feature_table.features) == 2 + and len(feature_table.entities) == 1 ) @pytest.mark.parametrize( "mocked_client", [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], ) - def test_list_feature_sets(self, mocked_client, mocker): + def test_list_feature_tables(self, mocked_client, mocker): mocker.patch.object( mocked_client, "_core_service_stub", return_value=Core.CoreServiceStub(grpc.insecure_channel("")), ) - feature_set_1_proto = FeatureSetProto( - spec=FeatureSetSpecProto( - project="test", + batch_source = DataSourceProto( + type=SourceType(1).name, + field_mapping={ + "ride_distance": "ride_distance", + "ride_duration": "ride_duration", + }, + file_options=DataSourceProto.FileOptions( + file_format="avro", file_url="data/test.avro" + ), + timestamp_column="ts_col", + date_partition_column="date_partition_col", + ) + + feature_table_1_proto = FeatureTableProto( + spec=FeatureTableSpecProto( name="driver_car", max_age=Duration(seconds=3600), labels={"key1": "val1", "key2": "val2"}, @@ -482,11 +520,12 @@ def test_list_feature_sets(self, mocked_client, mocker): name="feature_1", value_type=ValueProto.ValueType.FLOAT ) ], + entities=["driver_car_id"], + batch_source=batch_source, ) ) - feature_set_2_proto = FeatureSetProto( - spec=FeatureSetSpecProto( - project="test", + feature_table_2_proto = FeatureTableProto( + spec=FeatureTableSpecProto( name="driver_ride", max_age=Duration(seconds=3600), labels={"key1": "val1"}, @@ -495,516 +534,102 @@ def test_list_feature_sets(self, mocked_client, mocker): name="feature_1", value_type=ValueProto.ValueType.FLOAT ) ], + entities=["driver_ride_id"], + batch_source=batch_source, ) ) mocker.patch.object( mocked_client._core_service_stub, - "ListFeatureSets", - return_value=ListFeatureSetsResponse( - 
feature_sets=[feature_set_1_proto, feature_set_2_proto] - ), - ) - - feature_sets = mocked_client.list_feature_sets(labels={"key1": "val1"}) - assert len(feature_sets) == 2 - - feature_set = feature_sets[0] - assert ( - feature_set.name == "driver_car" - and "key1" in feature_set.labels - and feature_set.labels["key1"] == "val1" - and "key2" in feature_set.labels - and feature_set.labels["key2"] == "val2" - and feature_set.fields["feature_1"].name == "feature_1" - and feature_set.fields["feature_1"].dtype == ValueType.FLOAT - and len(feature_set.features) == 1 - ) - - @pytest.mark.parametrize( - "mocked_client", - [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], - ) - def test_list_features(self, mocked_client, mocker): - mocker.patch.object( - mocked_client, - "_core_service_stub", - return_value=Core.CoreServiceStub(grpc.insecure_channel("")), - ) - - feature1_proto = FeatureSpecProto( - name="feature_1", value_type=ValueProto.ValueType.FLOAT - ) - feature2_proto = FeatureSpecProto( - name="feature_2", value_type=ValueProto.ValueType.STRING - ) - - mocker.patch.object( - mocked_client._core_service_stub, - "ListFeatures", - return_value=ListFeaturesResponse( - features={ - "driver_car:feature_1": feature1_proto, - "driver_car:feature_2": feature2_proto, - } - ), - ) - - features = mocked_client.list_features_by_ref(project="test") - assert len(features) == 2 - - ref_str_list = [] - feature_name_list = [] - feature_dtype_list = [] - for ref_str, feature_proto in features.items(): - ref_str_list.append(ref_str) - feature_name_list.append(feature_proto.name) - feature_dtype_list.append(feature_proto.dtype) - - assert ( - set(ref_str_list) == set(["driver_car:feature_1", "driver_car:feature_2"]) - and set(feature_name_list) == set(["feature_1", "feature_2"]) - and set(feature_dtype_list) == set([ValueType.FLOAT, ValueType.STRING]) - ) - - def test_list_ingest_jobs(self, mock_jobcontroller_client, mocker): - mocker.patch.object( - mock_jobcontroller_client, - "_jobcontroller_service_stub", - return_value=Core.JobControllerServiceStub(grpc.insecure_channel("")), - ) - - feature_set_ref = FeatureSetRef(project="test", name="driver",) - - mocker.patch.object( - mock_jobcontroller_client._jobcontroller_service_stub, - "ListIngestionJobs", - return_value=ListIngestionJobsResponse( - jobs=[ - IngestJobProto( - id="kafka-to-redis", - external_id="job-2222", - status=IngestionJobStatus.RUNNING, - feature_set_references=[feature_set_ref.to_proto()], - source=Source( - type=SourceType.KAFKA, - kafka_source_config=KafkaSourceConfig( - bootstrap_servers="localhost:9092", topic="topic" - ), - ), - stores=[Store(name="redis")], - ) - ] - ), - ) - - # list ingestion jobs by target feature set reference - ingest_jobs = mock_jobcontroller_client.list_ingest_jobs( - feature_set_ref=feature_set_ref - ) - assert len(ingest_jobs) >= 1 - - ingest_job = ingest_jobs[0] - assert ( - ingest_job.status == IngestionJobStatus.RUNNING - and ingest_job.id == "kafka-to-redis" - and ingest_job.external_id == "job-2222" - and ingest_job.feature_sets[0].name == "driver" - and ingest_job.source.source_type == "Kafka" - ) - - def test_restart_ingest_job(self, mock_jobcontroller_client, mocker): - mocker.patch.object( - mock_jobcontroller_client, - "_jobcontroller_service_stub", - return_value=Core.JobControllerServiceStub(grpc.insecure_channel("")), - ) - - ingest_job = IngestJob( - job_proto=IngestJobProto( - id="kafka-to-redis", - external_id="job#2222", - status=IngestionJobStatus.ERROR, + 
"ListFeatureTables", + return_value=ListFeatureTablesResponse( + tables=[feature_table_1_proto, feature_table_2_proto] ), - core_stub=mock_jobcontroller_client._jobcontroller_service_stub, ) - mock_jobcontroller_client.restart_ingest_job(ingest_job) - assert ( - mock_jobcontroller_client._jobcontroller_service_stub.RestartIngestionJob.called - ) - - def test_stop_ingest_job(self, mock_jobcontroller_client, mocker): - mocker.patch.object( - mock_jobcontroller_client, - "_jobcontroller_service_stub", - return_value=Core.JobControllerServiceStub(grpc.insecure_channel("")), - ) - - ingest_job = IngestJob( - job_proto=IngestJobProto( - id="kafka-to-redis", - external_id="job#2222", - status=IngestionJobStatus.RUNNING, - ), - core_stub=mock_jobcontroller_client._jobcontroller_service_stub, - ) + feature_tables = mocked_client.list_feature_tables(labels={"key1": "val1"}) + assert len(feature_tables) == 2 - mock_jobcontroller_client.stop_ingest_job(ingest_job) + feature_table = feature_tables[0] assert ( - mock_jobcontroller_client._jobcontroller_service_stub.StopIngestionJob.called + feature_table.name == "driver_car" + and "key1" in feature_table.labels + and feature_table.labels["key1"] == "val1" + and "key2" in feature_table.labels + and feature_table.labels["key2"] == "val2" + and len(feature_table.features) == 1 ) - @pytest.mark.parametrize( - "mocked_client", - [ - lazy_fixture("mock_client"), - lazy_fixture("mock_client_with_auth"), - lazy_fixture("secure_mock_client"), - lazy_fixture("secure_mock_client_with_auth"), - ], - ) - def test_get_historical_features(self, mocked_client, mocker): - - mocked_client._serving_service_stub = Serving.ServingServiceStub( - grpc.insecure_channel("") - ) - mocked_client._core_service_stub = Core.CoreServiceStub( - grpc.insecure_channel("") - ) - - mocker.patch.object( - mocked_client._core_service_stub, - "GetFeatureSet", - return_value=GetFeatureSetResponse( - feature_set=FeatureSetProto( - spec=FeatureSetSpecProto( - name="driver", - project="driver_project", - entities=[ - EntitySpecProto( - name="driver", value_type=ValueProto.ValueType.INT64 - ), - EntitySpecProto( - name="transaction", - value_type=ValueProto.ValueType.INT64, - ), - ], - features=[ - FeatureSpecProto( - name="driver_id", value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpecProto( - name="driver_name", - value_type=ValueProto.ValueType.STRING, - ), - ], - ), - meta=FeatureSetMetaProto(status=FeatureSetStatusProto.STATUS_READY), - ) - ), - ) - - expected_dataframe = pd.DataFrame( - { - "datetime": [datetime.utcnow() for _ in range(3)], - "driver": [1001, 1002, 1003], - "transaction": [1001, 1002, 1003], - "driver_id": [1001, 1002, 1003], - } - ) - - final_results = tempfile.mktemp() - pandavro.to_avro(file_path_or_buffer=final_results, df=expected_dataframe) - - mocker.patch.object( - mocked_client._serving_service_stub, - "GetBatchFeatures", - return_value=GetBatchFeaturesResponse( - job=BatchRetrievalJob( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_DONE, - file_uris=[f"file://{final_results}"], - data_format=DataFormat.DATA_FORMAT_AVRO, - ) - ), - ) - - mocker.patch.object( - mocked_client._serving_service_stub, - "GetJob", - return_value=GetJobResponse( - job=BatchRetrievalJob( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_DONE, - file_uris=[f"file://{final_results}"], - data_format=DataFormat.DATA_FORMAT_AVRO, - ) - ), - ) - - mocker.patch.object( - mocked_client._serving_service_stub, - "GetFeastServingInfo", - 
return_value=GetFeastServingInfoResponse( - job_staging_location=f"file://{tempfile.mkdtemp()}/", - type=FeastServingType.FEAST_SERVING_TYPE_BATCH, - ), - ) - - mocked_client.set_project("project1") - # TODO: Abstract away GCS client and GCP dependency - # NOTE: Feast Serving does not allow for feature references - # that specify the same feature in the same request. - with patch("google.cloud.storage.Client"): - response = mocked_client.get_historical_features( - entity_rows=pd.DataFrame( - { - "datetime": [ - pd.datetime.now(tz=timezone("Asia/Singapore")) - for _ in range(3) - ], - "driver": [1001, 1002, 1003], - "transaction": [1001, 1002, 1003], - } - ), - feature_refs=["driver:driver_id", "driver_id"], - project="driver_project", - ) # Type: GetBatchFeaturesResponse - - assert response.id == "123" and response.status == JobStatus.JOB_STATUS_DONE - - actual_dataframe = response.to_dataframe() - - assert actual_dataframe[["driver_id"]].equals(expected_dataframe[["driver_id"]]) - @pytest.mark.parametrize( "test_client", [lazy_fixture("client"), lazy_fixture("secure_client")], ) - def test_apply_feature_set_success(self, test_client): + def test_apply_feature_table_success(self, test_client): test_client.set_project("project1") - # Create Feature Sets - fs1 = FeatureSet("my-feature-set-1") - fs1.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64)) - fs1.add(Feature(name="fs1-my-feature-2", dtype=ValueType.STRING)) - fs1.add(Entity(name="fs1-my-entity-1", dtype=ValueType.INT64)) - - fs2 = FeatureSet("my-feature-set-2") - fs2.add(Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST)) - fs2.add(Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST)) - fs2.add(Entity(name="fs2-my-entity-1", dtype=ValueType.INT64)) - - # Register Feature Set with Core - test_client.apply(fs1) - test_client.apply(fs2) - - feature_sets = test_client.list_feature_sets() - - # List Feature Sets - assert ( - len(feature_sets) == 2 - and feature_sets[0].name == "my-feature-set-1" - and feature_sets[0].features[0].name == "fs1-my-feature-1" - and feature_sets[0].features[0].dtype == ValueType.INT64 - and feature_sets[0].features[1].name == "fs1-my-feature-2" - and feature_sets[0].features[1].dtype == ValueType.STRING - and feature_sets[0].entities[0].name == "fs1-my-entity-1" - and feature_sets[0].entities[0].dtype == ValueType.INT64 - and feature_sets[1].features[0].name == "fs2-my-feature-1" - and feature_sets[1].features[0].dtype == ValueType.STRING_LIST - and feature_sets[1].features[1].name == "fs2-my-feature-2" - and feature_sets[1].features[1].dtype == ValueType.BYTES_LIST - and feature_sets[1].entities[0].name == "fs2-my-entity-1" - and feature_sets[1].entities[0].dtype == ValueType.INT64 - ) - - @pytest.mark.parametrize( - "dataframe,test_client", - [ - (dataframes.GOOD, lazy_fixture("client")), - (dataframes.GOOD, lazy_fixture("secure_client")), - ], - ) - def test_feature_set_ingest_success(self, dataframe, test_client, mocker): - test_client.set_project("project1") - driver_fs = FeatureSet( - "driver-feature-set", source=KafkaSource(brokers="kafka:9092", topic="test") - ) - driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT)) - driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING)) - driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64)) - driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64)) - - # Register with Feast core - test_client.apply(driver_fs) - driver_fs = driver_fs.to_proto() - driver_fs.meta.status = 
FeatureSetStatusProto.STATUS_READY - - mocker.patch.object( - test_client._core_service_stub, - "GetFeatureSet", - return_value=GetFeatureSetResponse(feature_set=driver_fs), - ) - - # Need to create a mock producer - with patch("feast.client.get_producer"): - # Ingest data into Feast - test_client.ingest("driver-feature-set", dataframe) - - @pytest.mark.parametrize( - "dataframe,test_client,exception", - [(dataframes.GOOD, lazy_fixture("client"), Exception)], - ) - def test_feature_set_ingest_throws_exception_if_kafka_down( - self, dataframe, test_client, exception, mocker - ): - - test_client.set_project("project1") - driver_fs = FeatureSet( - "driver-feature-set", - source=KafkaSource(brokers="localhost:4412", topic="test"), - ) - driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT)) - driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING)) - driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64)) - driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64)) - - # Register with Feast core - test_client.apply(driver_fs) - driver_fs = driver_fs.to_proto() - driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY - - mocker.patch.object( - test_client._core_service_stub, - "GetFeatureSet", - return_value=GetFeatureSetResponse(feature_set=driver_fs), - ) - - with pytest.raises(exception): - test_client.ingest("driver-feature-set", dataframe, timeout=1) - - @pytest.mark.parametrize( - "dataframe,exception,test_client", - [ - (dataframes.GOOD, TimeoutError, lazy_fixture("client")), - (dataframes.GOOD, TimeoutError, lazy_fixture("secure_client")), - ], - ) - def test_feature_set_ingest_fail_if_pending( - self, dataframe, exception, test_client, mocker - ): - with pytest.raises(exception): - test_client.set_project("project1") - driver_fs = FeatureSet( - "driver-feature-set", - source=KafkaSource(brokers="kafka:9092", topic="test"), - ) - driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT)) - driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING)) - driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64)) - driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64)) - - # Register with Feast core - test_client.apply(driver_fs) - driver_fs = driver_fs.to_proto() - driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING - - mocker.patch.object( - test_client._core_service_stub, - "GetFeatureSet", - return_value=GetFeatureSetResponse(feature_set=driver_fs), - ) - - # Need to create a mock producer - with patch("feast.client.get_producer"): - # Ingest data into Feast - test_client.ingest("driver-feature-set", dataframe, timeout=1) - - @pytest.mark.parametrize( - "dataframe,exception,test_client", - [ - (dataframes.BAD_NO_DATETIME, Exception, lazy_fixture("client")), - ( - dataframes.BAD_INCORRECT_DATETIME_TYPE, - Exception, - lazy_fixture("client"), + # Create Feature Tables + batch_source = DataSource( + type=SourceType(1).name, + field_mapping={ + "ride_distance": "ride_distance", + "ride_duration": "ride_duration", + }, + options=FileOptions(file_format="avro", file_url="data/test.avro"), + timestamp_column="ts_col", + date_partition_column="date_partition_col", + ) + + stream_source = DataSource( + type=SourceType(3).name, + field_mapping={ + "ride_distance": "ride_distance", + "ride_duration": "ride_duration", + }, + options=KafkaOptions( + bootstrap_servers="localhost:9094", + class_path="random/path/to/class", + topic="test_topic", ), - (dataframes.BAD_NO_ENTITY, Exception, lazy_fixture("client")), - (dataframes.NO_FEATURES, 
Exception, lazy_fixture("client")), - (dataframes.BAD_NO_DATETIME, Exception, lazy_fixture("secure_client"),), - ( - dataframes.BAD_INCORRECT_DATETIME_TYPE, - Exception, - lazy_fixture("secure_client"), - ), - (dataframes.BAD_NO_ENTITY, Exception, lazy_fixture("secure_client")), - (dataframes.NO_FEATURES, Exception, lazy_fixture("secure_client")), - ], - ) - def test_feature_set_ingest_failure(self, test_client, dataframe, exception): - with pytest.raises(exception): - # Create feature set - driver_fs = FeatureSet("driver-feature-set") - - # Update based on dataset - driver_fs.infer_fields_from_df(dataframe) - - # Register with Feast core - test_client.apply(driver_fs) - - # Ingest data into Feast - test_client.ingest(driver_fs, dataframe=dataframe) - - @pytest.mark.parametrize( - "dataframe,test_client", - [ - (dataframes.ALL_TYPES, lazy_fixture("client")), - (dataframes.ALL_TYPES, lazy_fixture("secure_client")), - ], - ) - def test_feature_set_types_success(self, test_client, dataframe, mocker): - - test_client.set_project("project1") + timestamp_column="ts_col", + ) - all_types_fs = FeatureSet( - name="all_types", - entities=[Entity(name="user_id", dtype=ValueType.INT64)], + ft1 = FeatureTable( + name="my-feature-table-1", features=[ - Feature(name="float_feature", dtype=ValueType.FLOAT), - Feature(name="int64_feature", dtype=ValueType.INT64), - Feature(name="int32_feature", dtype=ValueType.INT32), - Feature(name="string_feature", dtype=ValueType.STRING), - Feature(name="bytes_feature", dtype=ValueType.BYTES), - Feature(name="bool_feature", dtype=ValueType.BOOL), - Feature(name="double_feature", dtype=ValueType.DOUBLE), - Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST), - Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST), - Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST), - Feature(name="string_list_feature", dtype=ValueType.STRING_LIST), - Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST), - Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST), - Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST), + Feature(name="fs1-my-feature-1", dtype=ValueType.INT64).to_proto(), + Feature(name="fs1-my-feature-2", dtype=ValueType.STRING).to_proto(), + Feature( + name="fs1-my-feature-3", dtype=ValueType.STRING_LIST + ).to_proto(), + Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST).to_proto(), ], - max_age=Duration(seconds=3600), + entities=["fs1-my-entity-1"], + labels={"team": "matchmaking"}, + batch_source=batch_source.to_proto(), + stream_source=stream_source.to_proto(), ) - # Register with Feast core - test_client.apply(all_types_fs) + # Register Feature Table with Core + test_client.apply_feature_table(ft1) - mocker.patch.object( - test_client._core_service, - "GetFeatureSet", - return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()), - ) + feature_tables = test_client.list_feature_tables() - # Need to create a mock producer - with patch("feast.client.get_producer"): - # Ingest data into Feast - test_client.ingest(all_types_fs, dataframe) + # List Feature Tables + assert ( + len(feature_tables) == 1 + and feature_tables[0].name == "my-feature-table-1" + and feature_tables[0].features[0].name == "fs1-my-feature-1" + and feature_tables[0].features[0].value_type == ValueProto.ValueType.INT64 + and feature_tables[0].features[1].name == "fs1-my-feature-2" + and feature_tables[0].features[1].value_type == ValueProto.ValueType.STRING + and feature_tables[0].features[2].name == "fs1-my-feature-3" + 
and feature_tables[0].features[2].value_type + == ValueProto.ValueType.STRING_LIST + and feature_tables[0].features[3].name == "fs1-my-feature-4" + and feature_tables[0].features[3].value_type + == ValueProto.ValueType.BYTES_LIST + and feature_tables[0].entities[0] == "fs1-my-entity-1" + ) @patch("grpc.channel_ready_future") def test_secure_channel_creation_with_secure_client( @@ -1058,7 +683,7 @@ def test_secure_channel_creation_with_secure_core_url( def test_auth_success_with_secure_channel_on_core_url( self, secure_core_client_with_auth ): - secure_core_client_with_auth.list_feature_sets() + secure_core_client_with_auth.list_feature_tables() def test_auth_success_with_insecure_channel_on_core_url( self, insecure_core_server_with_auth @@ -1068,10 +693,10 @@ def test_auth_success_with_insecure_channel_on_core_url( enable_auth=True, auth_token=_FAKE_JWT_TOKEN, ) - client.list_feature_sets() + client.list_feature_tables() def test_no_auth_sent_when_auth_disabled( self, insecure_core_server_that_blocks_auth ): client = Client(core_url=f"localhost:{insecure_core_server_that_blocks_auth}") - client.list_feature_sets() + client.list_feature_tables() diff --git a/sdk/python/tests/test_entity.py b/sdk/python/tests/test_entity.py index 4d146da729..d05412c3bb 100644 --- a/sdk/python/tests/test_entity.py +++ b/sdk/python/tests/test_entity.py @@ -21,7 +21,7 @@ from feast.client import Client from feast.core import CoreService_pb2_grpc as Core -from feast.entity import EntityV2 +from feast.entity import Entity from feast.value_type import ValueType from feast_core_server import CoreServicer @@ -52,7 +52,7 @@ def client(self, server): def test_entity_import_export_yaml(self): - test_entity = EntityV2( + test_entity = Entity( name="car_driver_entity", description="Driver entity for car rides", value_type=ValueType.STRING, @@ -63,14 +63,14 @@ def test_entity_import_export_yaml(self): string_yaml = test_entity.to_yaml() # Create a new entity object from the YAML string - actual_entity_from_string = EntityV2.from_yaml(string_yaml) + actual_entity_from_string = Entity.from_yaml(string_yaml) # Ensure equality is upheld to original entity assert test_entity == actual_entity_from_string def test_entity_class_contains_labels(): - entity = EntityV2( + entity = Entity( "my-entity", description="My entity", value_type=ValueType.STRING, @@ -81,6 +81,6 @@ def test_entity_class_contains_labels(): def test_entity_without_labels_empty_dict(): - entity = EntityV2("my-entity", description="My entity", value_type=ValueType.STRING) + entity = Entity("my-entity", description="My entity", value_type=ValueType.STRING) assert entity.labels == dict() assert len(entity.labels) == 0 diff --git a/sdk/python/tests/test_feature.py b/sdk/python/tests/test_feature.py deleted file mode 100644 index bc83683e0f..0000000000 --- a/sdk/python/tests/test_feature.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2019 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from feast.feature import FeatureRef - - -class TestFeatureRef: - def test_str_ref(self): - original_ref = FeatureRef(feature_set="test", name="test") - ref_str = repr(original_ref) - parsed_ref = FeatureRef.from_str(ref_str) - assert original_ref == parsed_ref diff --git a/sdk/python/tests/test_feature_set.py b/sdk/python/tests/test_feature_set.py deleted file mode 100644 index cf78cf048b..0000000000 --- a/sdk/python/tests/test_feature_set.py +++ /dev/null @@ -1,394 +0,0 @@ -# Copyright 2019 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pathlib -from collections import OrderedDict -from concurrent import futures -from datetime import datetime - -import dataframes -import grpc -import pandas as pd -import pytest -import pytz -from google.protobuf import json_format - -from feast.client import Client -from feast.core import CoreService_pb2_grpc as Core -from feast.entity import Entity -from feast.feature_set import ( - Feature, - FeatureSet, - FeatureSetRef, - _make_tfx_schema_domain_info_inline, -) -from feast.value_type import ValueType -from feast_core_server import CoreServicer -from tensorflow_metadata.proto.v0 import schema_pb2 - -CORE_URL = "core.feast.local" -SERVING_URL = "serving.feast.local" - - -class TestFeatureSet: - @pytest.fixture(scope="function") - def server(self): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) - Core.add_CoreServiceServicer_to_server(CoreServicer(), server) - server.add_insecure_port("[::]:50051") - server.start() - yield server - server.stop(0) - - @pytest.fixture - def client(self, server): - return Client(core_url="localhost:50051") - - def test_add_remove_features_success(self): - fs = FeatureSet("my-feature-set") - fs.add(Feature(name="my-feature-1", dtype=ValueType.INT64)) - fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64)) - fs.drop(name="my-feature-1") - assert len(fs.features) == 1 and fs.features[0].name == "my-feature-2" - - def test_remove_feature_failure(self): - with pytest.raises(KeyError): - fs = FeatureSet("my-feature-set") - fs.drop(name="my-feature-1") - - def test_update_from_source_failure(self): - with pytest.raises(Exception): - df = pd.DataFrame() - fs = FeatureSet("driver-feature-set") - fs.infer_fields_from_df(df) - - @pytest.mark.parametrize( - "dataframe,feature_count,entity_count,discard_unused_fields,features,entities", - [ - ( - dataframes.GOOD, - 3, - 1, - True, - [], - [Entity(name="entity_id", dtype=ValueType.INT64)], - ), - ( - dataframes.GOOD_FIVE_FEATURES, - 5, - 1, - True, - [], - [Entity(name="entity_id", dtype=ValueType.INT64)], - ), - ( - dataframes.GOOD_FIVE_FEATURES, - 6, - 1, - True, - [Feature(name="feature_6", dtype=ValueType.INT64)], - [Entity(name="entity_id", dtype=ValueType.INT64)], - ), - ( - dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES, - 5, - 2, - True, - [], - [ - Entity(name="entity_1_id", dtype=ValueType.INT64), - Entity(name="entity_2_id", dtype=ValueType.INT64), - ], - ), - ( - dataframes.GOOD_FIVE_FEATURES_TWO_ENTITIES, - 6, - 3, - False, - 
[], - [ - Entity(name="entity_1_id", dtype=ValueType.INT64), - Entity(name="entity_2_id", dtype=ValueType.INT64), - ], - ), - ( - dataframes.NO_FEATURES, - 0, - 1, - True, - [], - [Entity(name="entity_id", dtype=ValueType.INT64)], - ), - ( - pd.DataFrame( - { - "datetime": [ - datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(3) - ] - } - ), - 0, - 0, - True, - [], - [], - ), - ], - ids=[ - "Test small dataframe update with hardcoded entity", - "Test larger dataframe update with hardcoded entity", - "Test larger dataframe update with hardcoded entity and feature", - "Test larger dataframe update with two hardcoded entities and discarding of existing fields", - "Test larger dataframe update with two hardcoded entities and retention of existing fields", - "Test dataframe with no featuresdataframe", - "Test empty dataframe", - ], - ) - def test_add_features_from_df_success( - self, - dataframe, - feature_count, - entity_count, - discard_unused_fields, - features, - entities, - ): - my_feature_set = FeatureSet( - name="my_feature_set", - features=[Feature(name="dummy_f1", dtype=ValueType.INT64)], - entities=[Entity(name="dummy_entity_1", dtype=ValueType.INT64)], - ) - my_feature_set.infer_fields_from_df( - dataframe, - discard_unused_fields=discard_unused_fields, - features=features, - entities=entities, - ) - assert len(my_feature_set.features) == feature_count - assert len(my_feature_set.entities) == entity_count - - def test_import_tfx_schema(self): - tests_folder = pathlib.Path(__file__).parent - test_input_schema_json = open( - tests_folder / "data" / "tensorflow_metadata" / "bikeshare_schema.json" - ).read() - test_input_schema = schema_pb2.Schema() - json_format.Parse(test_input_schema_json, test_input_schema) - - feature_set = FeatureSet( - name="bikeshare", - entities=[Entity(name="station_id", dtype=ValueType.INT64)], - features=[ - Feature(name="name", dtype=ValueType.STRING), - Feature(name="status", dtype=ValueType.STRING), - Feature(name="latitude", dtype=ValueType.FLOAT), - Feature(name="longitude", dtype=ValueType.FLOAT), - Feature(name="location", dtype=ValueType.STRING), - ], - ) - - # Before update - for entity in feature_set.entities: - assert entity.presence is None - assert entity.shape is None - for feature in feature_set.features: - assert feature.presence is None - assert feature.shape is None - assert feature.string_domain is None - assert feature.float_domain is None - assert feature.int_domain is None - - feature_set.import_tfx_schema(test_input_schema) - - # After update - for feature in feature_set.features: - assert feature.presence is not None - assert feature.shape is not None - if feature.name in ["location", "name", "status"]: - assert feature.string_domain is not None - elif feature.name in ["latitude", "longitude"]: - assert feature.float_domain is not None - elif feature.name in ["station_id"]: - assert feature.int_domain is not None - - def test_export_tfx_schema(self): - tests_folder = pathlib.Path(__file__).parent - test_input_feature_set = FeatureSet.from_yaml( - str( - tests_folder - / "data" - / "tensorflow_metadata" - / "bikeshare_feature_set.yaml" - ) - ) - - expected_schema_json = open( - tests_folder / "data" / "tensorflow_metadata" / "bikeshare_schema.json" - ).read() - expected_schema = schema_pb2.Schema() - json_format.Parse(expected_schema_json, expected_schema) - _make_tfx_schema_domain_info_inline(expected_schema) - - actual_schema = test_input_feature_set.export_tfx_schema() - - assert len(actual_schema.feature) == 
len(expected_schema.feature) - for actual, expected in zip(actual_schema.feature, expected_schema.feature): - assert actual.SerializeToString() == expected.SerializeToString() - - def test_feature_set_import_export_yaml(self): - - test_feature_set = FeatureSet( - name="bikeshare", - entities=[Entity(name="station_id", dtype=ValueType.INT64)], - features=[ - Feature(name="name", dtype=ValueType.STRING), - Feature(name="longitude", dtype=ValueType.FLOAT), - Feature(name="location", dtype=ValueType.STRING), - ], - ) - - # Create a string YAML representation of the feature set - string_yaml = test_feature_set.to_yaml() - - # Create a new feature set object from the YAML string - actual_feature_set_from_string = FeatureSet.from_yaml(string_yaml) - - # Ensure equality is upheld to original feature set - assert test_feature_set == actual_feature_set_from_string - - -def make_tfx_schema_domain_info_inline(schema): - # Copy top-level domain info defined in the schema to inline definition. - # One use case is in FeatureSet which does not have access to the top-level domain - # info. - domain_ref_to_string_domain = {d.name: d for d in schema.string_domain} - domain_ref_to_float_domain = {d.name: d for d in schema.float_domain} - domain_ref_to_int_domain = {d.name: d for d in schema.int_domain} - - for feature in schema.feature: - domain_info_case = feature.WhichOneof("domain_info") - if domain_info_case == "domain": - domain_ref = feature.domain - if domain_ref in domain_ref_to_string_domain: - feature.string_domain.MergeFrom(domain_ref_to_string_domain[domain_ref]) - elif domain_ref in domain_ref_to_float_domain: - feature.float_domain.MergeFrom(domain_ref_to_float_domain[domain_ref]) - elif domain_ref in domain_ref_to_int_domain: - feature.int_domain.MergeFrom(domain_ref_to_int_domain[domain_ref]) - - -def test_feature_set_class_contains_labels(): - fs = FeatureSet("my-feature-set", labels={"key1": "val1", "key2": "val2"}) - assert "key1" in fs.labels.keys() and fs.labels["key1"] == "val1" - assert "key2" in fs.labels.keys() and fs.labels["key2"] == "val2" - - -def test_feature_class_contains_labels(): - fs = FeatureSet("my-feature-set", labels={"key1": "val1", "key2": "val2"}) - fs.add( - Feature( - name="my-feature-1", - dtype=ValueType.INT64, - labels={"feature_key1": "feature_val1"}, - ) - ) - assert "feature_key1" in fs.features[0].labels.keys() - assert fs.features[0].labels["feature_key1"] == "feature_val1" - - -def test_feature_set_without_labels_empty_dict(): - fs = FeatureSet("my-feature-set") - assert fs.labels == OrderedDict() - assert len(fs.labels) == 0 - - -def test_feature_without_labels_empty_dict(): - f = Feature("my feature", dtype=ValueType.INT64) - assert f.labels == OrderedDict() - assert len(f.labels) == 0 - - -def test_set_label_feature_set(): - fs = FeatureSet("my-feature-set") - fs.set_label("k1", "v1") - assert fs.labels["k1"] == "v1" - - -def test_set_labels_overwrites_existing(): - fs = FeatureSet("my-feature-set") - fs.set_label("k1", "v1") - fs.set_label("k1", "v2") - assert fs.labels["k1"] == "v2" - - -def test_remove_labels_empty_failure(): - fs = FeatureSet("my-feature-set") - with pytest.raises(KeyError): - fs.remove_label("key1") - - -def test_remove_labels_invalid_key_failure(): - fs = FeatureSet("my-feature-set") - fs.set_label("k1", "v1") - with pytest.raises(KeyError): - fs.remove_label("key1") - - -def test_unequal_feature_based_on_labels(): - f1 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"}) - f2 = Feature(name="feature-1", 
dtype=ValueType.INT64, labels={"k1": "v1"}) - assert f1 == f2 - f3 = Feature(name="feature-1", dtype=ValueType.INT64) - assert f1 != f3 - f4 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "notv1"}) - assert f1 != f4 - - -def test_unequal_feature_set_based_on_labels(): - fs1 = FeatureSet("my-feature-set") - fs2 = FeatureSet("my-feature-set") - assert fs1 == fs2 - fs1.set_label("k1", "v1") - fs2.set_label("k1", "v1") - assert fs1 == fs2 - fs2.set_label("k1", "unequal") - assert not fs1 == fs2 - - -def test_unequal_feature_set_other_has_no_labels(): - fs1 = FeatureSet("my-feature-set") - fs2 = FeatureSet("my-feature-set") - assert fs1 == fs2 - fs1.set_label("k1", "v1") - assert not fs1 == fs2 - - -def test_unequal_feature_other_has_no_labels(): - f1 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"}) - f2 = Feature(name="feature-1", dtype=ValueType.INT64) - assert f1 != f2 - - -class TestFeatureSetRef: - def test_from_feature_set(self): - feature_set = FeatureSet("test", "test") - ref = FeatureSetRef.from_feature_set(feature_set) - - assert ref.name == "test" - assert ref.project == "test" - - def test_str_ref(self): - original_ref = FeatureSetRef(project="test", name="test") - ref_str = repr(original_ref) - parsed_ref = FeatureSetRef.from_str(ref_str) - assert original_ref == parsed_ref diff --git a/sdk/python/tests/test_feature_table.py b/sdk/python/tests/test_feature_table.py index a7a8849c76..8a1059bcb6 100644 --- a/sdk/python/tests/test_feature_table.py +++ b/sdk/python/tests/test_feature_table.py @@ -22,8 +22,8 @@ from feast.client import Client from feast.core import CoreService_pb2_grpc as Core from feast.data_source import DataSource, FileOptions, KafkaOptions, SourceType +from feast.feature import Feature from feast.feature_table import FeatureTable -from feast.feature_v2 import FeatureV2 from feast.value_type import ValueType from feast_core_server import CoreServicer @@ -82,8 +82,8 @@ def test_feature_table_import_export_yaml(self): test_feature_table = FeatureTable( name="car_driver", features=[ - FeatureV2(name="ride_distance", dtype=ValueType.FLOAT).to_proto(), - FeatureV2(name="ride_duration", dtype=ValueType.STRING).to_proto(), + Feature(name="ride_distance", dtype=ValueType.FLOAT).to_proto(), + Feature(name="ride_duration", dtype=ValueType.STRING).to_proto(), ], entities=["car_driver_entity"], labels={"team": "matchmaking"}, diff --git a/sdk/python/tests/test_job.py b/sdk/python/tests/test_job.py deleted file mode 100644 index 092130401e..0000000000 --- a/sdk/python/tests/test_job.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# Copyright 2020 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import tempfile - -import boto3 -import grpc -import pandas as pd -import pandavro -import pytest -from moto import mock_s3 -from pandas.testing import assert_frame_equal -from pytest import fixture, raises - -from feast.job import JobProto, RetrievalJob -from feast.serving import ServingService_pb2_grpc as Serving -from feast.serving.ServingService_pb2 import DataFormat, GetJobResponse -from feast.serving.ServingService_pb2 import Job as BatchRetrievalJob -from feast.serving.ServingService_pb2 import JobStatus, JobType - -BUCKET = "test_bucket" - -TEST_DATA_FRAME = pd.DataFrame( - { - "driver": [1001, 1002, 1003], - "transaction": [1001, 1002, 1003], - "driver_id": [1001, 1002, 1003], - } -) - - -class TestRetrievalJob: - @fixture - def retrieve_job(self): - - serving_service_stub = Serving.ServingServiceStub(grpc.insecure_channel("")) - job_proto = JobProto( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_RUNNING, - ) - return RetrievalJob(job_proto, serving_service_stub) - - @fixture - def avro_data_path(self): - final_results = tempfile.mktemp() - pandavro.to_avro(file_path_or_buffer=final_results, df=TEST_DATA_FRAME) - return final_results - - def test_to_dataframe_local_file_staging_should_pass( - self, retrieve_job, avro_data_path, mocker - ): - mocker.patch.object( - retrieve_job.serving_stub, - "GetJob", - return_value=GetJobResponse( - job=BatchRetrievalJob( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_DONE, - file_uris=[f"file://{avro_data_path}"], - data_format=DataFormat.DATA_FORMAT_AVRO, - ) - ), - ) - retrived_df = retrieve_job.to_dataframe() - assert_frame_equal(TEST_DATA_FRAME, retrived_df, check_like=True) - - @mock_s3 - def test_to_dataframe_s3_file_staging_should_pass( - self, retrieve_job, avro_data_path, mocker - ): - s3_client = boto3.client("s3") - target = "test_proj/test_features.avro" - s3_client.create_bucket(Bucket=BUCKET) - with open(avro_data_path, "rb") as data: - s3_client.upload_fileobj(data, BUCKET, target) - - mocker.patch.object( - retrieve_job.serving_stub, - "GetJob", - return_value=GetJobResponse( - job=BatchRetrievalJob( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_DONE, - file_uris=[f"s3://{BUCKET}/{target}"], - data_format=DataFormat.DATA_FORMAT_AVRO, - ) - ), - ) - retrived_df = retrieve_job.to_dataframe() - assert_frame_equal(TEST_DATA_FRAME, retrived_df, check_like=True) - - @pytest.mark.parametrize( - "job_proto,exception", - [ - ( - GetJobResponse( - job=BatchRetrievalJob( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_DONE, - data_format=DataFormat.DATA_FORMAT_AVRO, - error="Testing job failure", - ) - ), - Exception, - ), - ( - GetJobResponse( - job=BatchRetrievalJob( - id="123", - type=JobType.JOB_TYPE_DOWNLOAD, - status=JobStatus.JOB_STATUS_DONE, - data_format=DataFormat.DATA_FORMAT_INVALID, - ) - ), - Exception, - ), - ], - ids=["when_retrieve_job_fails", "when_data_format_is_not_avro"], - ) - def test_to_dataframe_s3_file_staging_should_raise( - self, retrieve_job, mocker, job_proto, exception - ): - mocker.patch.object( - retrieve_job.serving_stub, "GetJob", return_value=job_proto, - ) - with raises(exception): - retrieve_job.to_dataframe() From 306ec08bb67397ba1e7d77bed0f44eb9592ff075 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 12:11:57 +0800 Subject: [PATCH 04/34] Refactor and parallelize e2e redis tests Signed-off-by: Terence --- infra/scripts/test-docker-compose.sh | 2 +- 
.../scripts/test-end-to-end-redis-cluster.sh | 5 +- infra/scripts/test-end-to-end.sh | 3 +- tests/e2e/conftest.py | 18 +- tests/e2e/pytest.ini | 5 +- .../all_types_parquet/all_types_parquet.yaml | 34 - tests/e2e/redis/basic-ingest-redis-serving.py | 1539 ----------------- tests/e2e/redis/basic/cust_trans_fs.yaml | 14 - tests/e2e/redis/basic/data.csv | 3 - tests/e2e/redis/basic/driver_fs.yaml | 12 - .../large_volume/cust_trans_large_fs.yaml | 12 - .../redis/parallel-ingest-redis-serving.py | 192 ++ tests/e2e/redis/specifications/dev_ft.yaml | 38 + tests/e2e/requirements.txt | 1 + 14 files changed, 258 insertions(+), 1620 deletions(-) delete mode 100644 tests/e2e/redis/all_types_parquet/all_types_parquet.yaml delete mode 100644 tests/e2e/redis/basic-ingest-redis-serving.py delete mode 100644 tests/e2e/redis/basic/cust_trans_fs.yaml delete mode 100644 tests/e2e/redis/basic/data.csv delete mode 100644 tests/e2e/redis/basic/driver_fs.yaml delete mode 100644 tests/e2e/redis/large_volume/cust_trans_large_fs.yaml create mode 100644 tests/e2e/redis/parallel-ingest-redis-serving.py create mode 100644 tests/e2e/redis/specifications/dev_ft.yaml diff --git a/infra/scripts/test-docker-compose.sh b/infra/scripts/test-docker-compose.sh index 35e1593dd6..45105d4839 100755 --- a/infra/scripts/test-docker-compose.sh +++ b/infra/scripts/test-docker-compose.sh @@ -63,4 +63,4 @@ export FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .N ${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS}:6566 --timeout=120 # Run e2e tests for Redis -docker exec feast_jupyter_1 bash -c 'cd /feast/tests/e2e/redis && pytest --verbose -rs basic-ingest-redis-serving.py --core_url core:6565 --serving_url=online_serving:6566 --jobcontroller_url=jobcontroller:6570 --kafka_brokers=kafka:9092' +docker exec feast_jupyter_1 bash -c 'cd /feast/tests/e2e/redis && pytest --verbose -rs parallel-ingest-redis-serving.py --core_url core:6565 --serving_url=online_serving:6566 --kafka_brokers=kafka:9092' diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index ba29961de6..544c1f4d3d 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -73,7 +73,7 @@ feast: # Connection string specifies the IP and ports of Redis instances in Redis cluster connection_string: "localhost:7000,localhost:7001,localhost:7002,localhost:7003,localhost:7004,localhost:7005" flush_frequency_seconds: 1 - # Subscriptions indicate which feature sets needs to be retrieved and used to populate this store + # Subscriptions indicate which feature tables needs to be retrieved and used to populate this store subscriptions: # Wildcards match all options. No filtering is done. - name: "*" @@ -102,7 +102,8 @@ ORIGINAL_DIR=$(pwd) cd tests/e2e set +e -pytest redis/* --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +CORE_NO=$(nproc --all) +pytest redis/parallel-ingest-redis-serving.py -n CORE_NO --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? 
if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/infra/scripts/test-end-to-end.sh b/infra/scripts/test-end-to-end.sh index 8f05efa9df..e65c72b0ba 100755 --- a/infra/scripts/test-end-to-end.sh +++ b/infra/scripts/test-end-to-end.sh @@ -119,7 +119,8 @@ cd tests/e2e set +e export GOOGLE_APPLICATION_CREDENTIALS=/etc/gcloud/service-account.json -pytest redis/* --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +CORE_NO=$(nproc --all) +pytest redis/parallel-ingest-redis-serving.py -n CORE_NO --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index ea2b809f4f..73d141145b 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -1,10 +1,26 @@ +import pytest + + def pytest_addoption(parser): parser.addoption("--core_url", action="store", default="localhost:6565") parser.addoption("--serving_url", action="store", default="localhost:6566") - parser.addoption("--jobcontroller_url", action="store", default="localhost:6570") parser.addoption("--allow_dirty", action="store", default="False") parser.addoption( "--gcs_path", action="store", default="gs://feast-templocation-kf-feast/" ) parser.addoption("--enable_auth", action="store", default="False") parser.addoption("--kafka_brokers", action="store", default="localhost:9092") + + +def pytest_runtest_makereport(item, call): + if "incremental" in item.keywords: + if call.excinfo is not None: + parent = item.parent + parent._previousfailed = item + + +def pytest_runtest_setup(item): + if "incremental" in item.keywords: + previousfailed = getattr(item.parent, "_previousfailed", None) + if previousfailed is not None: + pytest.xfail("previous test failed (%s)" % previousfailed.name) diff --git a/tests/e2e/pytest.ini b/tests/e2e/pytest.ini index b0e5a945f5..0e44395b67 100644 --- a/tests/e2e/pytest.ini +++ b/tests/e2e/pytest.ini @@ -1,3 +1,6 @@ [pytest] filterwarnings = - ignore::DeprecationWarning \ No newline at end of file + ignore::DeprecationWarning + +markers = + incremental: Skip subsequent tests if the previous test failed. 
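Note on the hooks above: the conftest.py changes plus the "incremental" marker registered in pytest.ini follow pytest's standard incremental-testing recipe. When a test in a marked class fails, pytest_runtest_makereport records the failed item on the parent, and pytest_runtest_setup then reports every later test in that class as xfail rather than producing a second, redundant failure. A minimal usage sketch, not part of this patch (the class and test names are hypothetical, for illustration only):

import pytest


@pytest.mark.incremental
class TestIngestThenServe:
    def test_apply_feature_table(self):
        # If this setup step fails, pytest_runtest_makereport records the
        # failed item on the parent class.
        assert True

    def test_get_online_features(self):
        # pytest_runtest_setup then xfails this dependent step instead of
        # reporting an unrelated-looking second failure.
        assert True

Run under pytest-xdist with --dist=loadscope, as the updated e2e scripts do, a whole test class is scheduled onto a single worker, so the in-class ordering that the incremental marker relies on is preserved even when tests run in parallel.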
diff --git a/tests/e2e/redis/all_types_parquet/all_types_parquet.yaml b/tests/e2e/redis/all_types_parquet/all_types_parquet.yaml deleted file mode 100644 index b054913c65..0000000000 --- a/tests/e2e/redis/all_types_parquet/all_types_parquet.yaml +++ /dev/null @@ -1,34 +0,0 @@ -kind: feature_set -spec: - name: all_types_parquet - entities: - - name: customer_id - valueType: INT64 - features: - - name: int32_feature_parquet - valueType: INT32 - - name: int64_feature_parquet - valueType: INT64 - - name: float_feature_parquet - valueType: DOUBLE - - name: double_feature_parquet - valueType: DOUBLE - - name: string_feature_parquet - valueType: STRING - - name: bytes_feature_parquet - valueType: BYTES - - name: int32_list_feature_parquet - valueType: INT64_LIST - - name: int64_list_feature_parquet - valueType: INT64_LIST - - name: float_list_feature_parquet - valueType: DOUBLE_LIST - - name: double_list_feature_parquet - valueType: DOUBLE_LIST - - name: string_list_feature_parquet - valueType: STRING_LIST - - name: bytes_list_feature_parquet - valueType: BYTES_LIST - - name: bool_list_feature_parquet - valueType: BOOL_LIST - maxAge: 0s diff --git a/tests/e2e/redis/basic-ingest-redis-serving.py b/tests/e2e/redis/basic-ingest-redis-serving.py deleted file mode 100644 index 853da9f529..0000000000 --- a/tests/e2e/redis/basic-ingest-redis-serving.py +++ /dev/null @@ -1,1539 +0,0 @@ -import math -import os -import random -import tempfile -import time -import uuid -from copy import copy -from datetime import datetime, timedelta - -import grpc -import numpy as np -import pandas as pd -import pytest -import pytz -from google.protobuf.duration_pb2 import Duration - -from feast.client import Client -from feast.config import Config -from feast.constants import CONFIG_AUTH_PROVIDER -from feast.contrib.job_controller.client import Client as JCClient -from feast.core import CoreService_pb2 -from feast.core.CoreService_pb2 import ApplyFeatureSetResponse, GetFeatureSetResponse -from feast.core.CoreService_pb2_grpc import CoreServiceStub -from feast.core.IngestionJob_pb2 import IngestionJobStatus -from feast.entity import Entity -from feast.feature import Feature -from feast.feature_set import FeatureSet, FeatureSetRef -from feast.grpc.auth import get_auth_metadata_plugin -from feast.serving.ServingService_pb2 import GetOnlineFeaturesResponse -from feast.source import KafkaSource -from feast.type_map import ValueType -from feast.types.Value_pb2 import Int64List -from feast.types.Value_pb2 import Value as Value -from feast.wait import wait_retry_backoff - -FLOAT_TOLERANCE = 0.00001 -PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] -DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -AUTH_PROVIDER = "google" - - -def basic_dataframe(entities, features, ingest_time, n_size, null_features=[]): - """ - Generate a basic feast-ingestable dataframe for testing. - Entity value incrementally increase from 1 to n_size - Features values are randomlly generated floats. - entities - names of entities - features - names of the features - ingest_time - ingestion timestamp - n_size - no. of rows in the generated dataframe. 
- null_features - names of features that contain null values - Returns the generated dataframe - """ - df_dict = { - "datetime": [ingest_time.replace(tzinfo=pytz.utc) for _ in range(n_size)], - } - for entity_name in entities: - df_dict[entity_name] = list(range(1, n_size + 1)) - for feature_name in features: - df_dict[feature_name] = [np.random.rand() for _ in range(n_size)] - for null_feature_name in null_features: - df_dict[null_feature_name] = [None for _ in range(n_size)] - return pd.DataFrame(df_dict) - - -def check_online_response(feature_ref, ingest_df, response): - """ - Check the feature value and status in the given online serving response. - feature_refs - string feature ref used to access feature in response - ingest_df - dataframe of ingested values - response - response to extract retrieved feature value and metadata - Returns True if given response has expected feature value and metadata, otherwise False. - """ - feature_ref_splits = feature_ref.split(":") - if len(feature_ref_splits) == 1: - feature_name = feature_ref - else: - _, feature_name = feature_ref_splits - - returned_status = response.field_values[0].statuses[feature_ref] - if ingest_df.loc[0, feature_name] is None: - return returned_status == GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE - else: - sent_value = float(ingest_df.iloc[0][feature_name]) - returned_value = float(response.field_values[0].fields[feature_ref].float_val) - return ( - math.isclose(sent_value, returned_value, abs_tol=FLOAT_TOLERANCE) - and returned_status == GetOnlineFeaturesResponse.FieldStatus.PRESENT - ) - - -@pytest.fixture(scope="module") -def core_url(pytestconfig): - return pytestconfig.getoption("core_url") - - -@pytest.fixture(scope="module") -def serving_url(pytestconfig): - return pytestconfig.getoption("serving_url") - - -@pytest.fixture(scope="module") -def jobcontroller_url(pytestconfig): - return pytestconfig.getoption("jobcontroller_url") - - -@pytest.fixture(scope="module") -def allow_dirty(pytestconfig): - return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False - - -@pytest.fixture(scope="module") -def enable_auth(pytestconfig): - return True if pytestconfig.getoption("enable_auth").lower() == "true" else False - - -@pytest.fixture(scope="module") -def kafka_brokers(pytestconfig): - return pytestconfig.getoption("kafka_brokers") - - -@pytest.fixture(scope="module") -def client(core_url, serving_url, allow_dirty, enable_auth): - # Get client for core and serving - # if enable_auth is True, Google Id token will be - # passed in the metadata for authentication. - client = Client( - core_url=core_url, - serving_url=serving_url, - enable_auth=enable_auth, - auth_provider=AUTH_PROVIDER, - ) - client.create_project(PROJECT_NAME) - - # Ensure Feast core is active, but empty - if not allow_dirty: - feature_sets = client.list_feature_sets() - if len(feature_sets) > 0: - raise Exception( - "Feast cannot have existing feature sets registered. Exiting tests." 
- ) - - return client - - -@pytest.fixture(scope="module") -def jobcontroller_client(jobcontroller_url): - client = JCClient(jobcontroller_url=jobcontroller_url) - return client - - -@pytest.fixture(scope="module") -def ingest_time(): - return datetime.utcnow() - - -@pytest.fixture(scope="module") -def cust_trans_df(ingest_time): - return basic_dataframe( - entities=["customer_id"], - features=["daily_transactions", "total_transactions"], - null_features=["null_values"], - ingest_time=ingest_time, - n_size=5, - ) - - -@pytest.fixture(scope="module") -def driver_df(ingest_time): - return basic_dataframe( - entities=["driver_id"], - features=["rating", "cost"], - ingest_time=ingest_time, - n_size=5, - ) - - -def test_version_returns_results(client): - version_info = client.version() - assert not version_info["core"] == "not configured" - assert not version_info["serving"] == "not configured" - - -def test_list_feature_sets_when_auth_enabled_should_raise(enable_auth): - if enable_auth: - client = Client(core_url=core_url, serving_url=serving_url, enable_auth=False) - with pytest.raises(ConnectionError): - client.list_feature_sets() - - -@pytest.mark.timeout(45) -@pytest.mark.run(order=10) -def test_basic_register_feature_set_success(client): - # Register feature set without project - cust_trans_fs_expected = FeatureSet.from_yaml( - f"{DIR_PATH}/basic/cust_trans_fs.yaml" - ) - driver_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml") - client.apply(cust_trans_fs_expected) - client.apply(driver_fs_expected) - cust_trans_fs_actual = client.get_feature_set("customer_transactions") - assert cust_trans_fs_actual == cust_trans_fs_expected - driver_fs_actual = client.get_feature_set("driver") - assert driver_fs_actual == driver_fs_expected - - # Register feature set with project - cust_trans_fs_expected = FeatureSet.from_yaml( - f"{DIR_PATH}/basic/cust_trans_fs.yaml" - ) - client.set_project(PROJECT_NAME) - client.apply(cust_trans_fs_expected) - cust_trans_fs_actual = client.get_feature_set( - "customer_transactions", project=PROJECT_NAME - ) - assert cust_trans_fs_actual == cust_trans_fs_expected - - # Register feature set with labels - driver_unlabelled_fs = FeatureSet( - "driver_unlabelled", - features=[Feature("rating", ValueType.FLOAT), Feature("cost", ValueType.FLOAT)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - driver_labeled_fs_expected = FeatureSet( - "driver_labeled", - features=[Feature("rating", ValueType.FLOAT), Feature("cost", ValueType.FLOAT)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - labels={"key1": "val1"}, - ) - client.set_project(PROJECT_NAME) - client.apply(driver_unlabelled_fs) - client.apply(driver_labeled_fs_expected) - driver_fs_actual = client.list_feature_sets( - project=PROJECT_NAME, labels={"key1": "val1"} - )[0] - assert driver_fs_actual == driver_labeled_fs_expected - - # reset client's project for other tests - client.set_project() - - -@pytest.mark.timeout(300) -@pytest.mark.run(order=11) -def test_basic_ingest_success(client, cust_trans_df, driver_df): - cust_trans_fs = client.get_feature_set(name="customer_transactions") - driver_fs = client.get_feature_set(name="driver") - - # Ingest customer transaction data - client.ingest(cust_trans_fs, cust_trans_df) - client.ingest(driver_fs, driver_df) - time.sleep(5) - - -@pytest.mark.timeout(90) -@pytest.mark.run(order=12) -def test_basic_retrieve_online_success(client, cust_trans_df): - feature_refs = 
["daily_transactions", "total_transactions", "null_values"] - - # Poll serving for feature values until the correct values are returned - def try_get_features(): - response = client.get_online_features( - entity_rows=[ - {"customer_id": Value(int64_val=cust_trans_df.iloc[0]["customer_id"])} - ], - feature_refs=feature_refs, - ) # type: GetOnlineFeaturesResponse - is_ok = all( - [ - check_online_response(ref, cust_trans_df, response) - for ref in feature_refs - ] - ) - return response, is_ok - - wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - -@pytest.mark.timeout(90) -@pytest.mark.run(order=13) -def test_basic_retrieve_online_multiple_featureset(client, cust_trans_df, driver_df): - # Test retrieve with different variations of the string feature refs - # ie feature set inference for feature refs without specified feature set - feature_ref_df_mapping = [ - ("customer_transactions:daily_transactions", cust_trans_df), - ("driver:rating", driver_df), - ("total_transactions", cust_trans_df), - ] - - # Poll serving for feature values until the correct values are returned - def try_get_features(): - feature_refs = [mapping[0] for mapping in feature_ref_df_mapping] - response = client.get_online_features( - entity_rows=[ - { - "customer_id": Value( - int64_val=cust_trans_df.iloc[0]["customer_id"] - ), - "driver_id": Value(int64_val=driver_df.iloc[0]["driver_id"]), - } - ], - feature_refs=feature_refs, - ) # type: GetOnlineFeaturesResponse - is_ok = all( - [ - check_online_response(ref, df, response) - for ref, df in feature_ref_df_mapping - ] - ) - return response, is_ok - - wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - -@pytest.fixture(scope="module") -def nonlist_entity_dataframe(): - # Dataframe setup for feature retrieval with entity provided not in list format - N_ROWS = 2 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - customer_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "customer_id2": [i for i in range(N_ROWS)], - "customer2_rating": [i for i in range(N_ROWS)], - "customer2_cost": [float(i) + 0.5 for i in range(N_ROWS)], - "customer2_past_transactions_int": [[i, i + 2] for i in range(N_ROWS)], - "customer2_past_transactions_double": [ - [float(i) + 0.5, float(i) + 2] for i in range(N_ROWS) - ], - "customer2_past_transactions_float": [ - [float(i) + 0.5, float(i) + 2] for i in range(N_ROWS) - ], - "customer2_past_transactions_string": [ - ["first_" + str(i), "second_" + str(i)] for i in range(N_ROWS) - ], - "customer2_past_transactions_bool": [[True, False] for _ in range(N_ROWS)], - } - ) - return customer_df - - -@pytest.fixture(scope="module") -def list_entity_dataframe(): - # Dataframe setup for feature retrieval with entity provided in list format - N_ROWS = 2 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - customer_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "district_ids": [ - [np.int64(i), np.int64(i + 1), np.int64(i + 2)] for i in range(N_ROWS) - ], - "district_rating": [i for i in range(N_ROWS)], - "district_cost": [float(i) + 0.5 for i in range(N_ROWS)], - "district_past_transactions_int": [[i, i + 2] for i in range(N_ROWS)], - "district_past_transactions_double": [ - [float(i) + 0.5, float(i) + 2] for i in range(N_ROWS) - ], - "district_past_transactions_float": [ - [float(i) + 0.5, float(i) + 2] for i in range(N_ROWS) - ], - 
"district_past_transactions_string": [ - ["first_" + str(i), "second_" + str(i)] for i in range(N_ROWS) - ], - "district_past_transactions_bool": [[True, False] for _ in range(N_ROWS)], - } - ) - return customer_df - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=14) -def test_basic_retrieve_online_entity_nonlistform( - client, nonlist_entity_dataframe, list_entity_dataframe -): - # Case 1: Feature retrieval with multiple entities retrieval check - customer_fs = FeatureSet( - name="customer2", - features=[ - Feature(name="customer2_rating", dtype=ValueType.INT64), - Feature(name="customer2_cost", dtype=ValueType.FLOAT), - Feature(name="customer2_past_transactions_int", dtype=ValueType.INT64_LIST), - Feature( - name="customer2_past_transactions_double", dtype=ValueType.DOUBLE_LIST - ), - Feature( - name="customer2_past_transactions_float", dtype=ValueType.FLOAT_LIST - ), - Feature( - name="customer2_past_transactions_string", dtype=ValueType.STRING_LIST - ), - Feature(name="customer2_past_transactions_bool", dtype=ValueType.BOOL_LIST), - ], - entities=[Entity("customer_id2", ValueType.INT64)], - max_age=Duration(seconds=3600), - ) - - client.set_project(PROJECT_NAME) - client.apply(customer_fs) - - customer_fs = client.get_feature_set(name="customer2") - client.ingest(customer_fs, nonlist_entity_dataframe, timeout=600) - time.sleep(15) - - online_request_entity = [{"customer_id2": 0}, {"customer_id2": 1}] - online_request_features = [ - "customer2_rating", - "customer2_cost", - "customer2_past_transactions_int", - "customer2_past_transactions_double", - "customer2_past_transactions_float", - "customer2_past_transactions_string", - "customer2_past_transactions_bool", - ] - online_request_entity2 = [ - {"customer_id2": Value(int64_val=0)}, - {"customer_id2": Value(int64_val=1)}, - ] - - def try_get_features1(): - response = client.get_online_features( - entity_rows=online_request_entity, feature_refs=online_request_features - ) - is_ok = check_online_response( - "customer2_rating", nonlist_entity_dataframe, response - ) - return response, is_ok - - def try_get_features2(): - response = client.get_online_features( - entity_rows=online_request_entity2, feature_refs=online_request_features - ) - is_ok = check_online_response( - "customer2_rating", nonlist_entity_dataframe, response - ) - return response, is_ok - - online_features_actual1 = wait_retry_backoff( - retry_fn=try_get_features1, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_actual2 = wait_retry_backoff( - retry_fn=try_get_features2, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_expected = { - "customer_id2": [0, 1], - "customer2_rating": [0, 1], - "customer2_cost": [0.5, 1.5], - "customer2_past_transactions_int": [[0, 2], [1, 3]], - "customer2_past_transactions_double": [[0.5, 2.0], [1.5, 3.0]], - "customer2_past_transactions_float": [[0.5, 2.0], [1.5, 3.0]], - "customer2_past_transactions_string": [ - ["first_0", "second_0"], - ["first_1", "second_1"], - ], - "customer2_past_transactions_bool": [[True, False], [True, False]], - } - - assert online_features_actual1.to_dict() == online_features_expected - assert online_features_actual2.to_dict() == online_features_expected - - # Case 2: Feature retrieval with multiple entities retrieval check with mixed types - with pytest.raises(TypeError) as excinfo: - online_request_entity2 = [{"customer_id": 0}, {"customer_id": "error_pls"}] - online_features_actual2 = 
client.get_online_features( - entity_rows=online_request_entity2, feature_refs=online_request_features - ) - - assert ( - "Input entity customer_id has mixed types, ValueType.STRING and ValueType.INT64. That is not allowed." - in str(excinfo.value) - ) - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=15) -def test_basic_retrieve_online_entity_listform(client, list_entity_dataframe): - # Case 1: Features retrieval with entity in list format check - district_fs = FeatureSet( - name="district", - features=[ - Feature(name="district_rating", dtype=ValueType.INT64), - Feature(name="district_cost", dtype=ValueType.FLOAT), - Feature(name="district_past_transactions_int", dtype=ValueType.INT64_LIST), - Feature( - name="district_past_transactions_double", dtype=ValueType.DOUBLE_LIST - ), - Feature( - name="district_past_transactions_float", dtype=ValueType.FLOAT_LIST - ), - Feature( - name="district_past_transactions_string", dtype=ValueType.STRING_LIST - ), - Feature(name="district_past_transactions_bool", dtype=ValueType.BOOL_LIST), - ], - entities=[Entity("district_ids", dtype=ValueType.INT64_LIST)], - max_age=Duration(seconds=3600), - ) - - client.set_project(PROJECT_NAME) - client.apply(district_fs) - - district_fs = client.get_feature_set(name="district") - client.ingest(district_fs, list_entity_dataframe, timeout=600) - time.sleep(15) - - online_request_entity = [{"district_ids": [np.int64(1), np.int64(2), np.int64(3)]}] - online_request_features = [ - "district_rating", - "district_cost", - "district_past_transactions_int", - "district_past_transactions_double", - "district_past_transactions_float", - "district_past_transactions_string", - "district_past_transactions_bool", - ] - online_request_entity2 = [ - {"district_ids": Value(int64_list_val=Int64List(val=[1, 2, 3]))} - ] - - def try_get_features1(): - response = client.get_online_features( - entity_rows=online_request_entity, feature_refs=online_request_features - ) - is_ok = check_online_response( - "district_rating", list_entity_dataframe, response - ) - return response, is_ok - - def try_get_features2(): - response = client.get_online_features( - entity_rows=online_request_entity2, feature_refs=online_request_features - ) - is_ok = check_online_response( - "district_rating", list_entity_dataframe, response - ) - return response, is_ok - - online_features_actual = wait_retry_backoff( - retry_fn=try_get_features1, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_actual2 = wait_retry_backoff( - retry_fn=try_get_features2, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_expected = { - "district_ids": [[np.int64(1), np.int64(2), np.int64(3)]], - "district_rating": [1], - "district_cost": [1.5], - "district_past_transactions_int": [[1, 3]], - "district_past_transactions_double": [[1.5, 3.0]], - "district_past_transactions_float": [[1.5, 3.0]], - "district_past_transactions_string": [["first_1", "second_1"]], - "district_past_transactions_bool": [[True, False]], - } - - assert online_features_actual.to_dict() == online_features_expected - assert online_features_actual2.to_dict() == online_features_expected - - # Case 2: Features retrieval with entity in list format check with mixed types - with pytest.raises(ValueError) as excinfo: - online_request_entity2 = [{"district_ids": [np.int64(1), np.int64(2), True]}] - online_features_actual2 = client.get_online_features( - entity_rows=online_request_entity2, 
feature_refs=online_request_features - ) - - assert ( - "List value type for field district_ids is inconsistent. ValueType.INT64 different from ValueType.BOOL." - in str(excinfo.value) - ) - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=16) -def test_basic_ingest_retrieval_fs(client): - # Set to another project to test ingestion based on current project context - client.set_project(PROJECT_NAME + "_NS1") - driver_fs = FeatureSet( - name="driver_fs", - features=[ - Feature(name="driver_fs_rating", dtype=ValueType.FLOAT), - Feature(name="driver_fs_cost", dtype=ValueType.FLOAT), - ], - entities=[Entity("driver_fs_id", ValueType.INT64)], - max_age=Duration(seconds=3600), - ) - client.apply(driver_fs) - - N_ROWS = 2 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - driver_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "driver_fs_id": [i for i in range(N_ROWS)], - "driver_fs_rating": [float(i) for i in range(N_ROWS)], - "driver_fs_cost": [float(i) + 0.5 for i in range(N_ROWS)], - } - ) - client.ingest(driver_fs, driver_df, timeout=600) - time.sleep(15) - - online_request_entity = [{"driver_fs_id": 0}, {"driver_fs_id": 1}] - online_request_features = ["driver_fs_rating", "driver_fs_cost"] - - def try_get_features(): - response = client.get_online_features( - entity_rows=online_request_entity, feature_refs=online_request_features - ) - is_ok = check_online_response("driver_fs_rating", driver_df, response) - return response, is_ok - - online_features_actual = wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_expected = { - "driver_fs_id": [0, 1], - "driver_fs_rating": [0.0, 1.0], - "driver_fs_cost": [0.5, 1.5], - } - - assert online_features_actual.to_dict() == online_features_expected - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=17) -def test_basic_ingest_retrieval_str(client): - # Set to another project to test ingestion based on current project context - client.set_project(PROJECT_NAME + "_NS1") - customer_fs = FeatureSet( - name="cust_fs", - features=[ - Feature(name="cust_rating", dtype=ValueType.INT64), - Feature(name="cust_cost", dtype=ValueType.FLOAT), - ], - entities=[Entity("cust_id", ValueType.INT64)], - max_age=Duration(seconds=3600), - ) - client.apply(customer_fs) - - N_ROWS = 2 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - cust_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "cust_id": [i for i in range(N_ROWS)], - "cust_rating": [i for i in range(N_ROWS)], - "cust_cost": [float(i) + 0.5 for i in range(N_ROWS)], - } - ) - client.ingest("cust_fs", cust_df, timeout=600) - time.sleep(15) - - online_request_entity = [{"cust_id": 0}, {"cust_id": 1}] - online_request_features = ["cust_rating", "cust_cost"] - - def try_get_features(): - response = client.get_online_features( - entity_rows=online_request_entity, feature_refs=online_request_features - ) - is_ok = check_online_response("cust_rating", cust_df, response) - return response, is_ok - - online_features_actual = wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_expected = { - "cust_id": [0, 1], - "cust_rating": [0, 1], - "cust_cost": [0.5, 1.5], - } - - assert online_features_actual.to_dict() == online_features_expected - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=18) -def test_basic_ingest_retrieval_multi_entities(client): - # Set to another 
project to test ingestion based on current project context - client.set_project(PROJECT_NAME + "_NS1") - merchant_fs = FeatureSet( - name="merchant_fs", - features=[Feature(name="merchant_sales", dtype=ValueType.FLOAT)], - entities=[ - Entity("driver_id", ValueType.INT64), - Entity("merchant_id", ValueType.INT64), - ], - max_age=Duration(seconds=3600), - ) - client.apply(merchant_fs) - - N_ROWS = 2 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - merchant_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "driver_id": [i for i in range(N_ROWS)], - "merchant_id": [i for i in range(N_ROWS)], - "merchant_sales": [float(i) + 0.5 for i in range(N_ROWS)], - } - ) - client.ingest("merchant_fs", merchant_df, timeout=600) - time.sleep(15) - - online_request_entity = [ - {"driver_id": 0, "merchant_id": 0}, - {"driver_id": 1, "merchant_id": 1}, - ] - online_request_features = ["merchant_sales"] - - def try_get_features(): - response = client.get_online_features( - entity_rows=online_request_entity, feature_refs=online_request_features - ) - is_ok = check_online_response("merchant_sales", merchant_df, response) - return response, is_ok - - online_features_actual = wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - online_features_expected = { - "driver_id": [0, 1], - "merchant_id": [0, 1], - "merchant_sales": [0.5, 1.5], - } - - assert online_features_actual.to_dict() == online_features_expected - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=19) -def test_basic_retrieve_feature_row_missing_fields(client, cust_trans_df): - feature_refs = ["daily_transactions", "total_transactions", "null_values"] - - # apply cust_trans_fs and ingest dataframe - client.set_project(PROJECT_NAME + "_basic_retrieve_missing_fields") - old_cust_trans_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml") - client.apply(old_cust_trans_fs) - client.ingest(old_cust_trans_fs, cust_trans_df) - - # update cust_trans_fs with one additional feature. - # feature rows ingested before the feature set update will be missing a field. - new_cust_trans_fs = client.get_feature_set(name="customer_transactions") - new_cust_trans_fs.add(Feature("n_trips", ValueType.INT64)) - client.apply(new_cust_trans_fs) - # sleep to ensure feature set update is propagated - time.sleep(15) - - # attempt to retrieve features from feature rows with missing fields - def try_get_features(): - response = client.get_online_features( - entity_rows=[ - {"customer_id": np.int64(cust_trans_df.iloc[0]["customer_id"])} - ], - feature_refs=feature_refs + ["n_trips"], - ) # type: GetOnlineFeaturesResponse - # check if the ingested fields can be correctly retrieved. 
- is_ok = all( - [ - check_online_response(ref, cust_trans_df, response) - for ref in feature_refs - ] - ) - # should return null_value status for missing field n_trips - is_missing_ok = ( - response.field_values[0].statuses["n_trips"] - == GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE - ) - return response, is_ok and is_missing_ok - - wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=20) -def test_basic_retrieve_feature_row_extra_fields(client, cust_trans_df): - feature_refs = ["daily_transactions", "total_transactions"] - # apply cust_trans_fs and ingest dataframe - client.set_project(PROJECT_NAME + "_basic_retrieve_missing_fields") - old_cust_trans_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml") - client.apply(old_cust_trans_fs) - client.ingest(old_cust_trans_fs, cust_trans_df) - - # update cust_trans_fs with the null_values feature dropped. - # feature rows ingested before the feature set update will have an extra field. - new_cust_trans_fs = client.get_feature_set(name="customer_transactions") - new_cust_trans_fs.drop("null_values") - client.apply(new_cust_trans_fs) - # sleep to ensure feature set update is propagated - time.sleep(15) - - # attempt to retrieve features from feature rows with extra fields - def try_get_features(): - response = client.get_online_features( - entity_rows=[ - {"customer_id": np.int64(cust_trans_df.iloc[0]["customer_id"])} - ], - feature_refs=feature_refs, - ) # type: GetOnlineFeaturesResponse - # check if the non dropped fields can be correctly retrieved. - is_ok = all( - [ - check_online_response(ref, cust_trans_df, response) - for ref in feature_refs - ] - ) - return response, is_ok - - wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - -@pytest.fixture(scope="module") -def all_types_dataframe(): - return pd.DataFrame( - { - "datetime": [datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(3)], - "user_id": [1001, 1002, 1003], - "int32_feature": [np.int32(1), np.int32(2), np.int32(3)], - "int64_feature": [np.int64(1), np.int64(2), np.int64(3)], - "float_feature": [np.float(0.1), np.float(0.2), np.float(0.3)], - "double_feature": [np.float64(0.1), np.float64(0.2), np.float64(0.3)], - "string_feature": ["one", "two", "three"], - "bytes_feature": [b"one", b"two", b"three"], - "bool_feature": [True, False, False], - "int32_list_feature": [ - np.array([1, 2, 3, 4], dtype=np.int32), - np.array([1, 2, 3, 4], dtype=np.int32), - np.array([1, 2, 3, 4], dtype=np.int32), - ], - "int64_list_feature": [ - np.array([1, 2, 3, 4], dtype=np.int64), - np.array([1, 2, 3, 4], dtype=np.int64), - np.array([1, 2, 3, 4], dtype=np.int64), - ], - "float_list_feature": [ - np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32), - np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32), - np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float32), - ], - "double_list_feature": [ - np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64), - np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64), - np.array([1.1, 1.2, 1.3, 1.4], dtype=np.float64), - ], - "string_list_feature": [ - np.array(["one", "two", "three"]), - np.array(["one", "two", "three"]), - np.array(["one", "two", "three"]), - ], - "bytes_list_feature": [ - np.array([b"one", b"two", b"three"]), - np.array([b"one", b"two", b"three"]), - np.array([b"one", b"two", b"three"]), - ], - "bool_list_feature": [ - 
[True, False, True], - [True, False, True], - [True, False, True], - ], - } - ) - - -@pytest.mark.timeout(45) -@pytest.mark.run(order=21) -def test_all_types_register_feature_set_success(client): - client.set_project(PROJECT_NAME) - - all_types_fs_expected = FeatureSet( - name="all_types", - entities=[Entity(name="user_id", dtype=ValueType.INT64)], - features=[ - Feature(name="float_feature", dtype=ValueType.FLOAT), - Feature(name="int64_feature", dtype=ValueType.INT64), - Feature(name="int32_feature", dtype=ValueType.INT32), - Feature(name="string_feature", dtype=ValueType.STRING), - Feature(name="bytes_feature", dtype=ValueType.BYTES), - Feature(name="bool_feature", dtype=ValueType.BOOL), - Feature(name="double_feature", dtype=ValueType.DOUBLE), - Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST), - Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST), - Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST), - Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST), - Feature(name="string_list_feature", dtype=ValueType.STRING_LIST), - Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST), - Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST), - ], - max_age=Duration(seconds=3600), - ) - - # Register feature set - client.apply(all_types_fs_expected) - - # Feast Core needs some time to fully commit the FeatureSet applied - # when there is no existing job yet for the Featureset - time.sleep(15) - - all_types_fs_actual = client.get_feature_set(name="all_types") - - assert all_types_fs_actual == all_types_fs_expected - - if all_types_fs_actual is None: - raise Exception( - "Client cannot retrieve 'all_types_fs' FeatureSet " - "after registration. Either Feast Core does not save the " - "FeatureSet correctly or the client needs to wait longer for FeatureSet " - "to be committed." 
- ) - - -@pytest.mark.timeout(300) -@pytest.mark.run(order=22) -def test_all_types_ingest_success(client, all_types_dataframe): - # Get all_types feature set - all_types_fs = client.get_feature_set(name="all_types") - - # Ingest user embedding data - client.ingest(all_types_fs, all_types_dataframe) - - -@pytest.mark.timeout(90) -@pytest.mark.run(order=23) -def test_all_types_retrieve_online_success(client, all_types_dataframe): - # Poll serving for feature values until the correct values are returned_float_list - feature_refs = [ - "float_feature", - "int64_feature", - "int32_feature", - "double_feature", - "string_feature", - "bool_feature", - "bytes_feature", - "float_list_feature", - "int64_list_feature", - "int32_list_feature", - "string_list_feature", - "bytes_list_feature", - "double_list_feature", - "bool_list_feature", - ] - - def try_get_features(): - response = client.get_online_features( - entity_rows=[ - {"user_id": Value(int64_val=all_types_dataframe.iloc[0]["user_id"])} - ], - feature_refs=feature_refs, - ) # type: GetOnlineFeaturesResponse - is_ok = check_online_response("float_feature", all_types_dataframe, response) - return response, is_ok - - response = wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - # check returned values - returned_float_list = ( - response.field_values[0].fields["float_list_feature"].float_list_val.val - ) - sent_float_list = all_types_dataframe.iloc[0]["float_list_feature"] - assert math.isclose( - returned_float_list[0], sent_float_list[0], abs_tol=FLOAT_TOLERANCE - ) - # check returned metadata - assert ( - response.field_values[0].statuses["float_list_feature"] - == GetOnlineFeaturesResponse.FieldStatus.PRESENT - ) - - -@pytest.mark.timeout(300) -@pytest.mark.run(order=35) -def test_all_types_ingest_jobs(jobcontroller_client, client, all_types_dataframe): - # list ingestion jobs given featureset - client.set_project(PROJECT_NAME) - - all_types_fs = client.get_feature_set(name="all_types") - ingest_jobs = jobcontroller_client.list_ingest_jobs( - feature_set_ref=FeatureSetRef.from_feature_set(all_types_fs) - ) - # filter ingestion jobs to only those that are running - ingest_jobs = [ - job for job in ingest_jobs if job.status == IngestionJobStatus.RUNNING - ] - assert len(ingest_jobs) >= 1 - - ingest_job = ingest_jobs[0] - # restart ingestion ingest_job - # restart means stop current job - # (replacement will be automatically spawned) - jobcontroller_client.restart_ingest_job(ingest_job) - # wait for replacement to be created - time.sleep(15) # should be more than polling_interval - - # id without timestamp part - # that remains the same between jobs - shared_id = "-".join(ingest_job.id.split("-")[:-1]) - ingest_jobs = jobcontroller_client.list_ingest_jobs( - feature_set_ref=FeatureSetRef.from_feature_set(all_types_fs) - ) - replacement_jobs = [ - job - for job in ingest_jobs - if job.status == IngestionJobStatus.RUNNING - and job.id.startswith(shared_id) - and job.id != ingest_job.id - ] - - assert len(replacement_jobs) >= 1 - replacement_job = replacement_jobs[0] - - replacement_job.wait(IngestionJobStatus.RUNNING) - assert replacement_job.status == IngestionJobStatus.RUNNING - - # stop ingestion ingest_job - jobcontroller_client.stop_ingest_job(replacement_job) - replacement_job.wait(IngestionJobStatus.ABORTED) - assert replacement_job.status == IngestionJobStatus.ABORTED - - -@pytest.fixture(scope="module") -def large_volume_dataframe(): - ROW_COUNT = 
100000 - offset = random.randint(1000000, 10000000) # ensure a unique key space - customer_data = pd.DataFrame( - { - "datetime": [ - datetime.utcnow().replace(tzinfo=pytz.utc) for _ in range(ROW_COUNT) - ], - "customer_id": [offset + inc for inc in range(ROW_COUNT)], - "daily_transactions_large": [np.random.rand() for _ in range(ROW_COUNT)], - "total_transactions_large": [256 for _ in range(ROW_COUNT)], - } - ) - return customer_data - - -@pytest.mark.timeout(45) -@pytest.mark.run(order=40) -def test_large_volume_register_feature_set_success(client): - cust_trans_fs_expected = FeatureSet.from_yaml( - f"{DIR_PATH}/large_volume/cust_trans_large_fs.yaml" - ) - - # Register feature set - client.apply(cust_trans_fs_expected) - - # Feast Core needs some time to fully commit the FeatureSet applied - # when there is no existing job yet for the Featureset - time.sleep(10) - cust_trans_fs_actual = client.get_feature_set(name="customer_transactions_large") - - assert cust_trans_fs_actual == cust_trans_fs_expected - - if cust_trans_fs_actual is None: - raise Exception( - "Client cannot retrieve 'customer_transactions' FeatureSet " - "after registration. Either Feast Core does not save the " - "FeatureSet correctly or the client needs to wait longer for FeatureSet " - "to be committed." - ) - - -@pytest.mark.timeout(300) -@pytest.mark.run(order=41) -def test_large_volume_ingest_success(client, large_volume_dataframe): - # Get large volume feature set - cust_trans_fs = client.get_feature_set(name="customer_transactions_large") - - # Ingest customer transaction data - client.ingest(cust_trans_fs, large_volume_dataframe) - - -@pytest.mark.timeout(90) -@pytest.mark.run(order=42) -def test_large_volume_retrieve_online_success(client, large_volume_dataframe): - # Poll serving for feature values until the correct values are returned - feature_refs = [ - "daily_transactions_large", - "total_transactions_large", - ] - while True: - response = client.get_online_features( - entity_rows=[ - { - "customer_id": Value( - int64_val=large_volume_dataframe.iloc[0]["customer_id"] - ) - } - ], - feature_refs=feature_refs, - ) # type: GetOnlineFeaturesResponse - is_ok = all( - [ - check_online_response(ref, large_volume_dataframe, response) - for ref in feature_refs - ] - ) - return None, is_ok - - -@pytest.fixture(scope="module") -def all_types_parquet_file(): - COUNT = 20000 - - df = pd.DataFrame( - { - "datetime": [datetime.utcnow() for _ in range(COUNT)], - "customer_id": [np.int32(random.randint(0, 10000)) for _ in range(COUNT)], - "int32_feature_parquet": [ - np.int32(random.randint(0, 10000)) for _ in range(COUNT) - ], - "int64_feature_parquet": [ - np.int64(random.randint(0, 10000)) for _ in range(COUNT) - ], - "float_feature_parquet": [np.float(random.random()) for _ in range(COUNT)], - "double_feature_parquet": [ - np.float64(random.random()) for _ in range(COUNT) - ], - "string_feature_parquet": [ - "one" + str(random.random()) for _ in range(COUNT) - ], - "bytes_feature_parquet": [b"one" for _ in range(COUNT)], - "int32_list_feature_parquet": [ - np.array([1, 2, 3, random.randint(0, 10000)], dtype=np.int32) - for _ in range(COUNT) - ], - "int64_list_feature_parquet": [ - np.array([1, random.randint(0, 10000), 3, 4], dtype=np.int64) - for _ in range(COUNT) - ], - "float_list_feature_parquet": [ - np.array([1.1, 1.2, 1.3, random.random()], dtype=np.float32) - for _ in range(COUNT) - ], - "double_list_feature_parquet": [ - np.array([1.1, 1.2, 1.3, random.random()], dtype=np.float64) - for _ in range(COUNT) - ], - 
"string_list_feature_parquet": [ - np.array(["one", "two" + str(random.random()), "three"]) - for _ in range(COUNT) - ], - "bytes_list_feature_parquet": [ - np.array([b"one", b"two", b"three"]) for _ in range(COUNT) - ], - "bool_list_feature_parquet": [[True, False, True] for _ in range(COUNT)], - } - ) - - file_path = os.path.join(tempfile.mkdtemp(), "all_types.parquet") - df.to_parquet(file_path, allow_truncated_timestamps=True) - return file_path - - -@pytest.mark.timeout(300) -@pytest.mark.run(order=50) -def test_all_types_parquet_register_feature_set_success(client): - # Load feature set from file - all_types_parquet_expected = FeatureSet.from_yaml( - f"{DIR_PATH}/all_types_parquet/all_types_parquet.yaml" - ) - - # Register feature set - client.apply(all_types_parquet_expected) - - # Feast Core needs some time to fully commit the FeatureSet applied - # when there is no existing job yet for the Featureset - time.sleep(30) - - all_types_parquet_actual = client.get_feature_set(name="all_types_parquet") - - assert all_types_parquet_actual == all_types_parquet_expected - - if all_types_parquet_actual is None: - raise Exception( - "Client cannot retrieve 'customer_transactions' FeatureSet " - "after registration. Either Feast Core does not save the " - "FeatureSet correctly or the client needs to wait longer for FeatureSet " - "to be committed." - ) - - -@pytest.mark.timeout(600) -@pytest.mark.run(order=51) -def test_all_types_infer_register_ingest_file_success(client, all_types_parquet_file): - # Get feature set - all_types_fs = client.get_feature_set(name="all_types_parquet") - - # Ingest user embedding data - client.ingest(feature_set=all_types_fs, source=all_types_parquet_file) - - -@pytest.mark.timeout(200) -@pytest.mark.run(order=60) -def test_list_entities_and_features(client): - customer_entity = Entity("customer_id", ValueType.INT64) - driver_entity = Entity("driver_id", ValueType.INT64) - - customer_feature_rating = Feature( - name="rating", dtype=ValueType.FLOAT, labels={"key1": "val1"} - ) - customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT) - driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT) - driver_feature_cost = Feature( - name="cost", dtype=ValueType.FLOAT, labels={"key1": "val1"} - ) - - filter_by_project_entity_labels_expected = dict( - [("customer:rating", customer_feature_rating)] - ) - - filter_by_project_entity_expected = dict( - [("driver:cost", driver_feature_cost), ("driver:rating", driver_feature_rating)] - ) - - filter_by_project_labels_expected = dict( - [ - ("customer:rating", customer_feature_rating), - ("driver:cost", driver_feature_cost), - ] - ) - - customer_fs = FeatureSet( - "customer", - features=[customer_feature_rating, customer_feature_cost], - entities=[customer_entity], - max_age=Duration(seconds=100), - ) - - driver_fs = FeatureSet( - "driver", - features=[driver_feature_rating, driver_feature_cost], - entities=[driver_entity], - max_age=Duration(seconds=100), - ) - - client.set_project(PROJECT_NAME) - client.apply(customer_fs) - client.apply(driver_fs) - - # Test for listing of features - # Case 1: Filter by: project, entities and labels - filter_by_project_entity_labels_actual = client.list_features_by_ref( - project=PROJECT_NAME, entities=["customer_id"], labels={"key1": "val1"} - ) - - # Case 2: Filter by: project, entities - filter_by_project_entity_actual = client.list_features_by_ref( - project=PROJECT_NAME, entities=["driver_id"] - ) - - # Case 3: Filter by: project, labels - 
filter_by_project_labels_actual = client.list_features_by_ref( - project=PROJECT_NAME, labels={"key1": "val1"} - ) - - assert set(filter_by_project_entity_labels_expected) == set( - filter_by_project_entity_labels_actual - ) - assert set(filter_by_project_entity_expected) == set( - filter_by_project_entity_actual - ) - assert set(filter_by_project_labels_expected) == set( - filter_by_project_labels_actual - ) - - -@pytest.mark.timeout(500) -@pytest.mark.run(order=70) -def test_sources_deduplicate_ingest_jobs(client, jobcontroller_client, kafka_brokers): - shared_source = KafkaSource(kafka_brokers, "dup_shared") - dup_source_fs_1 = FeatureSet( - name="duplicate_source_fs_1", - features=[Feature("fs1", ValueType.FLOAT), Feature("fs2", ValueType.FLOAT)], - entities=[Entity("e2", ValueType.INT64)], - source=shared_source, - ) - dup_source_fs_2 = copy(dup_source_fs_1) - dup_source_fs_2.name = "duplicate_source_fs_2" - - def is_same_jobs(): - fs_1_jobs = jobcontroller_client.list_ingest_jobs( - feature_set_ref=FeatureSetRef( - name=dup_source_fs_1.name, project=dup_source_fs_1.project - ) - ) - fs_2_jobs = jobcontroller_client.list_ingest_jobs( - feature_set_ref=FeatureSetRef( - name=dup_source_fs_2.name, project=dup_source_fs_2.project - ) - ) - same = True - if not (len(fs_1_jobs) > 0 and len(fs_1_jobs) == len(fs_2_jobs)): - same = False - for fs_1_job in fs_1_jobs: - for fs_2_job in fs_2_jobs: - if ( - not fs_1_job.source.to_proto() == fs_2_job.source.to_proto() - and fs_1_job.source.to_proto() == shared_source.to_proto() - ): - same = False - if fs_1_job.id != fs_2_job.id: - same = False - return same - - def is_different_jobs(): - fs_1_jobs = jobcontroller_client.list_ingest_jobs( - feature_set_ref=FeatureSetRef( - name=dup_source_fs_1.name, project=dup_source_fs_1.project - ) - ) - fs_2_jobs = jobcontroller_client.list_ingest_jobs( - feature_set_ref=FeatureSetRef( - name=dup_source_fs_2.name, project=dup_source_fs_2.project - ) - ) - different = True - if not (len(fs_1_jobs) > 0 and len(fs_2_jobs) > 0): - different = False - for fs_1_job in fs_1_jobs: - if fs_1_job.source.to_proto() == alt_source.to_proto(): - different = False - for fs_2_job in fs_2_jobs: - if fs_2_job.source.to_proto() == shared_source.to_proto(): - different = False - for fs_1_job in fs_1_jobs: - for fs_2_job in fs_2_jobs: - if fs_1_job.id == fs_2_job.id: - different = False - return different - - # register multiple feature sets with the same source - # only one ingest job should spawned due to test ingest job deduplication - client.apply(dup_source_fs_1) - client.apply(dup_source_fs_2) - - while not is_same_jobs(): - time.sleep(1) - - # update feature sets with different sources, should have different jobs - alt_source = KafkaSource(kafka_brokers, "alt_source") - dup_source_fs_2.source = alt_source - client.apply(dup_source_fs_2) - - while not is_different_jobs(): - time.sleep(1) - - # update feature sets with same source again, should have the same job - dup_source_fs_2.source = shared_source - client.apply(dup_source_fs_2) - - while not is_same_jobs(): - time.sleep(1) - - -@pytest.mark.run(order=30) -def test_sink_writes_only_recent_rows(client): - client.set_project("default") - - feature_refs = ["driver:rating", "driver:cost"] - - later_df = basic_dataframe( - entities=["driver_id"], - features=["rating", "cost"], - ingest_time=datetime.utcnow(), - n_size=5, - ) - - earlier_df = basic_dataframe( - entities=["driver_id"], - features=["rating", "cost"], - ingest_time=datetime.utcnow() - timedelta(minutes=5), - 
n_size=5, - ) - - def try_get_features(): - response = client.get_online_features( - entity_rows=[{"driver_id": Value(int64_val=later_df.iloc[0]["driver_id"])}], - feature_refs=feature_refs, - ) # type: GetOnlineFeaturesResponse - is_ok = all( - [check_online_response(ref, later_df, response) for ref in feature_refs] - ) - return response, is_ok - - # test compaction within batch - client.ingest("driver", pd.concat([earlier_df, later_df])) - wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - # test read before write - client.ingest("driver", earlier_df) - time.sleep(10) - wait_retry_backoff( - retry_fn=try_get_features, - timeout_secs=90, - timeout_msg="Timed out trying to get online feature values", - ) - - -# TODO: rewrite these using python SDK once the labels are implemented there -class TestsBasedOnGrpc: - GRPC_CONNECTION_TIMEOUT = 3 - LABEL_KEY = "my" - LABEL_VALUE = "label" - - @pytest.fixture(scope="module") - def core_service_stub(self, core_url): - if core_url.endswith(":443"): - core_channel = grpc.secure_channel(core_url, grpc.ssl_channel_credentials()) - else: - core_channel = grpc.insecure_channel(core_url) - - try: - grpc.channel_ready_future(core_channel).result( - timeout=self.GRPC_CONNECTION_TIMEOUT - ) - except grpc.FutureTimeoutError: - raise ConnectionError( - f"Connection timed out while attempting to connect to Feast " - f"Core gRPC server {core_url} " - ) - core_service_stub = CoreServiceStub(core_channel) - return core_service_stub - - @pytest.fixture(scope="module") - def auth_meta_data(self, enable_auth): - if not enable_auth: - return None - else: - metadata = {CONFIG_AUTH_PROVIDER: AUTH_PROVIDER} - metadata_plugin = get_auth_metadata_plugin(config=Config(metadata)) - return metadata_plugin.get_signed_meta() - - def apply_feature_set(self, core_service_stub, feature_set_proto, auth_meta_data): - try: - apply_fs_response = core_service_stub.ApplyFeatureSet( - CoreService_pb2.ApplyFeatureSetRequest(feature_set=feature_set_proto), - timeout=self.GRPC_CONNECTION_TIMEOUT, - metadata=auth_meta_data, - ) # type: ApplyFeatureSetResponse - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - return apply_fs_response.feature_set - - def get_feature_set(self, core_service_stub, name, project, auth_meta_data): - try: - get_feature_set_response = core_service_stub.GetFeatureSet( - CoreService_pb2.GetFeatureSetRequest( - project=project, name=name.strip(), - ), - metadata=auth_meta_data, - ) # type: GetFeatureSetResponse - except grpc.RpcError as e: - raise grpc.RpcError(e.details()) - return get_feature_set_response.feature_set - - @pytest.mark.timeout(45) - @pytest.mark.run(order=51) - def test_register_feature_set_with_labels(self, core_service_stub, auth_meta_data): - feature_set_name = "test_feature_set_labels" - feature_set_proto = FeatureSet( - name=feature_set_name, - project=PROJECT_NAME, - labels={self.LABEL_KEY: self.LABEL_VALUE}, - ).to_proto() - self.apply_feature_set(core_service_stub, feature_set_proto, auth_meta_data) - - retrieved_feature_set = self.get_feature_set( - core_service_stub, feature_set_name, PROJECT_NAME, auth_meta_data - ) - - assert self.LABEL_KEY in retrieved_feature_set.spec.labels - assert retrieved_feature_set.spec.labels[self.LABEL_KEY] == self.LABEL_VALUE - - @pytest.mark.timeout(45) - @pytest.mark.run(order=52) - def test_register_feature_with_labels(self, core_service_stub, auth_meta_data): - feature_set_name = "test_feature_labels" - 
feature_set_proto = FeatureSet( - name=feature_set_name, - project=PROJECT_NAME, - features=[ - Feature( - name="rating", - dtype=ValueType.INT64, - labels={self.LABEL_KEY: self.LABEL_VALUE}, - ) - ], - ).to_proto() - self.apply_feature_set(core_service_stub, feature_set_proto, auth_meta_data) - - retrieved_feature_set = self.get_feature_set( - core_service_stub, feature_set_name, PROJECT_NAME, auth_meta_data - ) - retrieved_feature = retrieved_feature_set.spec.features[0] - - assert self.LABEL_KEY in retrieved_feature.labels - assert retrieved_feature.labels[self.LABEL_KEY] == self.LABEL_VALUE diff --git a/tests/e2e/redis/basic/cust_trans_fs.yaml b/tests/e2e/redis/basic/cust_trans_fs.yaml deleted file mode 100644 index 941037670d..0000000000 --- a/tests/e2e/redis/basic/cust_trans_fs.yaml +++ /dev/null @@ -1,14 +0,0 @@ -kind: feature_set -spec: - name: customer_transactions - entities: - - name: customer_id - valueType: INT64 - features: - - name: daily_transactions - valueType: FLOAT - - name: total_transactions - valueType: FLOAT - - name: null_values - valueType: FLOAT - maxAge: 3600s diff --git a/tests/e2e/redis/basic/data.csv b/tests/e2e/redis/basic/data.csv deleted file mode 100644 index d2994d253a..0000000000 --- a/tests/e2e/redis/basic/data.csv +++ /dev/null @@ -1,3 +0,0 @@ -datetime,customer_id,daily_transactions,total_transactions -1570366527,1001,1.3,500 -1570366536,1002,1.4,600 \ No newline at end of file diff --git a/tests/e2e/redis/basic/driver_fs.yaml b/tests/e2e/redis/basic/driver_fs.yaml deleted file mode 100644 index f25ca95678..0000000000 --- a/tests/e2e/redis/basic/driver_fs.yaml +++ /dev/null @@ -1,12 +0,0 @@ -kind: feature_set -spec: - name: driver - entities: - - name: driver_id - valueType: INT64 - features: - - name: rating - valueType: FLOAT - - name: cost - valueType: FLOAT - maxAge: 3600s diff --git a/tests/e2e/redis/large_volume/cust_trans_large_fs.yaml b/tests/e2e/redis/large_volume/cust_trans_large_fs.yaml deleted file mode 100644 index 7f36151392..0000000000 --- a/tests/e2e/redis/large_volume/cust_trans_large_fs.yaml +++ /dev/null @@ -1,12 +0,0 @@ -kind: feature_set -spec: - name: customer_transactions_large - entities: - - name: customer_id - valueType: INT64 - features: - - name: daily_transactions_large - valueType: FLOAT - - name: total_transactions_large - valueType: FLOAT - maxAge: 3600s diff --git a/tests/e2e/redis/parallel-ingest-redis-serving.py b/tests/e2e/redis/parallel-ingest-redis-serving.py new file mode 100644 index 0000000000..fb30746a0f --- /dev/null +++ b/tests/e2e/redis/parallel-ingest-redis-serving.py @@ -0,0 +1,192 @@ +import os +import uuid +from datetime import datetime + +import pytest +from google.protobuf.duration_pb2 import Duration + +from feast.client import Client +from feast.data_source import DataSource, FileOptions, SourceType +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_table import FeatureTable +from feast.value_type import ValueType + +DIR_PATH = os.path.dirname(os.path.realpath(__file__)) +PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] + + +@pytest.fixture(scope="module") +def client(pytestconfig): + core_url = pytestconfig.getoption("core_url") + serving_url = pytestconfig.getoption("serving_url") + + client = Client(core_url=core_url, serving_url=serving_url,) + + client.set_project(PROJECT_NAME) + + return client + + +@pytest.mark.incremental +class TestBasicIngestionRetrieval: + def setup_class(cls): + prefix = "basic_ingestion" + suffix = 
str(int(datetime.now().timestamp())) + cls.customer_ft_name = f"{prefix}_customer_{suffix}" + cls.driver_ft_name = f"{prefix}_driver_{suffix}" + + cls.customer_entity = Entity( + name="customer_id", + description="Customer entity for rides", + value_type=ValueType.STRING, + labels={"team": "customer_service", "common_key": "common_val"}, + ) + + cls.driver_entity = Entity( + name="driver_id", + description="Driver entity for car rides", + value_type=ValueType.STRING, + labels={"team": "matchmaking", "common_key": "common_val"}, + ) + + cls.basic_ft_spec = FeatureTable.from_yaml( + f"{DIR_PATH}/specifications/dev_ft.yaml" + ) + + def test_discovery(self, client): + + # ApplyEntity + client.apply_entity(self.customer_entity) + client.apply_entity(self.driver_entity) + + # GetEntity Check + assert client.get_entity(name="customer_id") == self.customer_entity + assert client.get_entity(name="driver_id") == self.driver_entity + + # ListEntities Check + common_filtering_labels = {"common_key": "common_val"} + matchmaking_filtering_labels = {"team": "matchmaking"} + + actual_common_entities = client.list_entities(labels=common_filtering_labels) + actual_matchmaking_entities = client.list_entities( + labels=matchmaking_filtering_labels + ) + + assert len(actual_common_entities) == 2 + assert len(actual_matchmaking_entities) == 1 + + # ApplyFeatureTable + client.apply_feature_table(self.basic_ft_spec, PROJECT_NAME) + + # GetFeatureTable Check + actual_get_feature_table = client.get_feature_table(name="dev_featuretable") + assert actual_get_feature_table.name == self.basic_ft_spec.name + assert actual_get_feature_table.entities == self.basic_ft_spec.entities + assert actual_get_feature_table.features == self.basic_ft_spec.features + + # ListFeatureTables Check + actual_list_feature_table = client.list_feature_tables()[0] + assert actual_list_feature_table.name == self.basic_ft_spec.name + assert actual_list_feature_table.entities == self.basic_ft_spec.entities + assert actual_list_feature_table.features == self.basic_ft_spec.features + + def test_basic_retrieval(self, client): + # TODO: Add ingest and retrieval check + pass + + +@pytest.mark.incremental +class TestAllTypesIngestionRetrieval: + def setup_class(cls): + prefix = "alltypes_ingestion" + suffix = str(int(datetime.now().timestamp())) + batch_source = DataSource( + type=SourceType(1).name, + field_mapping={ + "ride_distance": "ride_distance", + "ride_duration": "ride_duration", + }, + options=FileOptions(file_format="parquet", file_url="file://feast/*"), + timestamp_column="ts_col", + date_partition_column="date_partition_col", + ) + + cls.alltypes_entity = Entity( + name="alltypes_id", + description="Driver entity for car rides", + value_type=ValueType.STRING, + labels={"cat": "alltypes"}, + ) + + cls.alltypes_ft_name = f"{prefix}_alltypes_{suffix}" + cls.alltypes_ft_spec = FeatureTable( + name="alltypes", + entities=["alltypes_id"], + features=[ + Feature(name="float_feature", dtype=ValueType.FLOAT).to_proto(), + Feature(name="int64_feature", dtype=ValueType.INT64).to_proto(), + Feature(name="int32_feature", dtype=ValueType.INT32).to_proto(), + Feature(name="string_feature", dtype=ValueType.STRING).to_proto(), + Feature(name="bytes_feature", dtype=ValueType.BYTES).to_proto(), + Feature(name="bool_feature", dtype=ValueType.BOOL).to_proto(), + Feature(name="double_feature", dtype=ValueType.DOUBLE).to_proto(), + Feature( + name="double_list_feature", dtype=ValueType.DOUBLE_LIST + ).to_proto(), + Feature( + name="float_list_feature", 
dtype=ValueType.FLOAT_LIST + ).to_proto(), + Feature( + name="int64_list_feature", dtype=ValueType.INT64_LIST + ).to_proto(), + Feature( + name="int32_list_feature", dtype=ValueType.INT32_LIST + ).to_proto(), + Feature( + name="string_list_feature", dtype=ValueType.STRING_LIST + ).to_proto(), + Feature( + name="bytes_list_feature", dtype=ValueType.BYTES_LIST + ).to_proto(), + Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST).to_proto(), + ], + max_age=Duration(seconds=3600), + batch_source=batch_source.to_proto(), + labels={"cat": "alltypes"}, + ) + + def test_discovery(self, client): + # ApplyEntity + client.apply_entity(self.alltypes_entity) + + # GetEntity Check + assert client.get_entity(name="alltypes_id") == self.alltypes_entity + + # ListEntities Check + alltypes_filtering_labels = {"cat": "alltypes"} + actual_alltypes_entities = client.list_entities( + labels=alltypes_filtering_labels + ) + + assert len(client.list_entities()) == 1 + assert len(actual_alltypes_entities) == 1 + + # ApplyFeatureTable + client.apply_feature_table(self.alltypes_ft_spec, PROJECT_NAME) + + # GetFeatureTable Check + actual_get_feature_table = client.get_feature_table(name="alltypes") + assert actual_get_feature_table.name == self.alltypes_ft_spec.name + assert actual_get_feature_table.entities == self.alltypes_ft_spec.entities + assert actual_get_feature_table.features == self.alltypes_ft_spec.features + + # ListFeatureTables Check + actual_list_feature_table = client.list_feature_tables()[0] + assert actual_list_feature_table.name == self.alltypes_ft_spec.name + assert actual_list_feature_table.entities == self.alltypes_ft_spec.entities + assert actual_list_feature_table.features == self.alltypes_ft_spec.features + + def test_alltypes_retrieval(self, client): + # TODO: Add ingest and retrieval check + pass diff --git a/tests/e2e/redis/specifications/dev_ft.yaml b/tests/e2e/redis/specifications/dev_ft.yaml new file mode 100644 index 0000000000..59072b73b9 --- /dev/null +++ b/tests/e2e/redis/specifications/dev_ft.yaml @@ -0,0 +1,38 @@ +spec: + name: dev_featuretable + entities: + - driver_id + - customer_id + features: + - name: dev_feature_float + valueType: FLOAT + - name: dev_feature_string + valueType: STRING + labels: + feature_key1: feature_val1 + batchSource: + type: BATCH_FILE + fieldMapping: + dev_entity: dev_entity_field + dev_feature_float: dev_feature_float_field + dev_feature_string: dev_feature_string_field + timestampColumn: datetime_col + datePartitionColumn: datetime + file_options: + file_format: PARQUET + file_url: gs://example/feast/* + streamSource: + type: STREAM_KAFKA + field_mapping: + dev_entity: dev_entity_field + dev_feature_float: dev_feature_float_field + dev_feature_string: dev_feature_string_field + timestampColumn: datetime_col + kafka_options: + bootstrap_servers: "localhost:9094" + topic: test_topic + class_path: random/path/to/test + maxAge: 14400s + labels: + key1: val1 + key2: val2 \ No newline at end of file diff --git a/tests/e2e/requirements.txt b/tests/e2e/requirements.txt index 94c63ca120..9c6dd06ac1 100644 --- a/tests/e2e/requirements.txt +++ b/tests/e2e/requirements.txt @@ -7,6 +7,7 @@ pytest-benchmark==3.2.2 pytest-mock==1.10.4 pytest-timeout==1.3.3 pytest-ordering==0.6.* +pytest-xdist==2.1.0 tensorflow-data-validation==0.21.2 deepdiff==4.3.2 tensorflow==2.1.0 From 86f27e3961fa4864b8e58c10960e1819417dae5e Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 12:12:27 +0800 Subject: [PATCH 05/34] Remove e2e bq tests Signed-off-by: Terence --- 
.../scripts/test-end-to-end-batch-dataflow.sh | 307 ------- infra/scripts/test-end-to-end-batch.sh | 153 ---- tests/e2e/bq/bq-batch-retrieval.py | 819 ------------------ tests/e2e/bq/feature-stats.py | 256 ------ tests/e2e/bq/testutils.py | 55 -- 5 files changed, 1590 deletions(-) delete mode 100755 infra/scripts/test-end-to-end-batch-dataflow.sh delete mode 100755 infra/scripts/test-end-to-end-batch.sh delete mode 100644 tests/e2e/bq/bq-batch-retrieval.py delete mode 100644 tests/e2e/bq/feature-stats.py delete mode 100644 tests/e2e/bq/testutils.py diff --git a/infra/scripts/test-end-to-end-batch-dataflow.sh b/infra/scripts/test-end-to-end-batch-dataflow.sh deleted file mode 100755 index 363ba7dc47..0000000000 --- a/infra/scripts/test-end-to-end-batch-dataflow.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env bash -echo "Preparing environment variables..." - -set -e -set -o pipefail - -test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account-df/service-account-df.json" -test -z ${GCLOUD_PROJECT} && GCLOUD_PROJECT="kf-feast" -test -z ${GCLOUD_REGION} && GCLOUD_REGION="us-central1" -test -z ${GCLOUD_NETWORK} && GCLOUD_NETWORK="default" -test -z ${GCLOUD_SUBNET} && GCLOUD_SUBNET="default" -test -z ${TEMP_BUCKET} && TEMP_BUCKET="kf-feast-dataflow-temp" -test -z ${K8_CLUSTER_NAME} && K8_CLUSTER_NAME="feast-e2e-dataflow" -test -z ${HELM_RELEASE_NAME} && HELM_RELEASE_NAME="pr-$PULL_NUMBER" -test -z ${HELM_COMMON_NAME} && HELM_COMMON_NAME="deps" -test -z ${DATASET_NAME} && DATASET_NAME=feast_e2e_$(date +%s) -test -z ${SPECS_TOPIC} && SPECS_TOPIC=feast-specs-$(date +%s) -test -z ${FEATURES_TOPIC} && FEATURES_TOPIC=feast-$(date +%s) - - -feast_kafka_1_ip_name="feast-kafka-1" -feast_kafka_2_ip_name="feast-kafka-2" -feast_kafka_3_ip_name="feast-kafka-3" -feast_redis_ip_name="feast-redis" -feast_statsd_ip_name="feast-statsd" - -echo " -This script will run end-to-end tests for Feast Core and Batch Serving using Dataflow Runner. - -1. Setup K8s cluster (optional, if it was not created before) -2. Reuse existing IP addresses or generate new ones for stateful services -3. Install stateful services (kafka, redis, postgres, etc) (optional) -4. Build core & serving docker images (optional) -5. Create temporary BQ table for Feast Serving. -6. Rollout target images to cluster via helm in dedicated namespace (pr-{number}) -7. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from - tests/e2e via pytest. -8. Tear down feast services, keep stateful services. 
-" - -ORIGINAL_DIR=$(pwd) -echo $ORIGINAL_DIR - -echo "Environment:" -printenv - -export GOOGLE_APPLICATION_CREDENTIALS -gcloud auth activate-service-account --key-file ${GOOGLE_APPLICATION_CREDENTIALS} -gcloud -q auth configure-docker - -gcloud config set project ${GCLOUD_PROJECT} -gcloud config set compute/region ${GCLOUD_REGION} -gcloud config list - -apt-get -qq update -apt-get -y install wget build-essential gettext-base curl - -curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 -chmod 700 $ORIGINAL_DIR/get_helm.sh -$ORIGINAL_DIR/get_helm.sh - - -function getPublicAddresses() { - existing_addresses=$(gcloud compute addresses list --filter="region:($GCLOUD_REGION) name:kafka" --format "list(name)") - if [[ -z "$existing_addresses" ]]; then - echo " -============================================================ -Reserving IP addresses for Feast dependencies -============================================================ -" - - gcloud compute addresses create \ - $feast_kafka_1_ip_name $feast_kafka_2_ip_name $feast_kafka_3_ip_name $feast_redis_ip_name $feast_statsd_ip_name \ - --region ${GCLOUD_REGION} --subnet ${GCLOUD_SUBNET} - fi - - - export feast_kafka_1_ip=$(gcloud compute addresses describe $feast_kafka_1_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_kafka_2_ip=$(gcloud compute addresses describe $feast_kafka_2_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_kafka_3_ip=$(gcloud compute addresses describe $feast_kafka_3_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_redis_ip=$(gcloud compute addresses describe $feast_redis_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_statsd_ip=$(gcloud compute addresses describe $feast_statsd_ip_name --region=${GCLOUD_REGION} --format "value(address)") -} - -function createKubeCluster() { - echo " -============================================================ -Creating GKE nodepool for Feast e2e test with DataflowRunner -============================================================ -" - gcloud container clusters create ${K8_CLUSTER_NAME} --region ${GCLOUD_REGION} \ - --enable-cloud-logging \ - --enable-cloud-monitoring \ - --network ${GCLOUD_NETWORK} \ - --subnetwork ${GCLOUD_SUBNET} \ - --scopes https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,\ -https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/service.management.readonly,\ -https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/trace.append,\ -https://www.googleapis.com/auth/bigquery \ - --machine-type n1-standard-2 - - echo " -============================================================ -Create feast-postgres-database Secret in GKE nodepool -============================================================ -" - kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password - - echo " -============================================================ -Create feast-gcp-service-account Secret in GKE nodepool -============================================================ -" - cd $ORIGINAL_DIR/infra/scripts - kubectl create secret generic feast-gcp-service-account --from-file=credentials.json=${GOOGLE_APPLICATION_CREDENTIALS} -} - -function installDependencies() { - echo " -============================================================ -Helm install common parts (kafka, redis, etc) 
-============================================================ -" - cd $ORIGINAL_DIR/infra/charts/feast - - helm install --replace --wait --debug --values="values-end-to-end-batch-dataflow-updated.yaml" \ - --set "feast-core.enabled=false" \ - --set "feast-online-serving.enabled=false" \ - --set "feast-batch-serving.enabled=false" \ - --set "postgresql.enabled=false" - "$HELM_COMMON_NAME" . - -} - -function buildAndPushImage() -{ - echo docker build -t $1:$2 --build-arg REVISION=$2 -f $3 $ORIGINAL_DIR - docker build -t $1:$2 --build-arg REVISION=$2 -f $3 $ORIGINAL_DIR - docker push $1:$2 -} - -function buildTarget() { - buildAndPushImage "gcr.io/kf-feast/feast-core" "$PULL_NUMBER" "$ORIGINAL_DIR/infra/docker/core/Dockerfile" - buildAndPushImage "gcr.io/kf-feast/feast-serving" "$PULL_NUMBER" "$ORIGINAL_DIR/infra/docker/serving/Dockerfile" -} - -function installTarget() { - echo " -============================================================ -Helm install feast -============================================================ -" - cd $ORIGINAL_DIR/infra/charts/feast - - helm install --wait --timeout 300s --debug --values="values-end-to-end-batch-dataflow-updated.yaml" \ - --set "kafka.enabled=false" \ - --set "redis.enabled=false" \ - --set "prometheus-statsd-exporter.enabled=false" \ - --set "prometheus.enabled=false" \ - "$HELM_RELEASE_NAME" . - -} - -function clean() { - echo " - ============================================================ - Cleaning up - ============================================================ - " - cd $ORIGINAL_DIR/tests/e2e - - # Remove BQ Dataset - bq rm -r -f ${GCLOUD_PROJECT}:${DATASET_NAME} - - # Uninstall helm release before clearing PVCs - helm uninstall ${HELM_RELEASE_NAME} - - kubectl delete pvc data-${HELM_RELEASE_NAME}-postgresql-0 - - # Stop Dataflow jobs from retrieved Dataflow job ids in ingesting_jobs.txt - if [ -f ingesting_jobs.txt ]; then - while read line - do - echo $line - gcloud dataflow jobs cancel $line --region=${GCLOUD_REGION} - done < ingesting_jobs.txt - fi -} - -# 1. -existing_cluster=$(gcloud container clusters list --format "list(name)" --filter "name:$K8_CLUSTER_NAME") -if [[ -z $existing_cluster ]]; then - createKubeCluster "$@" -else - gcloud container clusters get-credentials $K8_CLUSTER_NAME --region $GCLOUD_REGION --project $GCLOUD_PROJECT -fi - -# 2. 
-getPublicAddresses "$@" - -echo " -============================================================ -Export required environment variables -============================================================ -" - -export TEMP_BUCKET=$TEMP_BUCKET/$HELM_RELEASE_NAME/$(date +%s) -export DATASET_NAME=$DATASET_NAME -export GCLOUD_PROJECT=$GCLOUD_PROJECT -export GCLOUD_NETWORK=$GCLOUD_NETWORK -export GCLOUD_SUBNET=$GCLOUD_SUBNET -export GCLOUD_REGION=$GCLOUD_REGION -export HELM_COMMON_NAME=$HELM_COMMON_NAME -export IMAGE_TAG=$PULL_PULL_SHA -export SPECS_TOPIC=$SPECS_TOPIC -export FEATURES_TOPIC=$FEATURES_TOPIC - -export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel) -export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts -source ${SCRIPTS_DIR}/setup-common-functions.sh - -wait_for_docker_image gcr.io/kf-feast/feast-core:"${IMAGE_TAG}" -wait_for_docker_image gcr.io/kf-feast/feast-serving:"${IMAGE_TAG}" - -envsubst $'$TEMP_BUCKET $DATASET_NAME $GCLOUD_PROJECT $GCLOUD_NETWORK $SPECS_TOPIC $FEATURES_TOPIC \ - $GCLOUD_SUBNET $GCLOUD_REGION $IMAGE_TAG $HELM_COMMON_NAME $feast_kafka_1_ip - $feast_kafka_2_ip $feast_kafka_3_ip $feast_redis_ip $feast_statsd_ip' < $ORIGINAL_DIR/infra/scripts/test-templates/values-end-to-end-batch-dataflow.yaml > $ORIGINAL_DIR/infra/charts/feast/values-end-to-end-batch-dataflow-updated.yaml - - -# 3. -existing_deps=$(helm list --filter deps -q) -if [[ -z $existing_deps ]]; then - installDependencies "$@" -fi - -# 4. -# buildTarget "$@" - -# 5. -echo " -============================================================ -Creating temp BQ table for Feast Serving -============================================================ -" - -bq --location=US --project_id=${GCLOUD_PROJECT} mk \ - --dataset \ - --default_table_expiration 86400 \ - ${GCLOUD_PROJECT}:${DATASET_NAME} - - -# 6. - -set +e -installTarget "$@" - -# 7. -echo " -============================================================ -Installing Python 3.7 with Miniconda and Feast SDK -============================================================ -" -cd $ORIGINAL_DIR -# Install Python 3.7 with Miniconda -wget -q https://repo.continuum.io/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh \ - -O /tmp/miniconda.sh -bash /tmp/miniconda.sh -b -p /root/miniconda -f -/root/miniconda/bin/conda init -source ~/.bashrc - -# Install Feast Python SDK and test requirements -cd $ORIGINAL_DIR -make compile-protos-python -pip install -qe sdk/python -pip install -qr tests/e2e/requirements.txt - -echo " -============================================================ -Running end-to-end tests with pytest at 'tests/e2e' -============================================================ -" -# Default artifact location setting in Prow jobs -LOGS_ARTIFACT_PATH=/logs/artifacts - -cd $ORIGINAL_DIR/tests/e2e - -core_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-core) -serving_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-batch-serving) -jobcontroller_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-jobcontroller) - -set +e -pytest -s -v bq/bq-batch-retrieval.py -m dataflow_runner --core_url "$core_ip:6565" --serving_url "$serving_ip:6566" \ - --jobcontroller_url "$jobcontroller_ip:6570" --gcs_path "gs://${TEMP_BUCKET}" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml -TEST_EXIT_CODE=$? 
- -if [[ ${TEST_EXIT_CODE} != 0 ]]; then - echo "[DEBUG] Printing logs" - ls -ltrh /var/log/feast* - cat /var/log/feast-serving-warehouse.log /var/log/feast-core.log - - echo "[DEBUG] Printing Python packages list" - pip list -else - clean "$@" -fi - -exit ${TEST_EXIT_CODE} diff --git a/infra/scripts/test-end-to-end-batch.sh b/infra/scripts/test-end-to-end-batch.sh deleted file mode 100755 index c741fe7168..0000000000 --- a/infra/scripts/test-end-to-end-batch.sh +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env bash - -set -e -set -o pipefail - -PYTEST_MARK='direct_runner' #default - -print_usage() { - printf "Usage: ./test-end-to-end-batch -m pytest_mark" -} - -while getopts 'm:' flag; do - case "${flag}" in - m) PYTEST_MARK="${OPTARG}" ;; - *) print_usage - exit 1 ;; - esac -done - -test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account/service-account.json" -test -z ${SKIP_BUILD_JARS} && SKIP_BUILD_JARS="false" -test -z ${GOOGLE_CLOUD_PROJECT} && GOOGLE_CLOUD_PROJECT="kf-feast" -test -z ${TEMP_BUCKET} && TEMP_BUCKET="feast-templocation-kf-feast" -test -z ${JOBS_STAGING_LOCATION} && JOBS_STAGING_LOCATION="gs://${TEMP_BUCKET}/staging-location/$(date +%s)" - -# Get the current build version using maven (and pom.xml) -export FEAST_BUILD_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) -echo Building version: $FEAST_BUILD_VERSION - -# Get Feast project repository root and scripts directory -export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel) -export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts - -echo " -This script will run end-to-end tests for Feast Core and Batch Serving. - -1. Install gcloud SDK -2. Install Redis as the job store for Feast Batch Serving. -4. Install Postgres for persisting Feast metadata. -5. Install Kafka and Zookeeper as the Source in Feast. -6. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from - tests/e2e via pytest. -" - -source ${SCRIPTS_DIR}/setup-common-functions.sh - -install_test_tools -install_gcloud_sdk -install_and_start_local_redis -install_and_start_local_postgres -install_and_start_local_zookeeper_and_kafka - -if [[ ${SKIP_BUILD_JARS} != "true" ]]; then - build_feast_core_and_serving -else - echo "[DEBUG] Skipping building jars" -fi - -DATASET_NAME=feast_$(date +%s) -bq --location=US --project_id=${GOOGLE_CLOUD_PROJECT} mk \ - --dataset \ - --default_table_expiration 86400 \ - ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} - -# Start Feast Core in background -cat < /tmp/jc.warehouse.application.yml -feast: - core-host: localhost - core-port: 6565 - jobs: - polling_interval_milliseconds: 10000 - active_runner: direct - consolidate-jobs-per-source: true - runners: - - name: direct - type: DirectRunner - options: - tempLocation: gs://${TEMP_BUCKET}/tempLocation - -EOF - -cat < /tmp/serving.warehouse.application.yml -feast: - # GRPC service address for Feast Core - # Feast Serving requires connection to Feast Core to retrieve and reload Feast metadata (e.g. FeatureSpecs, Store information) - core-host: localhost - core-grpc-port: 6565 - - # Indicates the active store. Only a single store in the last can be active at one time. 
In the future this key - # will be deprecated in order to allow multiple stores to be served from a single serving instance - active_store: historical - - # List of store configurations - stores: - - name: historical - type: BIGQUERY - config: - project_id: ${GOOGLE_CLOUD_PROJECT} - dataset_id: ${DATASET_NAME} - staging_location: ${JOBS_STAGING_LOCATION} - initial_retry_delay_seconds: 1 - total_timeout_seconds: 21600 - write_triggering_frequency_seconds: 1 - subscriptions: - - name: "*" - project: "*" - version: "*" - - job_store: - redis_host: localhost - redis_port: 6379 - - tracing: - enabled: false - -server: - port: 8081 - -EOF - -cat /tmp/jc.warehouse.application.yml /tmp/serving.warehouse.application.yml - -start_feast_core -start_feast_jobcontroller /tmp/jc.warehouse.application.yml -start_feast_serving /tmp/serving.warehouse.application.yml - -install_python_with_miniconda_and_feast_sdk - -print_banner "Running end-to-end tests with pytest at 'tests/e2e'" -# Default artifact location setting in Prow jobs -LOGS_ARTIFACT_PATH=/logs/artifacts - -ORIGINAL_DIR=$(pwd) -cd tests/e2e - -set +e -pytest bq/* -v -m ${PYTEST_MARK} --gcs_path ${JOBS_STAGING_LOCATION} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml -TEST_EXIT_CODE=$? - -if [[ ${TEST_EXIT_CODE} != 0 ]]; then - echo "[DEBUG] Printing logs" - ls -ltrh /var/log/feast* - cat /var/log/feast-serving-online.log /var/log/feast-core.log /var/log/feast-jobcontroller.log - - echo "[DEBUG] Printing Python packages list" - pip list -else - print_banner "Cleaning up" - - bq rm -r -f ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} -fi - -exit ${TEST_EXIT_CODE} diff --git a/tests/e2e/bq/bq-batch-retrieval.py b/tests/e2e/bq/bq-batch-retrieval.py deleted file mode 100644 index 2d94d2e6cf..0000000000 --- a/tests/e2e/bq/bq-batch-retrieval.py +++ /dev/null @@ -1,819 +0,0 @@ -import math -import os -import random -import time -import uuid -from datetime import datetime, timedelta -from urllib.parse import urlparse - -import numpy as np -import pandas as pd -import pytest -import pytz -import tensorflow_data_validation as tfdv -from google.cloud import bigquery, storage -from google.cloud.storage import Blob -from google.protobuf.duration_pb2 import Duration -from pandavro import to_avro - -from bq.testutils import assert_stats_equal, clear_unsupported_fields -from feast.client import Client -from feast.contrib.job_controller.client import Client as JCClient -from feast.core.CoreService_pb2 import ListStoresRequest -from feast.core.FeatureSet_pb2 import FeatureSetStatus -from feast.core.IngestionJob_pb2 import IngestionJobStatus -from feast.entity import Entity -from feast.feature import Feature -from feast.feature_set import FeatureSet -from feast.type_map import ValueType -from feast.wait import wait_retry_backoff - -pd.set_option("display.max_columns", None) - -PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] - - -@pytest.fixture(scope="module") -def core_url(pytestconfig): - return pytestconfig.getoption("core_url") - - -@pytest.fixture(scope="module") -def serving_url(pytestconfig): - return pytestconfig.getoption("serving_url") - - -@pytest.fixture(scope="module") -def jobcontroller_url(pytestconfig): - return pytestconfig.getoption("jobcontroller_url") - - -@pytest.fixture(scope="module") -def allow_dirty(pytestconfig): - return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False - - -@pytest.fixture(scope="module") -def gcs_path(pytestconfig): - return pytestconfig.getoption("gcs_path") - - 
-@pytest.fixture(scope="module") -def client(core_url, serving_url, allow_dirty): - # Get client for core and serving - client = Client(core_url=core_url, serving_url=serving_url) - client.create_project(PROJECT_NAME) - client.set_project(PROJECT_NAME) - - # Ensure Feast core is active, but empty - if not allow_dirty: - feature_sets = client.list_feature_sets() - if len(feature_sets) > 0: - raise Exception( - "Feast cannot have existing feature sets registered. Exiting tests." - ) - - return client - - -def wait_for(fn, timeout: timedelta, sleep=5): - until = datetime.now() + timeout - last_exc = BaseException() - - while datetime.now() <= until: - try: - fn() - except Exception as exc: - last_exc = exc - else: - return - time.sleep(sleep) - - raise last_exc - - -@pytest.mark.first -@pytest.mark.direct_runner -@pytest.mark.dataflow_runner -@pytest.mark.run(order=1) -def test_batch_apply_all_featuresets(client): - client.set_project(PROJECT_NAME) - - file_fs1 = FeatureSet( - "file_feature_set", - features=[Feature("feature_value1", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(file_fs1) - - gcs_fs1 = FeatureSet( - "gcs_feature_set", - features=[Feature("feature_value2", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(gcs_fs1) - - proc_time_fs = FeatureSet( - "processing_time", - features=[Feature("feature_value3", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(proc_time_fs) - - add_cols_fs = FeatureSet( - "additional_columns", - features=[Feature("feature_value4", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(add_cols_fs) - - historical_fs = FeatureSet( - "historical", - features=[Feature("feature_value5", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(historical_fs) - - fs1 = FeatureSet( - "feature_set_1", - features=[Feature("feature_value6", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - - fs2 = FeatureSet( - "feature_set_2", - features=[Feature("other_feature_value7", ValueType.INT64)], - entities=[Entity("other_entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(fs1) - client.apply(fs2) - - no_max_age_fs = FeatureSet( - "no_max_age", - features=[Feature("feature_value8", ValueType.INT64)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=0), - ) - client.apply(no_max_age_fs) - - -@pytest.mark.direct_runner -@pytest.mark.dataflow_runner -@pytest.mark.run(order=10) -def test_batch_get_historical_features_with_file(client): - file_fs1 = client.get_feature_set(name="file_feature_set") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value1": [f"{i}" for i in range(N_ROWS)], - } - ) - - # feature set may be ready (direct runner set ready right after job submitted), - # but kafka consumer is not configured - # give some time to warm up ingestion job - wait_retry_backoff( - retry_fn=( - lambda: ( - None, - client.get_feature_set(name="file_feature_set").status - == FeatureSetStatus.STATUS_READY, - ) - ), - timeout_secs=480, - timeout_msg="Wait for 
FeatureSet to be READY", - ) - time.sleep(20) - - client.ingest(file_fs1, features_1_df, timeout=480) - - # Rename column (datetime -> event_timestamp) - features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) - - to_avro( - df=features_1_df[["event_timestamp", "entity_id"]], - file_path_or_buffer="file_feature_set.avro", - ) - - time.sleep(10) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows="file://file_feature_set.avro", - feature_refs=["feature_value1"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["entity_id"].to_list() == [ - int(i) for i in output["feature_value1"].to_list() - ] - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=10)) - - -@pytest.mark.direct_runner -@pytest.mark.dataflow_runner -@pytest.mark.run(order=11) -def test_batch_get_historical_features_with_gs_path(client, gcs_path): - gcs_fs1 = client.get_feature_set(name="gcs_feature_set") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value2": [f"{i}" for i in range(N_ROWS)], - } - ) - client.ingest(gcs_fs1, features_1_df, timeout=360) - - # Rename column (datetime -> event_timestamp) - features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) - - # Output file to local - file_name = "gcs_feature_set.avro" - to_avro( - df=features_1_df[["event_timestamp", "entity_id"]], - file_path_or_buffer=file_name, - ) - - uri = urlparse(gcs_path) - bucket = uri.hostname - ts = int(time.time()) - remote_path = str(uri.path).strip("/") + f"/{ts}/{file_name}" - - # Upload file to gcs - storage_client = storage.Client(project=None) - bucket = storage_client.get_bucket(bucket) - blob = bucket.blob(remote_path) - blob.upload_from_filename(file_name) - - time.sleep(10) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=f"{gcs_path}/{ts}/*", - feature_refs=["feature_value2"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - assert output["entity_id"].to_list() == [ - int(i) for i in output["feature_value2"].to_list() - ] - - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - blob.delete() - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=12) -def test_batch_order_by_creation_time(client): - proc_time_fs = client.get_feature_set(name="processing_time") - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - N_ROWS = 10 - incorrect_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value3": ["WRONG"] * N_ROWS, - } - ) - correct_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value3": ["CORRECT"] * N_ROWS, - } - ) - client.ingest(proc_time_fs, incorrect_df) - time.sleep(15) - client.ingest(proc_time_fs, correct_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=incorrect_df[["datetime", "entity_id"]], - feature_refs=["feature_value3"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["feature_value3"].to_list() == ["CORRECT"] * N_ROWS - - 
clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=13) -def test_batch_additional_columns_in_entity_table(client): - add_cols_fs = client.get_feature_set(name="additional_columns") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value4": ["abc"] * N_ROWS, - } - ) - client.ingest(add_cols_fs, features_df) - - entity_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "additional_string_col": ["hello im extra"] * N_ROWS, - "additional_float_col": [random.random() for i in range(N_ROWS)], - } - ) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value4"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - print(output.head(10)) - - assert np.allclose( - output["additional_float_col"], entity_df["additional_float_col"] - ) - assert ( - output["additional_string_col"].to_list() - == entity_df["additional_string_col"].to_list() - ) - assert ( - output["feature_value4"].to_list() - == features_df["feature_value4"].to_list() - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=14) -def test_batch_point_in_time_correctness_join(client): - historical_fs = client.get_feature_set(name="historical") - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - N_EXAMPLES = 10 - historical_df = pd.DataFrame( - { - "datetime": [ - time_offset - timedelta(seconds=50), - time_offset - timedelta(seconds=30), - time_offset - timedelta(seconds=10), - ] - * N_EXAMPLES, - "entity_id": [i for i in range(N_EXAMPLES) for _ in range(3)], - "feature_value5": ["WRONG", "WRONG", "CORRECT"] * N_EXAMPLES, - } - ) - entity_df = pd.DataFrame( - { - "datetime": [time_offset - timedelta(seconds=10)] * N_EXAMPLES, - "entity_id": [i for i in range(N_EXAMPLES)], - } - ) - - client.ingest(historical_fs, historical_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value5"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["feature_value5"].to_list() == ["CORRECT"] * N_EXAMPLES - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=15) -def test_batch_multiple_featureset_joins(client): - fs1 = client.get_feature_set(name="feature_set_1") - fs2 = client.get_feature_set(name="feature_set_2") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value6": [f"{i}" for i in range(N_ROWS)], - } - ) - client.ingest(fs1, features_1_df) - - features_2_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "other_entity_id": [i for i in range(N_ROWS)], - "other_feature_value7": [i for i in range(N_ROWS)], - } - ) - client.ingest(fs2, features_2_df) - - entity_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in 
range(N_ROWS)], - "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)], - } - ) - - # Test retrieve with different variations of the string feature refs - # ie feature set inference for feature refs without specified feature set - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value6", "feature_set_2:other_feature_value7"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["entity_id"].to_list() == [ - int(i) for i in output["feature_value6"].to_list() - ] - assert ( - output["other_entity_id"].to_list() - == output["feature_set_2__other_feature_value7"].to_list() - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=16) -def test_batch_no_max_age(client): - no_max_age_fs = client.get_feature_set(name="no_max_age") - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - N_ROWS = 10 - features_8_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value8": [i for i in range(N_ROWS)], - } - ) - client.ingest(no_max_age_fs, features_8_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=features_8_df[["datetime", "entity_id"]], - feature_refs=["feature_value8"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["entity_id"].to_list() == output["feature_value8"].to_list() - - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.fixture(scope="module", autouse=True) -def infra_teardown(pytestconfig, jobcontroller_url): - client = JCClient(jobcontroller_url=jobcontroller_url) - - marker = pytestconfig.getoption("-m") - yield marker - if marker == "dataflow_runner": - ingest_jobs = client.list_ingest_jobs() - ingest_jobs = [ - client.list_ingest_jobs(job.id)[0].external_id - for job in ingest_jobs - if job.status == IngestionJobStatus.RUNNING - ] - - cwd = os.getcwd() - with open(f"{cwd}/ingesting_jobs.txt", "w+") as output: - for job in ingest_jobs: - output.write("%s\n" % job) - else: - print("Cleaning up not required") - - -""" -This suite of tests tests the apply feature set - update feature set - retrieve -event sequence. It ensures that when a feature set is updated, tombstoned features -are no longer retrieved, and added features are null for previously ingested -rows. - -It is marked separately because of the length of time required -to perform this test, due to bigquery schema caching for streaming writes. 
-""" - - -@pytest.fixture(scope="module") -def update_featureset_dataframe(): - n_rows = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - return pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [i for i in range(n_rows)], - "update_feature1": ["a" for i in range(n_rows)], - "update_feature2": [i + 2 for i in range(n_rows)], - "update_feature3": [i for i in range(n_rows)], - "update_feature4": ["b" for i in range(n_rows)], - } - ) - - -@pytest.mark.fs_update -@pytest.mark.run(order=20) -def test_update_featureset_apply_featureset_and_ingest_first_subset( - client, update_featureset_dataframe -): - subset_columns = ["datetime", "entity_id", "update_feature1", "update_feature2"] - subset_df = update_featureset_dataframe.iloc[:5][subset_columns] - update_fs = FeatureSet( - "update_fs", - entities=[Entity(name="entity_id", dtype=ValueType.INT64)], - max_age=Duration(seconds=432000), - ) - update_fs.infer_fields_from_df(subset_df) - client.apply(update_fs) - - client.ingest(feature_set=update_fs, source=subset_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5], - feature_refs=["update_feature1", "update_feature2"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - print(output.head()) - - assert ( - output["update_feature1"].to_list() - == subset_df["update_feature1"].to_list() - ) - assert ( - output["update_feature2"].to_list() - == subset_df["update_feature2"].to_list() - ) - - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.fs_update -@pytest.mark.timeout(600) -@pytest.mark.run(order=21) -def test_update_featureset_update_featureset_and_ingest_second_subset( - client, update_featureset_dataframe -): - subset_columns = [ - "datetime", - "entity_id", - "update_feature1", - "update_feature3", - "update_feature4", - ] - subset_df = update_featureset_dataframe.iloc[5:][subset_columns] - update_fs = FeatureSet( - "update_fs", - entities=[Entity(name="entity_id", dtype=ValueType.INT64)], - max_age=Duration(seconds=432000), - ) - update_fs.infer_fields_from_df(subset_df) - client.apply(update_fs) - - # We keep retrying this ingestion until all values make it into the buffer. - # This is a necessary step because bigquery streaming caches table schemas - # and as a result, rows may be lost. - while True: - ingestion_id = client.ingest(feature_set=update_fs, source=subset_df) - time.sleep(15) # wait for rows to get written to bq - rows_ingested = get_rows_ingested(client, update_fs, ingestion_id) - if rows_ingested == len(subset_df): - print(f"Number of rows successfully ingested: {rows_ingested}. Continuing.") - break - print( - f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion." 
- ) - time.sleep(30) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:], - feature_refs=["update_feature1", "update_feature3", "update_feature4"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - print(output.head()) - - assert ( - output["update_feature1"].to_list() - == subset_df["update_feature1"].to_list() - ) - assert ( - output["update_feature3"].to_list() - == subset_df["update_feature3"].to_list() - ) - assert ( - output["update_feature4"].to_list() - == subset_df["update_feature4"].to_list() - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.fs_update -@pytest.mark.run(order=22) -def test_update_featureset_retrieve_all_fields(client, update_featureset_dataframe): - with pytest.raises(Exception): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]], - feature_refs=[ - "update_feature1", - "update_feature2", - "update_feature3", - "update_feature4", - ], - project=PROJECT_NAME, - ) - feature_retrieval_job.result() - - -@pytest.mark.fs_update -@pytest.mark.run(order=23) -def test_update_featureset_retrieve_valid_fields(client, update_featureset_dataframe): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]], - feature_refs=["update_feature1", "update_feature3", "update_feature4"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - print(output.head(10)) - assert ( - output["update_feature1"].to_list() - == update_featureset_dataframe["update_feature1"].to_list() - ) - # we have to convert to float because the column contains np.NaN - assert [math.isnan(i) for i in output["update_feature3"].to_list()[:5]] == [ - True - ] * 5 - assert output["update_feature3"].to_list()[5:] == [ - float(i) for i in update_featureset_dataframe["update_feature3"].to_list()[5:] - ] - assert ( - output["update_feature4"].to_list() - == [None] * 5 + update_featureset_dataframe["update_feature4"].to_list()[5:] - ) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=31) -@pytest.mark.timeout(600) -def test_batch_dataset_statistics(client): - fs1 = client.get_feature_set(name="feature_set_1") - fs2 = client.get_feature_set(name="feature_set_2") - id_offset = 20 - - n_rows = 21 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [id_offset + i for i in range(n_rows)], - "feature_value6": ["a" for i in range(n_rows)], - } - ) - ingestion_id1 = client.ingest(fs1, features_1_df) - - features_2_df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "other_entity_id": [id_offset + i for i in range(n_rows)], - "other_feature_value7": [int(i) % 10 for i in range(0, n_rows)], - } - ) - ingestion_id2 = client.ingest(fs2, features_2_df) - - entity_df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [id_offset + i for i in range(n_rows)], - "other_entity_id": [id_offset + i for i in range(n_rows)], - } - ) - - time.sleep(15) # wait for rows to get written to bq - while True: - rows_ingested1 = get_rows_ingested(client, fs1, ingestion_id1) - 
rows_ingested2 = get_rows_ingested(client, fs2, ingestion_id2) - if rows_ingested1 == len(features_1_df) and rows_ingested2 == len( - features_2_df - ): - print( - f"Number of rows successfully ingested: {rows_ingested1}, {rows_ingested2}. Continuing." - ) - break - time.sleep(30) - - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value6", "feature_set_2:other_feature_value7"], - project=PROJECT_NAME, - compute_statistics=True, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head(10)) - stats = feature_retrieval_job.statistics(timeout_sec=180) - clear_unsupported_fields(stats) - - expected_stats = tfdv.generate_statistics_from_dataframe( - output[["feature_value6", "feature_set_2__other_feature_value7"]] - ) - clear_unsupported_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = output[name].std() - feature.num_stats.std_dev = std - - assert_stats_equal(expected_stats, stats) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - -def get_rows_ingested( - client: Client, feature_set: FeatureSet, ingestion_id: str -) -> int: - response = client._core_service.ListStores( - ListStoresRequest(filter=ListStoresRequest.Filter(name="historical")) - ) - bq_config = response.store[0].bigquery_config - project = bq_config.project_id - dataset = bq_config.dataset_id - table = f"{PROJECT_NAME}_{feature_set.name}" - - bq_client = bigquery.Client(project=project) - rows = bq_client.query( - f'SELECT COUNT(*) as count FROM `{project}.{dataset}.{table}` WHERE ingestion_id = "{ingestion_id}"' - ).result() - - return list(rows)[0]["count"] - - -def clean_up_remote_files(files): - storage_client = storage.Client() - for file_uri in files: - if file_uri.scheme == "gs": - blob = Blob.from_string(file_uri.geturl(), client=storage_client) - blob.delete() diff --git a/tests/e2e/bq/feature-stats.py b/tests/e2e/bq/feature-stats.py deleted file mode 100644 index 226dc358f1..0000000000 --- a/tests/e2e/bq/feature-stats.py +++ /dev/null @@ -1,256 +0,0 @@ -import os -import time -import uuid -from datetime import datetime, timedelta - -import pandas as pd -import pytest -import pytz -import tensorflow_data_validation as tfdv -from google.protobuf.duration_pb2 import Duration - -from bq.testutils import ( - assert_stats_equal, - clear_unsupported_agg_fields, - clear_unsupported_fields, -) -from feast.client import Client -from feast.entity import Entity -from feast.feature import Feature -from feast.feature_set import FeatureSet -from feast.type_map import ValueType - -pd.set_option("display.max_columns", None) - -PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] -STORE_NAME = "historical" -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - - -@pytest.fixture(scope="module") -def core_url(pytestconfig): - return pytestconfig.getoption("core_url") - - -@pytest.fixture(scope="module") -def serving_url(pytestconfig): - return pytestconfig.getoption("serving_url") - - -@pytest.fixture(scope="module") -def allow_dirty(pytestconfig): - return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False - - -@pytest.fixture(scope="module") -def gcs_path(pytestconfig): - return pytestconfig.getoption("gcs_path") - - -@pytest.fixture(scope="module") -def client(core_url, allow_dirty): - # Get client for core and serving - client = Client(core_url=core_url) - 
client.create_project(PROJECT_NAME) - client.set_project(PROJECT_NAME) - - # Ensure Feast core is active, but empty - if not allow_dirty: - feature_sets = client.list_feature_sets() - if len(feature_sets) > 0: - raise Exception( - "Feast cannot have existing feature sets registered. Exiting tests." - ) - - return client - - -@pytest.fixture(scope="module") -def feature_stats_feature_set(client): - fv_fs = FeatureSet( - "feature_stats", - features=[ - Feature("strings", ValueType.STRING), - Feature("ints", ValueType.INT64), - Feature("floats", ValueType.FLOAT), - ], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(fv_fs) - return fv_fs - - -@pytest.fixture(scope="module") -def feature_stats_dataset_basic(client, feature_stats_feature_set): - - n_rows = 20 - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [i for i in range(n_rows)], - "strings": ["a", "b"] * int(n_rows / 2), - "ints": [int(i) for i in range(n_rows)], - "floats": [10.5 - i for i in range(n_rows)], - } - ) - - expected_stats = tfdv.generate_statistics_from_dataframe( - df[["strings", "ints", "floats"]] - ) - clear_unsupported_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = df[name].std() - feature.num_stats.std_dev = std - - ingestion_id = client.ingest(feature_stats_feature_set, df) - time.sleep(10) - return { - "df": df, - "id": ingestion_id, - "date": datetime(time_offset.year, time_offset.month, time_offset.day).replace( - tzinfo=pytz.utc - ), - "stats": expected_stats, - } - - -@pytest.fixture(scope="module") -def feature_stats_dataset_agg(client, feature_stats_feature_set): - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - start_date = time_offset - timedelta(days=10) - end_date = time_offset - timedelta(days=7) - df1 = pd.DataFrame( - { - "datetime": [start_date] * 5, - "entity_id": [i for i in range(5)], - "strings": ["a", "b", "b", "b", "a"], - "ints": [4, 3, 2, 6, 3], - "floats": [2.1, 5.2, 4.3, 0.6, 0.1], - } - ) - ingestion_id_1 = client.ingest(feature_stats_feature_set, df1) - df2 = pd.DataFrame( - { - "datetime": [start_date + timedelta(days=1)] * 3, - "entity_id": [i for i in range(3)], - "strings": ["a", "b", "c"], - "ints": [2, 6, 7], - "floats": [1.6, 2.4, 2], - } - ) - ingestion_id_2 = client.ingest(feature_stats_feature_set, df2) - - combined_df = pd.concat([df1, df2])[["strings", "ints", "floats"]] - expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) - clear_unsupported_agg_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = combined_df[name].std() - feature.num_stats.std_dev = std - - time.sleep(10) - - return { - "ids": [ingestion_id_1, ingestion_id_2], - "start_date": datetime( - start_date.year, start_date.month, start_date.day - ).replace(tzinfo=pytz.utc), - "end_date": datetime(end_date.year, end_date.month, end_date.day).replace( - tzinfo=pytz.utc - ), - "stats": expected_stats, - } - - -def test_feature_stats_retrieval_by_single_dataset(client, feature_stats_dataset_basic): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - ingestion_ids=[feature_stats_dataset_basic["id"]], - ) 
- - assert_stats_equal(feature_stats_dataset_basic["stats"], stats) - - -def test_feature_stats_by_date(client, feature_stats_dataset_basic): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - start_date=feature_stats_dataset_basic["date"], - end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), - ) - assert_stats_equal(feature_stats_dataset_basic["stats"], stats) - - -def test_feature_stats_agg_over_datasets(client, feature_stats_dataset_agg): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - ingestion_ids=feature_stats_dataset_agg["ids"], - ) - assert_stats_equal(feature_stats_dataset_agg["stats"], stats) - - -def test_feature_stats_agg_over_dates(client, feature_stats_dataset_agg): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - start_date=feature_stats_dataset_agg["start_date"], - end_date=feature_stats_dataset_agg["end_date"], - ) - assert_stats_equal(feature_stats_dataset_agg["stats"], stats) - - -def test_feature_stats_force_refresh( - client, feature_stats_dataset_basic, feature_stats_feature_set -): - df = feature_stats_dataset_basic["df"] - - df2 = pd.DataFrame( - { - "datetime": [df.iloc[0].datetime], - "entity_id": [10], - "strings": ["c"], - "ints": [2], - "floats": [1.3], - } - ) - client.ingest(feature_stats_feature_set, df2) - time.sleep(10) - - actual_stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store="historical", - start_date=feature_stats_dataset_basic["date"], - end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), - force_refresh=True, - ) - - combined_df = pd.concat([df, df2]) - expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) - - clear_unsupported_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = combined_df[name].std() - feature.num_stats.std_dev = std - - assert_stats_equal(expected_stats, actual_stats) diff --git a/tests/e2e/bq/testutils.py b/tests/e2e/bq/testutils.py deleted file mode 100644 index 9ac678bc59..0000000000 --- a/tests/e2e/bq/testutils.py +++ /dev/null @@ -1,55 +0,0 @@ -from deepdiff import DeepDiff -from google.protobuf.json_format import MessageToDict - - -def clear_unsupported_fields(datasets): - dataset = datasets.datasets[0] - for feature in dataset.features: - if feature.HasField("num_stats"): - feature.num_stats.common_stats.ClearField("num_values_histogram") - # Since difference in how BQ and TFDV compute histogram values make them - # approximate but uncomparable - feature.num_stats.ClearField("histograms") - elif feature.HasField("string_stats"): - feature.string_stats.common_stats.ClearField("num_values_histogram") - for bucket in feature.string_stats.rank_histogram.buckets: - bucket.ClearField("low_rank") - bucket.ClearField("high_rank") - elif feature.HasField("struct_stats"): - feature.string_stats.struct_stats.ClearField("num_values_histogram") - elif feature.HasField("bytes_stats"): - feature.string_stats.bytes_stats.ClearField("num_values_histogram") - - -def clear_unsupported_agg_fields(datasets): - dataset = datasets.datasets[0] - for feature in dataset.features: - if feature.HasField("num_stats"): - feature.num_stats.common_stats.ClearField("num_values_histogram") - 
feature.num_stats.ClearField("histograms") - feature.num_stats.ClearField("median") - elif feature.HasField("string_stats"): - feature.string_stats.common_stats.ClearField("num_values_histogram") - feature.string_stats.ClearField("rank_histogram") - feature.string_stats.ClearField("top_values") - feature.string_stats.ClearField("unique") - elif feature.HasField("struct_stats"): - feature.struct_stats.ClearField("num_values_histogram") - elif feature.HasField("bytes_stats"): - feature.bytes_stats.ClearField("num_values_histogram") - feature.bytes_stats.ClearField("unique") - - -def assert_stats_equal(left, right): - left_stats = MessageToDict(left)["datasets"][0] - right_stats = MessageToDict(right)["datasets"][0] - assert ( - left_stats["numExamples"] == right_stats["numExamples"] - ), f"Number of examples do not match. Expected {left_stats['numExamples']}, got {right_stats['numExamples']}" - - left_features = sorted(left_stats["features"], key=lambda k: k["path"]["step"][0]) - right_features = sorted(right_stats["features"], key=lambda k: k["path"]["step"][0]) - diff = DeepDiff(left_features, right_features, significant_digits=3) - assert ( - len(diff) == 0 - ), f"Feature statistics do not match: \nwanted: {left_features}\n got: {right_features}" From 0205ea8b2fe88f156e64ad6c012cc99fc062b238 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 12:36:04 +0800 Subject: [PATCH 06/34] Fix env variable Signed-off-by: Terence --- infra/scripts/test-end-to-end-redis-cluster.sh | 2 +- infra/scripts/test-end-to-end.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index 544c1f4d3d..083079a32b 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -103,7 +103,7 @@ cd tests/e2e set +e CORE_NO=$(nproc --all) -pytest redis/parallel-ingest-redis-serving.py -n CORE_NO --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest redis/parallel-ingest-redis-serving.py -n ${CORE_NO} --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/infra/scripts/test-end-to-end.sh b/infra/scripts/test-end-to-end.sh index e65c72b0ba..a7dadd5a1f 100755 --- a/infra/scripts/test-end-to-end.sh +++ b/infra/scripts/test-end-to-end.sh @@ -120,7 +120,7 @@ cd tests/e2e set +e export GOOGLE_APPLICATION_CREDENTIALS=/etc/gcloud/service-account.json CORE_NO=$(nproc --all) -pytest redis/parallel-ingest-redis-serving.py -n CORE_NO --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest redis/parallel-ingest-redis-serving.py -n ${CORE_NO} --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? 
if [[ ${TEST_EXIT_CODE} != 0 ]]; then From 894ee48819a159a3b35640d6e19f887a3ee10631 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 14:05:56 +0800 Subject: [PATCH 07/34] Fix pytest redis Signed-off-by: Terence --- tests/e2e/redis/parallel-ingest-redis-serving.py | 3 --- tests/e2e/requirements.txt | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/e2e/redis/parallel-ingest-redis-serving.py b/tests/e2e/redis/parallel-ingest-redis-serving.py index fb30746a0f..7a213dc8af 100644 --- a/tests/e2e/redis/parallel-ingest-redis-serving.py +++ b/tests/e2e/redis/parallel-ingest-redis-serving.py @@ -72,7 +72,6 @@ def test_discovery(self, client): actual_matchmaking_entities = client.list_entities( labels=matchmaking_filtering_labels ) - assert len(actual_common_entities) == 2 assert len(actual_matchmaking_entities) == 1 @@ -168,8 +167,6 @@ def test_discovery(self, client): actual_alltypes_entities = client.list_entities( labels=alltypes_filtering_labels ) - - assert len(client.list_entities()) == 1 assert len(actual_alltypes_entities) == 1 # ApplyFeatureTable diff --git a/tests/e2e/requirements.txt b/tests/e2e/requirements.txt index 9c6dd06ac1..68595ee1b5 100644 --- a/tests/e2e/requirements.txt +++ b/tests/e2e/requirements.txt @@ -2,7 +2,7 @@ mock==2.0.0 numpy==1.16.4 pandas~=1.0.0 pandavro==1.5.* -pytest==5.2.1 +pytest==6.0.0 pytest-benchmark==3.2.2 pytest-mock==1.10.4 pytest-timeout==1.3.3 From f9a9a3236969357bef39a09c20217c17eb5f9f37 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 14:06:58 +0800 Subject: [PATCH 08/34] Revert "Remove e2e bq tests" This reverts commit fa0bcab17b244142dbdc44f6c48a4108e5a2b522. Signed-off-by: Terence --- .prow/config.yaml | 91 ++ Makefile | 2 +- .../scripts/test-end-to-end-batch-dataflow.sh | 307 +++++++ infra/scripts/test-end-to-end-batch.sh | 153 ++++ tests/e2e/bq/bq-batch-retrieval.py | 819 ++++++++++++++++++ tests/e2e/bq/feature-stats.py | 256 ++++++ tests/e2e/bq/testutils.py | 55 ++ 7 files changed, 1682 insertions(+), 1 deletion(-) create mode 100755 infra/scripts/test-end-to-end-batch-dataflow.sh create mode 100755 infra/scripts/test-end-to-end-batch.sh create mode 100644 tests/e2e/bq/bq-batch-retrieval.py create mode 100644 tests/e2e/bq/feature-stats.py create mode 100644 tests/e2e/bq/testutils.py diff --git a/.prow/config.yaml b/.prow/config.yaml index 39c275603d..d2269fcc6d 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -254,6 +254,97 @@ presubmits: branches: - ^v0\.(3|4)-branch$ + - name: test-end-to-end-batch + decorate: true + always_run: true + spec: + volumes: + - name: service-account + secret: + secretName: feast-service-account + containers: + - image: maven:3.6-jdk-11 + command: ["infra/scripts/test-end-to-end-batch.sh"] + resources: + requests: + cpu: "6" + memory: "6144Mi" + volumeMounts: + - name: service-account + mountPath: "/etc/service-account" + skip_branches: + - ^v0\.(3|4)-branch$ + + - name: test-end-to-end-batch-fs-update + decorate: true + always_run: false + spec: + volumes: + - name: service-account + secret: + secretName: feast-service-account + containers: + - image: maven:3.6-jdk-11 + command: ["infra/scripts/test-end-to-end-batch.sh", "-m", "fs_update"] + resources: + requests: + cpu: "6" + memory: "6144Mi" + volumeMounts: + - name: service-account + mountPath: "/etc/service-account" + skip_branches: + - ^v0\.(3|4)-branch$ + + - name: test-end-to-end-batch-java-8 + decorate: true + always_run: true + spec: + volumes: + - name: service-account + secret: + secretName: 
feast-service-account + containers: + - image: maven:3.6-jdk-8 + command: ["infra/scripts/test-end-to-end-batch.sh"] + resources: + requests: + cpu: "6" + memory: "6144Mi" + volumeMounts: + - name: service-account + mountPath: "/etc/service-account" + branches: + - ^v0\.(3|4)-branch$ + + - name: test-end-to-end-batch-dataflow + decorate: true + always_run: true + spec: + volumes: + - name: service-account-df + secret: + secretName: feast-e2e-service-account + - name: docker-socket + hostPath: + path: /var/run/docker.sock + containers: + - image: google/cloud-sdk:302.0.0 + command: ["infra/scripts/test-end-to-end-batch-dataflow.sh"] + resources: + requests: + cpu: "6" + memory: "6144Mi" + volumeMounts: + - name: service-account-df + mountPath: "/etc/service-account-df" + - name: docker-socket + mountPath: /var/run/docker.sock + securityContext: + privileged: true + skip_branches: + - ^v0\.(3|4)-branch$ + postsubmits: feast-dev/feast: - name: publish-python-sdk diff --git a/Makefile b/Makefile index f159ad624d..8fffe20816 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ lint-python: cd ${ROOT_DIR}/sdk/python; flake8 feast/ tests/ cd ${ROOT_DIR}/sdk/python; black --check feast tests - cd ${ROOT_DIR}/tests/e2e; mypy redis/ + cd ${ROOT_DIR}/tests/e2e; mypy bq/ redis/ cd ${ROOT_DIR}/tests/e2e; isort . --check-only cd ${ROOT_DIR}/tests/e2e; flake8 . cd ${ROOT_DIR}/tests/e2e; black --check . diff --git a/infra/scripts/test-end-to-end-batch-dataflow.sh b/infra/scripts/test-end-to-end-batch-dataflow.sh new file mode 100755 index 0000000000..363ba7dc47 --- /dev/null +++ b/infra/scripts/test-end-to-end-batch-dataflow.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash +echo "Preparing environment variables..." + +set -e +set -o pipefail + +test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account-df/service-account-df.json" +test -z ${GCLOUD_PROJECT} && GCLOUD_PROJECT="kf-feast" +test -z ${GCLOUD_REGION} && GCLOUD_REGION="us-central1" +test -z ${GCLOUD_NETWORK} && GCLOUD_NETWORK="default" +test -z ${GCLOUD_SUBNET} && GCLOUD_SUBNET="default" +test -z ${TEMP_BUCKET} && TEMP_BUCKET="kf-feast-dataflow-temp" +test -z ${K8_CLUSTER_NAME} && K8_CLUSTER_NAME="feast-e2e-dataflow" +test -z ${HELM_RELEASE_NAME} && HELM_RELEASE_NAME="pr-$PULL_NUMBER" +test -z ${HELM_COMMON_NAME} && HELM_COMMON_NAME="deps" +test -z ${DATASET_NAME} && DATASET_NAME=feast_e2e_$(date +%s) +test -z ${SPECS_TOPIC} && SPECS_TOPIC=feast-specs-$(date +%s) +test -z ${FEATURES_TOPIC} && FEATURES_TOPIC=feast-$(date +%s) + + +feast_kafka_1_ip_name="feast-kafka-1" +feast_kafka_2_ip_name="feast-kafka-2" +feast_kafka_3_ip_name="feast-kafka-3" +feast_redis_ip_name="feast-redis" +feast_statsd_ip_name="feast-statsd" + +echo " +This script will run end-to-end tests for Feast Core and Batch Serving using Dataflow Runner. + +1. Setup K8s cluster (optional, if it was not created before) +2. Reuse existing IP addresses or generate new ones for stateful services +3. Install stateful services (kafka, redis, postgres, etc) (optional) +4. Build core & serving docker images (optional) +5. Create temporary BQ table for Feast Serving. +6. Rollout target images to cluster via helm in dedicated namespace (pr-{number}) +7. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from + tests/e2e via pytest. +8. Tear down feast services, keep stateful services. 
+" + +ORIGINAL_DIR=$(pwd) +echo $ORIGINAL_DIR + +echo "Environment:" +printenv + +export GOOGLE_APPLICATION_CREDENTIALS +gcloud auth activate-service-account --key-file ${GOOGLE_APPLICATION_CREDENTIALS} +gcloud -q auth configure-docker + +gcloud config set project ${GCLOUD_PROJECT} +gcloud config set compute/region ${GCLOUD_REGION} +gcloud config list + +apt-get -qq update +apt-get -y install wget build-essential gettext-base curl + +curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 +chmod 700 $ORIGINAL_DIR/get_helm.sh +$ORIGINAL_DIR/get_helm.sh + + +function getPublicAddresses() { + existing_addresses=$(gcloud compute addresses list --filter="region:($GCLOUD_REGION) name:kafka" --format "list(name)") + if [[ -z "$existing_addresses" ]]; then + echo " +============================================================ +Reserving IP addresses for Feast dependencies +============================================================ +" + + gcloud compute addresses create \ + $feast_kafka_1_ip_name $feast_kafka_2_ip_name $feast_kafka_3_ip_name $feast_redis_ip_name $feast_statsd_ip_name \ + --region ${GCLOUD_REGION} --subnet ${GCLOUD_SUBNET} + fi + + + export feast_kafka_1_ip=$(gcloud compute addresses describe $feast_kafka_1_ip_name --region=${GCLOUD_REGION} --format "value(address)") + export feast_kafka_2_ip=$(gcloud compute addresses describe $feast_kafka_2_ip_name --region=${GCLOUD_REGION} --format "value(address)") + export feast_kafka_3_ip=$(gcloud compute addresses describe $feast_kafka_3_ip_name --region=${GCLOUD_REGION} --format "value(address)") + export feast_redis_ip=$(gcloud compute addresses describe $feast_redis_ip_name --region=${GCLOUD_REGION} --format "value(address)") + export feast_statsd_ip=$(gcloud compute addresses describe $feast_statsd_ip_name --region=${GCLOUD_REGION} --format "value(address)") +} + +function createKubeCluster() { + echo " +============================================================ +Creating GKE nodepool for Feast e2e test with DataflowRunner +============================================================ +" + gcloud container clusters create ${K8_CLUSTER_NAME} --region ${GCLOUD_REGION} \ + --enable-cloud-logging \ + --enable-cloud-monitoring \ + --network ${GCLOUD_NETWORK} \ + --subnetwork ${GCLOUD_SUBNET} \ + --scopes https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,\ +https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/service.management.readonly,\ +https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/trace.append,\ +https://www.googleapis.com/auth/bigquery \ + --machine-type n1-standard-2 + + echo " +============================================================ +Create feast-postgres-database Secret in GKE nodepool +============================================================ +" + kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password + + echo " +============================================================ +Create feast-gcp-service-account Secret in GKE nodepool +============================================================ +" + cd $ORIGINAL_DIR/infra/scripts + kubectl create secret generic feast-gcp-service-account --from-file=credentials.json=${GOOGLE_APPLICATION_CREDENTIALS} +} + +function installDependencies() { + echo " +============================================================ +Helm install common parts (kafka, redis, etc) 
+============================================================ +" + cd $ORIGINAL_DIR/infra/charts/feast + + helm install --replace --wait --debug --values="values-end-to-end-batch-dataflow-updated.yaml" \ + --set "feast-core.enabled=false" \ + --set "feast-online-serving.enabled=false" \ + --set "feast-batch-serving.enabled=false" \ + --set "postgresql.enabled=false" + "$HELM_COMMON_NAME" . + +} + +function buildAndPushImage() +{ + echo docker build -t $1:$2 --build-arg REVISION=$2 -f $3 $ORIGINAL_DIR + docker build -t $1:$2 --build-arg REVISION=$2 -f $3 $ORIGINAL_DIR + docker push $1:$2 +} + +function buildTarget() { + buildAndPushImage "gcr.io/kf-feast/feast-core" "$PULL_NUMBER" "$ORIGINAL_DIR/infra/docker/core/Dockerfile" + buildAndPushImage "gcr.io/kf-feast/feast-serving" "$PULL_NUMBER" "$ORIGINAL_DIR/infra/docker/serving/Dockerfile" +} + +function installTarget() { + echo " +============================================================ +Helm install feast +============================================================ +" + cd $ORIGINAL_DIR/infra/charts/feast + + helm install --wait --timeout 300s --debug --values="values-end-to-end-batch-dataflow-updated.yaml" \ + --set "kafka.enabled=false" \ + --set "redis.enabled=false" \ + --set "prometheus-statsd-exporter.enabled=false" \ + --set "prometheus.enabled=false" \ + "$HELM_RELEASE_NAME" . + +} + +function clean() { + echo " + ============================================================ + Cleaning up + ============================================================ + " + cd $ORIGINAL_DIR/tests/e2e + + # Remove BQ Dataset + bq rm -r -f ${GCLOUD_PROJECT}:${DATASET_NAME} + + # Uninstall helm release before clearing PVCs + helm uninstall ${HELM_RELEASE_NAME} + + kubectl delete pvc data-${HELM_RELEASE_NAME}-postgresql-0 + + # Stop Dataflow jobs from retrieved Dataflow job ids in ingesting_jobs.txt + if [ -f ingesting_jobs.txt ]; then + while read line + do + echo $line + gcloud dataflow jobs cancel $line --region=${GCLOUD_REGION} + done < ingesting_jobs.txt + fi +} + +# 1. +existing_cluster=$(gcloud container clusters list --format "list(name)" --filter "name:$K8_CLUSTER_NAME") +if [[ -z $existing_cluster ]]; then + createKubeCluster "$@" +else + gcloud container clusters get-credentials $K8_CLUSTER_NAME --region $GCLOUD_REGION --project $GCLOUD_PROJECT +fi + +# 2. 
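# Step 2 reuses regional static IP addresses reserved by a previous run so the
# Kafka/Redis/StatsD LoadBalancers keep stable addresses between e2e jobs;
# getPublicAddresses only creates the addresses when its lookup comes back empty.
# A single reserved address can be inspected manually with, for example:
#   gcloud compute addresses describe feast-kafka-1 --region us-central1 --format "value(address)"
# (name and region above are the defaults set at the top of this script).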
+getPublicAddresses "$@" + +echo " +============================================================ +Export required environment variables +============================================================ +" + +export TEMP_BUCKET=$TEMP_BUCKET/$HELM_RELEASE_NAME/$(date +%s) +export DATASET_NAME=$DATASET_NAME +export GCLOUD_PROJECT=$GCLOUD_PROJECT +export GCLOUD_NETWORK=$GCLOUD_NETWORK +export GCLOUD_SUBNET=$GCLOUD_SUBNET +export GCLOUD_REGION=$GCLOUD_REGION +export HELM_COMMON_NAME=$HELM_COMMON_NAME +export IMAGE_TAG=$PULL_PULL_SHA +export SPECS_TOPIC=$SPECS_TOPIC +export FEATURES_TOPIC=$FEATURES_TOPIC + +export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel) +export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts +source ${SCRIPTS_DIR}/setup-common-functions.sh + +wait_for_docker_image gcr.io/kf-feast/feast-core:"${IMAGE_TAG}" +wait_for_docker_image gcr.io/kf-feast/feast-serving:"${IMAGE_TAG}" + +envsubst $'$TEMP_BUCKET $DATASET_NAME $GCLOUD_PROJECT $GCLOUD_NETWORK $SPECS_TOPIC $FEATURES_TOPIC \ + $GCLOUD_SUBNET $GCLOUD_REGION $IMAGE_TAG $HELM_COMMON_NAME $feast_kafka_1_ip + $feast_kafka_2_ip $feast_kafka_3_ip $feast_redis_ip $feast_statsd_ip' < $ORIGINAL_DIR/infra/scripts/test-templates/values-end-to-end-batch-dataflow.yaml > $ORIGINAL_DIR/infra/charts/feast/values-end-to-end-batch-dataflow-updated.yaml + + +# 3. +existing_deps=$(helm list --filter deps -q) +if [[ -z $existing_deps ]]; then + installDependencies "$@" +fi + +# 4. +# buildTarget "$@" + +# 5. +echo " +============================================================ +Creating temp BQ table for Feast Serving +============================================================ +" + +bq --location=US --project_id=${GCLOUD_PROJECT} mk \ + --dataset \ + --default_table_expiration 86400 \ + ${GCLOUD_PROJECT}:${DATASET_NAME} + + +# 6. + +set +e +installTarget "$@" + +# 7. +echo " +============================================================ +Installing Python 3.7 with Miniconda and Feast SDK +============================================================ +" +cd $ORIGINAL_DIR +# Install Python 3.7 with Miniconda +wget -q https://repo.continuum.io/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh \ + -O /tmp/miniconda.sh +bash /tmp/miniconda.sh -b -p /root/miniconda -f +/root/miniconda/bin/conda init +source ~/.bashrc + +# Install Feast Python SDK and test requirements +cd $ORIGINAL_DIR +make compile-protos-python +pip install -qe sdk/python +pip install -qr tests/e2e/requirements.txt + +echo " +============================================================ +Running end-to-end tests with pytest at 'tests/e2e' +============================================================ +" +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts + +cd $ORIGINAL_DIR/tests/e2e + +core_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-core) +serving_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-batch-serving) +jobcontroller_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-jobcontroller) + +set +e +pytest -s -v bq/bq-batch-retrieval.py -m dataflow_runner --core_url "$core_ip:6565" --serving_url "$serving_ip:6566" \ + --jobcontroller_url "$jobcontroller_ip:6570" --gcs_path "gs://${TEMP_BUCKET}" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +TEST_EXIT_CODE=$? 
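# Capture pytest's exit status right away (set +e above keeps the script alive
# when tests fail) so the branch below can dump Feast serving/core logs on
# failure, clean up only on success, and still report the real test result via
# the final exit ${TEST_EXIT_CODE}.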
+ +if [[ ${TEST_EXIT_CODE} != 0 ]]; then + echo "[DEBUG] Printing logs" + ls -ltrh /var/log/feast* + cat /var/log/feast-serving-warehouse.log /var/log/feast-core.log + + echo "[DEBUG] Printing Python packages list" + pip list +else + clean "$@" +fi + +exit ${TEST_EXIT_CODE} diff --git a/infra/scripts/test-end-to-end-batch.sh b/infra/scripts/test-end-to-end-batch.sh new file mode 100755 index 0000000000..c741fe7168 --- /dev/null +++ b/infra/scripts/test-end-to-end-batch.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail + +PYTEST_MARK='direct_runner' #default + +print_usage() { + printf "Usage: ./test-end-to-end-batch -m pytest_mark" +} + +while getopts 'm:' flag; do + case "${flag}" in + m) PYTEST_MARK="${OPTARG}" ;; + *) print_usage + exit 1 ;; + esac +done + +test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account/service-account.json" +test -z ${SKIP_BUILD_JARS} && SKIP_BUILD_JARS="false" +test -z ${GOOGLE_CLOUD_PROJECT} && GOOGLE_CLOUD_PROJECT="kf-feast" +test -z ${TEMP_BUCKET} && TEMP_BUCKET="feast-templocation-kf-feast" +test -z ${JOBS_STAGING_LOCATION} && JOBS_STAGING_LOCATION="gs://${TEMP_BUCKET}/staging-location/$(date +%s)" + +# Get the current build version using maven (and pom.xml) +export FEAST_BUILD_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +echo Building version: $FEAST_BUILD_VERSION + +# Get Feast project repository root and scripts directory +export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel) +export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts + +echo " +This script will run end-to-end tests for Feast Core and Batch Serving. + +1. Install gcloud SDK +2. Install Redis as the job store for Feast Batch Serving. +4. Install Postgres for persisting Feast metadata. +5. Install Kafka and Zookeeper as the Source in Feast. +6. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from + tests/e2e via pytest. +" + +source ${SCRIPTS_DIR}/setup-common-functions.sh + +install_test_tools +install_gcloud_sdk +install_and_start_local_redis +install_and_start_local_postgres +install_and_start_local_zookeeper_and_kafka + +if [[ ${SKIP_BUILD_JARS} != "true" ]]; then + build_feast_core_and_serving +else + echo "[DEBUG] Skipping building jars" +fi + +DATASET_NAME=feast_$(date +%s) +bq --location=US --project_id=${GOOGLE_CLOUD_PROJECT} mk \ + --dataset \ + --default_table_expiration 86400 \ + ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} + +# Start Feast Core in background +cat < /tmp/jc.warehouse.application.yml +feast: + core-host: localhost + core-port: 6565 + jobs: + polling_interval_milliseconds: 10000 + active_runner: direct + consolidate-jobs-per-source: true + runners: + - name: direct + type: DirectRunner + options: + tempLocation: gs://${TEMP_BUCKET}/tempLocation + +EOF + +cat < /tmp/serving.warehouse.application.yml +feast: + # GRPC service address for Feast Core + # Feast Serving requires connection to Feast Core to retrieve and reload Feast metadata (e.g. FeatureSpecs, Store information) + core-host: localhost + core-grpc-port: 6565 + + # Indicates the active store. Only a single store in the last can be active at one time. 
In the future this key + # will be deprecated in order to allow multiple stores to be served from a single serving instance + active_store: historical + + # List of store configurations + stores: + - name: historical + type: BIGQUERY + config: + project_id: ${GOOGLE_CLOUD_PROJECT} + dataset_id: ${DATASET_NAME} + staging_location: ${JOBS_STAGING_LOCATION} + initial_retry_delay_seconds: 1 + total_timeout_seconds: 21600 + write_triggering_frequency_seconds: 1 + subscriptions: + - name: "*" + project: "*" + version: "*" + + job_store: + redis_host: localhost + redis_port: 6379 + + tracing: + enabled: false + +server: + port: 8081 + +EOF + +cat /tmp/jc.warehouse.application.yml /tmp/serving.warehouse.application.yml + +start_feast_core +start_feast_jobcontroller /tmp/jc.warehouse.application.yml +start_feast_serving /tmp/serving.warehouse.application.yml + +install_python_with_miniconda_and_feast_sdk + +print_banner "Running end-to-end tests with pytest at 'tests/e2e'" +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts + +ORIGINAL_DIR=$(pwd) +cd tests/e2e + +set +e +pytest bq/* -v -m ${PYTEST_MARK} --gcs_path ${JOBS_STAGING_LOCATION} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +TEST_EXIT_CODE=$? + +if [[ ${TEST_EXIT_CODE} != 0 ]]; then + echo "[DEBUG] Printing logs" + ls -ltrh /var/log/feast* + cat /var/log/feast-serving-online.log /var/log/feast-core.log /var/log/feast-jobcontroller.log + + echo "[DEBUG] Printing Python packages list" + pip list +else + print_banner "Cleaning up" + + bq rm -r -f ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} +fi + +exit ${TEST_EXIT_CODE} diff --git a/tests/e2e/bq/bq-batch-retrieval.py b/tests/e2e/bq/bq-batch-retrieval.py new file mode 100644 index 0000000000..2d94d2e6cf --- /dev/null +++ b/tests/e2e/bq/bq-batch-retrieval.py @@ -0,0 +1,819 @@ +import math +import os +import random +import time +import uuid +from datetime import datetime, timedelta +from urllib.parse import urlparse + +import numpy as np +import pandas as pd +import pytest +import pytz +import tensorflow_data_validation as tfdv +from google.cloud import bigquery, storage +from google.cloud.storage import Blob +from google.protobuf.duration_pb2 import Duration +from pandavro import to_avro + +from bq.testutils import assert_stats_equal, clear_unsupported_fields +from feast.client import Client +from feast.contrib.job_controller.client import Client as JCClient +from feast.core.CoreService_pb2 import ListStoresRequest +from feast.core.FeatureSet_pb2 import FeatureSetStatus +from feast.core.IngestionJob_pb2 import IngestionJobStatus +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_set import FeatureSet +from feast.type_map import ValueType +from feast.wait import wait_retry_backoff + +pd.set_option("display.max_columns", None) + +PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] + + +@pytest.fixture(scope="module") +def core_url(pytestconfig): + return pytestconfig.getoption("core_url") + + +@pytest.fixture(scope="module") +def serving_url(pytestconfig): + return pytestconfig.getoption("serving_url") + + +@pytest.fixture(scope="module") +def jobcontroller_url(pytestconfig): + return pytestconfig.getoption("jobcontroller_url") + + +@pytest.fixture(scope="module") +def allow_dirty(pytestconfig): + return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False + + +@pytest.fixture(scope="module") +def gcs_path(pytestconfig): + return pytestconfig.getoption("gcs_path") + + 
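# The CLI options consumed by the fixtures above (core_url, serving_url,
# jobcontroller_url, allow_dirty, gcs_path) are assumed to be registered in the
# e2e conftest.py through pytest's pytest_addoption hook. A minimal sketch,
# with made-up defaults rather than whatever the real conftest ships:
#
#   def pytest_addoption(parser):
#       parser.addoption("--core_url", action="store", default="localhost:6565")
#       parser.addoption("--serving_url", action="store", default="localhost:6566")
#       parser.addoption("--jobcontroller_url", action="store", default="localhost:6570")
#       parser.addoption("--allow_dirty", action="store", default="false")
#       parser.addoption("--gcs_path", action="store", default="gs://some-temp-bucket/")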
+@pytest.fixture(scope="module") +def client(core_url, serving_url, allow_dirty): + # Get client for core and serving + client = Client(core_url=core_url, serving_url=serving_url) + client.create_project(PROJECT_NAME) + client.set_project(PROJECT_NAME) + + # Ensure Feast core is active, but empty + if not allow_dirty: + feature_sets = client.list_feature_sets() + if len(feature_sets) > 0: + raise Exception( + "Feast cannot have existing feature sets registered. Exiting tests." + ) + + return client + + +def wait_for(fn, timeout: timedelta, sleep=5): + until = datetime.now() + timeout + last_exc = BaseException() + + while datetime.now() <= until: + try: + fn() + except Exception as exc: + last_exc = exc + else: + return + time.sleep(sleep) + + raise last_exc + + +@pytest.mark.first +@pytest.mark.direct_runner +@pytest.mark.dataflow_runner +@pytest.mark.run(order=1) +def test_batch_apply_all_featuresets(client): + client.set_project(PROJECT_NAME) + + file_fs1 = FeatureSet( + "file_feature_set", + features=[Feature("feature_value1", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(file_fs1) + + gcs_fs1 = FeatureSet( + "gcs_feature_set", + features=[Feature("feature_value2", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(gcs_fs1) + + proc_time_fs = FeatureSet( + "processing_time", + features=[Feature("feature_value3", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(proc_time_fs) + + add_cols_fs = FeatureSet( + "additional_columns", + features=[Feature("feature_value4", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(add_cols_fs) + + historical_fs = FeatureSet( + "historical", + features=[Feature("feature_value5", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(historical_fs) + + fs1 = FeatureSet( + "feature_set_1", + features=[Feature("feature_value6", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + + fs2 = FeatureSet( + "feature_set_2", + features=[Feature("other_feature_value7", ValueType.INT64)], + entities=[Entity("other_entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(fs1) + client.apply(fs2) + + no_max_age_fs = FeatureSet( + "no_max_age", + features=[Feature("feature_value8", ValueType.INT64)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=0), + ) + client.apply(no_max_age_fs) + + +@pytest.mark.direct_runner +@pytest.mark.dataflow_runner +@pytest.mark.run(order=10) +def test_batch_get_historical_features_with_file(client): + file_fs1 = client.get_feature_set(name="file_feature_set") + + N_ROWS = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_1_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value1": [f"{i}" for i in range(N_ROWS)], + } + ) + + # feature set may be ready (direct runner set ready right after job submitted), + # but kafka consumer is not configured + # give some time to warm up ingestion job + wait_retry_backoff( + retry_fn=( + lambda: ( + None, + client.get_feature_set(name="file_feature_set").status + == FeatureSetStatus.STATUS_READY, + ) + ), + timeout_secs=480, + timeout_msg="Wait for 
FeatureSet to be READY", + ) + time.sleep(20) + + client.ingest(file_fs1, features_1_df, timeout=480) + + # Rename column (datetime -> event_timestamp) + features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) + + to_avro( + df=features_1_df[["event_timestamp", "entity_id"]], + file_path_or_buffer="file_feature_set.avro", + ) + + time.sleep(10) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows="file://file_feature_set.avro", + feature_refs=["feature_value1"], + project=PROJECT_NAME, + ) + + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head()) + + assert output["entity_id"].to_list() == [ + int(i) for i in output["feature_value1"].to_list() + ] + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=10)) + + +@pytest.mark.direct_runner +@pytest.mark.dataflow_runner +@pytest.mark.run(order=11) +def test_batch_get_historical_features_with_gs_path(client, gcs_path): + gcs_fs1 = client.get_feature_set(name="gcs_feature_set") + + N_ROWS = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_1_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value2": [f"{i}" for i in range(N_ROWS)], + } + ) + client.ingest(gcs_fs1, features_1_df, timeout=360) + + # Rename column (datetime -> event_timestamp) + features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) + + # Output file to local + file_name = "gcs_feature_set.avro" + to_avro( + df=features_1_df[["event_timestamp", "entity_id"]], + file_path_or_buffer=file_name, + ) + + uri = urlparse(gcs_path) + bucket = uri.hostname + ts = int(time.time()) + remote_path = str(uri.path).strip("/") + f"/{ts}/{file_name}" + + # Upload file to gcs + storage_client = storage.Client(project=None) + bucket = storage_client.get_bucket(bucket) + blob = bucket.blob(remote_path) + blob.upload_from_filename(file_name) + + time.sleep(10) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=f"{gcs_path}/{ts}/*", + feature_refs=["feature_value2"], + project=PROJECT_NAME, + ) + + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head()) + assert output["entity_id"].to_list() == [ + int(i) for i in output["feature_value2"].to_list() + ] + + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + blob.delete() + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.direct_runner +@pytest.mark.run(order=12) +def test_batch_order_by_creation_time(client): + proc_time_fs = client.get_feature_set(name="processing_time") + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + N_ROWS = 10 + incorrect_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value3": ["WRONG"] * N_ROWS, + } + ) + correct_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value3": ["CORRECT"] * N_ROWS, + } + ) + client.ingest(proc_time_fs, incorrect_df) + time.sleep(15) + client.ingest(proc_time_fs, correct_df) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=incorrect_df[["datetime", "entity_id"]], + feature_refs=["feature_value3"], + project=PROJECT_NAME, + ) + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head()) + + assert output["feature_value3"].to_list() == ["CORRECT"] * N_ROWS + + 
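        # Each retrieval job exports Avro result files to remote storage; delete them
        # once the assertion passes so retried check() calls do not pile up staging files.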
clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.direct_runner +@pytest.mark.run(order=13) +def test_batch_additional_columns_in_entity_table(client): + add_cols_fs = client.get_feature_set(name="additional_columns") + + N_ROWS = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value4": ["abc"] * N_ROWS, + } + ) + client.ingest(add_cols_fs, features_df) + + entity_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "additional_string_col": ["hello im extra"] * N_ROWS, + "additional_float_col": [random.random() for i in range(N_ROWS)], + } + ) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=entity_df, + feature_refs=["feature_value4"], + project=PROJECT_NAME, + ) + output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( + by=["entity_id"] + ) + print(output.head(10)) + + assert np.allclose( + output["additional_float_col"], entity_df["additional_float_col"] + ) + assert ( + output["additional_string_col"].to_list() + == entity_df["additional_string_col"].to_list() + ) + assert ( + output["feature_value4"].to_list() + == features_df["feature_value4"].to_list() + ) + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.direct_runner +@pytest.mark.run(order=14) +def test_batch_point_in_time_correctness_join(client): + historical_fs = client.get_feature_set(name="historical") + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + N_EXAMPLES = 10 + historical_df = pd.DataFrame( + { + "datetime": [ + time_offset - timedelta(seconds=50), + time_offset - timedelta(seconds=30), + time_offset - timedelta(seconds=10), + ] + * N_EXAMPLES, + "entity_id": [i for i in range(N_EXAMPLES) for _ in range(3)], + "feature_value5": ["WRONG", "WRONG", "CORRECT"] * N_EXAMPLES, + } + ) + entity_df = pd.DataFrame( + { + "datetime": [time_offset - timedelta(seconds=10)] * N_EXAMPLES, + "entity_id": [i for i in range(N_EXAMPLES)], + } + ) + + client.ingest(historical_fs, historical_df) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=entity_df, + feature_refs=["feature_value5"], + project=PROJECT_NAME, + ) + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head()) + + assert output["feature_value5"].to_list() == ["CORRECT"] * N_EXAMPLES + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.direct_runner +@pytest.mark.run(order=15) +def test_batch_multiple_featureset_joins(client): + fs1 = client.get_feature_set(name="feature_set_1") + fs2 = client.get_feature_set(name="feature_set_2") + + N_ROWS = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_1_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value6": [f"{i}" for i in range(N_ROWS)], + } + ) + client.ingest(fs1, features_1_df) + + features_2_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "other_entity_id": [i for i in range(N_ROWS)], + "other_feature_value7": [i for i in range(N_ROWS)], + } + ) + client.ingest(fs2, features_2_df) + + entity_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in 
range(N_ROWS)], + "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)], + } + ) + + # Test retrieve with different variations of the string feature refs + # ie feature set inference for feature refs without specified feature set + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=entity_df, + feature_refs=["feature_value6", "feature_set_2:other_feature_value7"], + project=PROJECT_NAME, + ) + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head()) + + assert output["entity_id"].to_list() == [ + int(i) for i in output["feature_value6"].to_list() + ] + assert ( + output["other_entity_id"].to_list() + == output["feature_set_2__other_feature_value7"].to_list() + ) + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.direct_runner +@pytest.mark.run(order=16) +def test_batch_no_max_age(client): + no_max_age_fs = client.get_feature_set(name="no_max_age") + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + N_ROWS = 10 + features_8_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value8": [i for i in range(N_ROWS)], + } + ) + client.ingest(no_max_age_fs, features_8_df) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=features_8_df[["datetime", "entity_id"]], + feature_refs=["feature_value8"], + project=PROJECT_NAME, + ) + + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head()) + + assert output["entity_id"].to_list() == output["feature_value8"].to_list() + + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.fixture(scope="module", autouse=True) +def infra_teardown(pytestconfig, jobcontroller_url): + client = JCClient(jobcontroller_url=jobcontroller_url) + + marker = pytestconfig.getoption("-m") + yield marker + if marker == "dataflow_runner": + ingest_jobs = client.list_ingest_jobs() + ingest_jobs = [ + client.list_ingest_jobs(job.id)[0].external_id + for job in ingest_jobs + if job.status == IngestionJobStatus.RUNNING + ] + + cwd = os.getcwd() + with open(f"{cwd}/ingesting_jobs.txt", "w+") as output: + for job in ingest_jobs: + output.write("%s\n" % job) + else: + print("Cleaning up not required") + + +""" +This suite of tests tests the apply feature set - update feature set - retrieve +event sequence. It ensures that when a feature set is updated, tombstoned features +are no longer retrieved, and added features are null for previously ingested +rows. + +It is marked separately because of the length of time required +to perform this test, due to bigquery schema caching for streaming writes. 
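The tests below therefore ingest two overlapping subsets of the same feature set,
retry the second ingestion until its rows are actually visible in BigQuery, and then
verify that retrieving a removed feature fails while retained and newly added
features come back with NaN/None padding for rows ingested before the update.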
+""" + + +@pytest.fixture(scope="module") +def update_featureset_dataframe(): + n_rows = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + return pd.DataFrame( + { + "datetime": [time_offset] * n_rows, + "entity_id": [i for i in range(n_rows)], + "update_feature1": ["a" for i in range(n_rows)], + "update_feature2": [i + 2 for i in range(n_rows)], + "update_feature3": [i for i in range(n_rows)], + "update_feature4": ["b" for i in range(n_rows)], + } + ) + + +@pytest.mark.fs_update +@pytest.mark.run(order=20) +def test_update_featureset_apply_featureset_and_ingest_first_subset( + client, update_featureset_dataframe +): + subset_columns = ["datetime", "entity_id", "update_feature1", "update_feature2"] + subset_df = update_featureset_dataframe.iloc[:5][subset_columns] + update_fs = FeatureSet( + "update_fs", + entities=[Entity(name="entity_id", dtype=ValueType.INT64)], + max_age=Duration(seconds=432000), + ) + update_fs.infer_fields_from_df(subset_df) + client.apply(update_fs) + + client.ingest(feature_set=update_fs, source=subset_df) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5], + feature_refs=["update_feature1", "update_feature2"], + project=PROJECT_NAME, + ) + + output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( + by=["entity_id"] + ) + print(output.head()) + + assert ( + output["update_feature1"].to_list() + == subset_df["update_feature1"].to_list() + ) + assert ( + output["update_feature2"].to_list() + == subset_df["update_feature2"].to_list() + ) + + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.fs_update +@pytest.mark.timeout(600) +@pytest.mark.run(order=21) +def test_update_featureset_update_featureset_and_ingest_second_subset( + client, update_featureset_dataframe +): + subset_columns = [ + "datetime", + "entity_id", + "update_feature1", + "update_feature3", + "update_feature4", + ] + subset_df = update_featureset_dataframe.iloc[5:][subset_columns] + update_fs = FeatureSet( + "update_fs", + entities=[Entity(name="entity_id", dtype=ValueType.INT64)], + max_age=Duration(seconds=432000), + ) + update_fs.infer_fields_from_df(subset_df) + client.apply(update_fs) + + # We keep retrying this ingestion until all values make it into the buffer. + # This is a necessary step because bigquery streaming caches table schemas + # and as a result, rows may be lost. + while True: + ingestion_id = client.ingest(feature_set=update_fs, source=subset_df) + time.sleep(15) # wait for rows to get written to bq + rows_ingested = get_rows_ingested(client, update_fs, ingestion_id) + if rows_ingested == len(subset_df): + print(f"Number of rows successfully ingested: {rows_ingested}. Continuing.") + break + print( + f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion." 
+ ) + time.sleep(30) + + def check(): + feature_retrieval_job = client.get_historical_features( + entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:], + feature_refs=["update_feature1", "update_feature3", "update_feature4"], + project=PROJECT_NAME, + ) + + output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( + by=["entity_id"] + ) + print(output.head()) + + assert ( + output["update_feature1"].to_list() + == subset_df["update_feature1"].to_list() + ) + assert ( + output["update_feature3"].to_list() + == subset_df["update_feature3"].to_list() + ) + assert ( + output["update_feature4"].to_list() + == subset_df["update_feature4"].to_list() + ) + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + wait_for(check, timedelta(minutes=5)) + + +@pytest.mark.fs_update +@pytest.mark.run(order=22) +def test_update_featureset_retrieve_all_fields(client, update_featureset_dataframe): + with pytest.raises(Exception): + feature_retrieval_job = client.get_historical_features( + entity_rows=update_featureset_dataframe[["datetime", "entity_id"]], + feature_refs=[ + "update_feature1", + "update_feature2", + "update_feature3", + "update_feature4", + ], + project=PROJECT_NAME, + ) + feature_retrieval_job.result() + + +@pytest.mark.fs_update +@pytest.mark.run(order=23) +def test_update_featureset_retrieve_valid_fields(client, update_featureset_dataframe): + feature_retrieval_job = client.get_historical_features( + entity_rows=update_featureset_dataframe[["datetime", "entity_id"]], + feature_refs=["update_feature1", "update_feature3", "update_feature4"], + project=PROJECT_NAME, + ) + output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( + by=["entity_id"] + ) + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + print(output.head(10)) + assert ( + output["update_feature1"].to_list() + == update_featureset_dataframe["update_feature1"].to_list() + ) + # we have to convert to float because the column contains np.NaN + assert [math.isnan(i) for i in output["update_feature3"].to_list()[:5]] == [ + True + ] * 5 + assert output["update_feature3"].to_list()[5:] == [ + float(i) for i in update_featureset_dataframe["update_feature3"].to_list()[5:] + ] + assert ( + output["update_feature4"].to_list() + == [None] * 5 + update_featureset_dataframe["update_feature4"].to_list()[5:] + ) + + +@pytest.mark.direct_runner +@pytest.mark.run(order=31) +@pytest.mark.timeout(600) +def test_batch_dataset_statistics(client): + fs1 = client.get_feature_set(name="feature_set_1") + fs2 = client.get_feature_set(name="feature_set_2") + id_offset = 20 + + n_rows = 21 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_1_df = pd.DataFrame( + { + "datetime": [time_offset] * n_rows, + "entity_id": [id_offset + i for i in range(n_rows)], + "feature_value6": ["a" for i in range(n_rows)], + } + ) + ingestion_id1 = client.ingest(fs1, features_1_df) + + features_2_df = pd.DataFrame( + { + "datetime": [time_offset] * n_rows, + "other_entity_id": [id_offset + i for i in range(n_rows)], + "other_feature_value7": [int(i) % 10 for i in range(0, n_rows)], + } + ) + ingestion_id2 = client.ingest(fs2, features_2_df) + + entity_df = pd.DataFrame( + { + "datetime": [time_offset] * n_rows, + "entity_id": [id_offset + i for i in range(n_rows)], + "other_entity_id": [id_offset + i for i in range(n_rows)], + } + ) + + time.sleep(15) # wait for rows to get written to bq + while True: + rows_ingested1 = get_rows_ingested(client, fs1, ingestion_id1) + 
rows_ingested2 = get_rows_ingested(client, fs2, ingestion_id2) + if rows_ingested1 == len(features_1_df) and rows_ingested2 == len( + features_2_df + ): + print( + f"Number of rows successfully ingested: {rows_ingested1}, {rows_ingested2}. Continuing." + ) + break + time.sleep(30) + + feature_retrieval_job = client.get_historical_features( + entity_rows=entity_df, + feature_refs=["feature_value6", "feature_set_2:other_feature_value7"], + project=PROJECT_NAME, + compute_statistics=True, + ) + output = feature_retrieval_job.to_dataframe(timeout_sec=180) + print(output.head(10)) + stats = feature_retrieval_job.statistics(timeout_sec=180) + clear_unsupported_fields(stats) + + expected_stats = tfdv.generate_statistics_from_dataframe( + output[["feature_value6", "feature_set_2__other_feature_value7"]] + ) + clear_unsupported_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = output[name].std() + feature.num_stats.std_dev = std + + assert_stats_equal(expected_stats, stats) + clean_up_remote_files(feature_retrieval_job.get_avro_files()) + + +def get_rows_ingested( + client: Client, feature_set: FeatureSet, ingestion_id: str +) -> int: + response = client._core_service.ListStores( + ListStoresRequest(filter=ListStoresRequest.Filter(name="historical")) + ) + bq_config = response.store[0].bigquery_config + project = bq_config.project_id + dataset = bq_config.dataset_id + table = f"{PROJECT_NAME}_{feature_set.name}" + + bq_client = bigquery.Client(project=project) + rows = bq_client.query( + f'SELECT COUNT(*) as count FROM `{project}.{dataset}.{table}` WHERE ingestion_id = "{ingestion_id}"' + ).result() + + return list(rows)[0]["count"] + + +def clean_up_remote_files(files): + storage_client = storage.Client() + for file_uri in files: + if file_uri.scheme == "gs": + blob = Blob.from_string(file_uri.geturl(), client=storage_client) + blob.delete() diff --git a/tests/e2e/bq/feature-stats.py b/tests/e2e/bq/feature-stats.py new file mode 100644 index 0000000000..226dc358f1 --- /dev/null +++ b/tests/e2e/bq/feature-stats.py @@ -0,0 +1,256 @@ +import os +import time +import uuid +from datetime import datetime, timedelta + +import pandas as pd +import pytest +import pytz +import tensorflow_data_validation as tfdv +from google.protobuf.duration_pb2 import Duration + +from bq.testutils import ( + assert_stats_equal, + clear_unsupported_agg_fields, + clear_unsupported_fields, +) +from feast.client import Client +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_set import FeatureSet +from feast.type_map import ValueType + +pd.set_option("display.max_columns", None) + +PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] +STORE_NAME = "historical" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +@pytest.fixture(scope="module") +def core_url(pytestconfig): + return pytestconfig.getoption("core_url") + + +@pytest.fixture(scope="module") +def serving_url(pytestconfig): + return pytestconfig.getoption("serving_url") + + +@pytest.fixture(scope="module") +def allow_dirty(pytestconfig): + return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False + + +@pytest.fixture(scope="module") +def gcs_path(pytestconfig): + return pytestconfig.getoption("gcs_path") + + +@pytest.fixture(scope="module") +def client(core_url, allow_dirty): + # Get client for core and serving + client = Client(core_url=core_url) + 
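    # Register a fresh, randomly suffixed project (PROJECT_NAME above) for this
    # module so statistics left over from earlier runs cannot leak into the assertions.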
client.create_project(PROJECT_NAME) + client.set_project(PROJECT_NAME) + + # Ensure Feast core is active, but empty + if not allow_dirty: + feature_sets = client.list_feature_sets() + if len(feature_sets) > 0: + raise Exception( + "Feast cannot have existing feature sets registered. Exiting tests." + ) + + return client + + +@pytest.fixture(scope="module") +def feature_stats_feature_set(client): + fv_fs = FeatureSet( + "feature_stats", + features=[ + Feature("strings", ValueType.STRING), + Feature("ints", ValueType.INT64), + Feature("floats", ValueType.FLOAT), + ], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(fv_fs) + return fv_fs + + +@pytest.fixture(scope="module") +def feature_stats_dataset_basic(client, feature_stats_feature_set): + + n_rows = 20 + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + df = pd.DataFrame( + { + "datetime": [time_offset] * n_rows, + "entity_id": [i for i in range(n_rows)], + "strings": ["a", "b"] * int(n_rows / 2), + "ints": [int(i) for i in range(n_rows)], + "floats": [10.5 - i for i in range(n_rows)], + } + ) + + expected_stats = tfdv.generate_statistics_from_dataframe( + df[["strings", "ints", "floats"]] + ) + clear_unsupported_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = df[name].std() + feature.num_stats.std_dev = std + + ingestion_id = client.ingest(feature_stats_feature_set, df) + time.sleep(10) + return { + "df": df, + "id": ingestion_id, + "date": datetime(time_offset.year, time_offset.month, time_offset.day).replace( + tzinfo=pytz.utc + ), + "stats": expected_stats, + } + + +@pytest.fixture(scope="module") +def feature_stats_dataset_agg(client, feature_stats_feature_set): + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + start_date = time_offset - timedelta(days=10) + end_date = time_offset - timedelta(days=7) + df1 = pd.DataFrame( + { + "datetime": [start_date] * 5, + "entity_id": [i for i in range(5)], + "strings": ["a", "b", "b", "b", "a"], + "ints": [4, 3, 2, 6, 3], + "floats": [2.1, 5.2, 4.3, 0.6, 0.1], + } + ) + ingestion_id_1 = client.ingest(feature_stats_feature_set, df1) + df2 = pd.DataFrame( + { + "datetime": [start_date + timedelta(days=1)] * 3, + "entity_id": [i for i in range(3)], + "strings": ["a", "b", "c"], + "ints": [2, 6, 7], + "floats": [1.6, 2.4, 2], + } + ) + ingestion_id_2 = client.ingest(feature_stats_feature_set, df2) + + combined_df = pd.concat([df1, df2])[["strings", "ints", "floats"]] + expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) + clear_unsupported_agg_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = combined_df[name].std() + feature.num_stats.std_dev = std + + time.sleep(10) + + return { + "ids": [ingestion_id_1, ingestion_id_2], + "start_date": datetime( + start_date.year, start_date.month, start_date.day + ).replace(tzinfo=pytz.utc), + "end_date": datetime(end_date.year, end_date.month, end_date.day).replace( + tzinfo=pytz.utc + ), + "stats": expected_stats, + } + + +def test_feature_stats_retrieval_by_single_dataset(client, feature_stats_dataset_basic): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + ingestion_ids=[feature_stats_dataset_basic["id"]], + ) 
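    # get_statistics should return TFDV-style dataset statistics computed by the
    # historical store for the given ingestion id; assert_stats_equal (bq/testutils.py)
    # then diffs them against the locally generated TFDV baseline from the fixture.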
+ + assert_stats_equal(feature_stats_dataset_basic["stats"], stats) + + +def test_feature_stats_by_date(client, feature_stats_dataset_basic): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + start_date=feature_stats_dataset_basic["date"], + end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), + ) + assert_stats_equal(feature_stats_dataset_basic["stats"], stats) + + +def test_feature_stats_agg_over_datasets(client, feature_stats_dataset_agg): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + ingestion_ids=feature_stats_dataset_agg["ids"], + ) + assert_stats_equal(feature_stats_dataset_agg["stats"], stats) + + +def test_feature_stats_agg_over_dates(client, feature_stats_dataset_agg): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + start_date=feature_stats_dataset_agg["start_date"], + end_date=feature_stats_dataset_agg["end_date"], + ) + assert_stats_equal(feature_stats_dataset_agg["stats"], stats) + + +def test_feature_stats_force_refresh( + client, feature_stats_dataset_basic, feature_stats_feature_set +): + df = feature_stats_dataset_basic["df"] + + df2 = pd.DataFrame( + { + "datetime": [df.iloc[0].datetime], + "entity_id": [10], + "strings": ["c"], + "ints": [2], + "floats": [1.3], + } + ) + client.ingest(feature_stats_feature_set, df2) + time.sleep(10) + + actual_stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store="historical", + start_date=feature_stats_dataset_basic["date"], + end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), + force_refresh=True, + ) + + combined_df = pd.concat([df, df2]) + expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) + + clear_unsupported_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = combined_df[name].std() + feature.num_stats.std_dev = std + + assert_stats_equal(expected_stats, actual_stats) diff --git a/tests/e2e/bq/testutils.py b/tests/e2e/bq/testutils.py new file mode 100644 index 0000000000..9ac678bc59 --- /dev/null +++ b/tests/e2e/bq/testutils.py @@ -0,0 +1,55 @@ +from deepdiff import DeepDiff +from google.protobuf.json_format import MessageToDict + + +def clear_unsupported_fields(datasets): + dataset = datasets.datasets[0] + for feature in dataset.features: + if feature.HasField("num_stats"): + feature.num_stats.common_stats.ClearField("num_values_histogram") + # Since difference in how BQ and TFDV compute histogram values make them + # approximate but uncomparable + feature.num_stats.ClearField("histograms") + elif feature.HasField("string_stats"): + feature.string_stats.common_stats.ClearField("num_values_histogram") + for bucket in feature.string_stats.rank_histogram.buckets: + bucket.ClearField("low_rank") + bucket.ClearField("high_rank") + elif feature.HasField("struct_stats"): + feature.string_stats.struct_stats.ClearField("num_values_histogram") + elif feature.HasField("bytes_stats"): + feature.string_stats.bytes_stats.ClearField("num_values_histogram") + + +def clear_unsupported_agg_fields(datasets): + dataset = datasets.datasets[0] + for feature in dataset.features: + if feature.HasField("num_stats"): + feature.num_stats.common_stats.ClearField("num_values_histogram") + 
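            # Histograms and medians are approximated differently by BigQuery and
            # TFDV, so drop them as well before comparing aggregated statistics.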
feature.num_stats.ClearField("histograms") + feature.num_stats.ClearField("median") + elif feature.HasField("string_stats"): + feature.string_stats.common_stats.ClearField("num_values_histogram") + feature.string_stats.ClearField("rank_histogram") + feature.string_stats.ClearField("top_values") + feature.string_stats.ClearField("unique") + elif feature.HasField("struct_stats"): + feature.struct_stats.ClearField("num_values_histogram") + elif feature.HasField("bytes_stats"): + feature.bytes_stats.ClearField("num_values_histogram") + feature.bytes_stats.ClearField("unique") + + +def assert_stats_equal(left, right): + left_stats = MessageToDict(left)["datasets"][0] + right_stats = MessageToDict(right)["datasets"][0] + assert ( + left_stats["numExamples"] == right_stats["numExamples"] + ), f"Number of examples do not match. Expected {left_stats['numExamples']}, got {right_stats['numExamples']}" + + left_features = sorted(left_stats["features"], key=lambda k: k["path"]["step"][0]) + right_features = sorted(right_stats["features"], key=lambda k: k["path"]["step"][0]) + diff = DeepDiff(left_features, right_features, significant_digits=3) + assert ( + len(diff) == 0 + ), f"Feature statistics do not match: \nwanted: {left_features}\n got: {right_features}" From 7e9ad2bac86fdc517804fdce3e889bcf62310234 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 14:46:03 +0800 Subject: [PATCH 09/34] Fix pytest redis Signed-off-by: Terence --- sdk/python/feast/feature_table.py | 4 +++- tests/e2e/redis/parallel-ingest-redis-serving.py | 16 ++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index ebe69e7fad..eafa4260ef 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -80,7 +80,9 @@ def __eq__(self, other): ): return False - if self.entities != other.entities: + if sorted(self.entities) != sorted(other.entities): + return False + if self.features != other.features: return False if self.batch_source != other.batch_source: return False diff --git a/tests/e2e/redis/parallel-ingest-redis-serving.py b/tests/e2e/redis/parallel-ingest-redis-serving.py index 7a213dc8af..dd98f66063 100644 --- a/tests/e2e/redis/parallel-ingest-redis-serving.py +++ b/tests/e2e/redis/parallel-ingest-redis-serving.py @@ -80,15 +80,11 @@ def test_discovery(self, client): # GetFeatureTable Check actual_get_feature_table = client.get_feature_table(name="dev_featuretable") - assert actual_get_feature_table.name == self.basic_ft_spec.name - assert actual_get_feature_table.entities == self.basic_ft_spec.entities - assert actual_get_feature_table.features == self.basic_ft_spec.features + assert actual_get_feature_table == self.basic_ft_spec # ListFeatureTables Check actual_list_feature_table = client.list_feature_tables()[0] - assert actual_list_feature_table.name == self.basic_ft_spec.name - assert actual_list_feature_table.entities == self.basic_ft_spec.entities - assert actual_list_feature_table.features == self.basic_ft_spec.features + assert actual_list_feature_table == self.basic_ft_spec def test_basic_retrieval(self, client): # TODO: Add ingest and retrieval check @@ -174,15 +170,11 @@ def test_discovery(self, client): # GetFeatureTable Check actual_get_feature_table = client.get_feature_table(name="alltypes") - assert actual_get_feature_table.name == self.alltypes_ft_spec.name - assert actual_get_feature_table.entities == self.alltypes_ft_spec.entities - assert actual_get_feature_table.features 
== self.alltypes_ft_spec.features + assert actual_get_feature_table == self.alltypes_ft_spec # ListFeatureTables Check actual_list_feature_table = client.list_feature_tables()[0] - assert actual_list_feature_table.name == self.alltypes_ft_spec.name - assert actual_list_feature_table.entities == self.alltypes_ft_spec.entities - assert actual_list_feature_table.features == self.alltypes_ft_spec.features + assert actual_list_feature_table == self.alltypes_ft_spec def test_alltypes_retrieval(self, client): # TODO: Add ingest and retrieval check From f7c5e316f41a87dac0d042f3424361c1faf62213 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 15:57:53 +0800 Subject: [PATCH 10/34] Remove unused batch tests Signed-off-by: Terence --- .prow/config.yaml | 91 -- Makefile | 2 +- .../scripts/test-end-to-end-batch-dataflow.sh | 307 ------- infra/scripts/test-end-to-end-batch.sh | 153 ---- tests/e2e/bq/bq-batch-retrieval.py | 819 ------------------ tests/e2e/bq/feature-stats.py | 256 ------ tests/e2e/bq/testutils.py | 55 -- 7 files changed, 1 insertion(+), 1682 deletions(-) delete mode 100755 infra/scripts/test-end-to-end-batch-dataflow.sh delete mode 100755 infra/scripts/test-end-to-end-batch.sh delete mode 100644 tests/e2e/bq/bq-batch-retrieval.py delete mode 100644 tests/e2e/bq/feature-stats.py delete mode 100644 tests/e2e/bq/testutils.py diff --git a/.prow/config.yaml b/.prow/config.yaml index d2269fcc6d..39c275603d 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -254,97 +254,6 @@ presubmits: branches: - ^v0\.(3|4)-branch$ - - name: test-end-to-end-batch - decorate: true - always_run: true - spec: - volumes: - - name: service-account - secret: - secretName: feast-service-account - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end-batch.sh"] - resources: - requests: - cpu: "6" - memory: "6144Mi" - volumeMounts: - - name: service-account - mountPath: "/etc/service-account" - skip_branches: - - ^v0\.(3|4)-branch$ - - - name: test-end-to-end-batch-fs-update - decorate: true - always_run: false - spec: - volumes: - - name: service-account - secret: - secretName: feast-service-account - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end-batch.sh", "-m", "fs_update"] - resources: - requests: - cpu: "6" - memory: "6144Mi" - volumeMounts: - - name: service-account - mountPath: "/etc/service-account" - skip_branches: - - ^v0\.(3|4)-branch$ - - - name: test-end-to-end-batch-java-8 - decorate: true - always_run: true - spec: - volumes: - - name: service-account - secret: - secretName: feast-service-account - containers: - - image: maven:3.6-jdk-8 - command: ["infra/scripts/test-end-to-end-batch.sh"] - resources: - requests: - cpu: "6" - memory: "6144Mi" - volumeMounts: - - name: service-account - mountPath: "/etc/service-account" - branches: - - ^v0\.(3|4)-branch$ - - - name: test-end-to-end-batch-dataflow - decorate: true - always_run: true - spec: - volumes: - - name: service-account-df - secret: - secretName: feast-e2e-service-account - - name: docker-socket - hostPath: - path: /var/run/docker.sock - containers: - - image: google/cloud-sdk:302.0.0 - command: ["infra/scripts/test-end-to-end-batch-dataflow.sh"] - resources: - requests: - cpu: "6" - memory: "6144Mi" - volumeMounts: - - name: service-account-df - mountPath: "/etc/service-account-df" - - name: docker-socket - mountPath: /var/run/docker.sock - securityContext: - privileged: true - skip_branches: - - ^v0\.(3|4)-branch$ - postsubmits: feast-dev/feast: - name: 
publish-python-sdk diff --git a/Makefile b/Makefile index 8fffe20816..f159ad624d 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ lint-python: cd ${ROOT_DIR}/sdk/python; flake8 feast/ tests/ cd ${ROOT_DIR}/sdk/python; black --check feast tests - cd ${ROOT_DIR}/tests/e2e; mypy bq/ redis/ + cd ${ROOT_DIR}/tests/e2e; mypy redis/ cd ${ROOT_DIR}/tests/e2e; isort . --check-only cd ${ROOT_DIR}/tests/e2e; flake8 . cd ${ROOT_DIR}/tests/e2e; black --check . diff --git a/infra/scripts/test-end-to-end-batch-dataflow.sh b/infra/scripts/test-end-to-end-batch-dataflow.sh deleted file mode 100755 index 363ba7dc47..0000000000 --- a/infra/scripts/test-end-to-end-batch-dataflow.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env bash -echo "Preparing environment variables..." - -set -e -set -o pipefail - -test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account-df/service-account-df.json" -test -z ${GCLOUD_PROJECT} && GCLOUD_PROJECT="kf-feast" -test -z ${GCLOUD_REGION} && GCLOUD_REGION="us-central1" -test -z ${GCLOUD_NETWORK} && GCLOUD_NETWORK="default" -test -z ${GCLOUD_SUBNET} && GCLOUD_SUBNET="default" -test -z ${TEMP_BUCKET} && TEMP_BUCKET="kf-feast-dataflow-temp" -test -z ${K8_CLUSTER_NAME} && K8_CLUSTER_NAME="feast-e2e-dataflow" -test -z ${HELM_RELEASE_NAME} && HELM_RELEASE_NAME="pr-$PULL_NUMBER" -test -z ${HELM_COMMON_NAME} && HELM_COMMON_NAME="deps" -test -z ${DATASET_NAME} && DATASET_NAME=feast_e2e_$(date +%s) -test -z ${SPECS_TOPIC} && SPECS_TOPIC=feast-specs-$(date +%s) -test -z ${FEATURES_TOPIC} && FEATURES_TOPIC=feast-$(date +%s) - - -feast_kafka_1_ip_name="feast-kafka-1" -feast_kafka_2_ip_name="feast-kafka-2" -feast_kafka_3_ip_name="feast-kafka-3" -feast_redis_ip_name="feast-redis" -feast_statsd_ip_name="feast-statsd" - -echo " -This script will run end-to-end tests for Feast Core and Batch Serving using Dataflow Runner. - -1. Setup K8s cluster (optional, if it was not created before) -2. Reuse existing IP addresses or generate new ones for stateful services -3. Install stateful services (kafka, redis, postgres, etc) (optional) -4. Build core & serving docker images (optional) -5. Create temporary BQ table for Feast Serving. -6. Rollout target images to cluster via helm in dedicated namespace (pr-{number}) -7. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from - tests/e2e via pytest. -8. Tear down feast services, keep stateful services. 
-" - -ORIGINAL_DIR=$(pwd) -echo $ORIGINAL_DIR - -echo "Environment:" -printenv - -export GOOGLE_APPLICATION_CREDENTIALS -gcloud auth activate-service-account --key-file ${GOOGLE_APPLICATION_CREDENTIALS} -gcloud -q auth configure-docker - -gcloud config set project ${GCLOUD_PROJECT} -gcloud config set compute/region ${GCLOUD_REGION} -gcloud config list - -apt-get -qq update -apt-get -y install wget build-essential gettext-base curl - -curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 -chmod 700 $ORIGINAL_DIR/get_helm.sh -$ORIGINAL_DIR/get_helm.sh - - -function getPublicAddresses() { - existing_addresses=$(gcloud compute addresses list --filter="region:($GCLOUD_REGION) name:kafka" --format "list(name)") - if [[ -z "$existing_addresses" ]]; then - echo " -============================================================ -Reserving IP addresses for Feast dependencies -============================================================ -" - - gcloud compute addresses create \ - $feast_kafka_1_ip_name $feast_kafka_2_ip_name $feast_kafka_3_ip_name $feast_redis_ip_name $feast_statsd_ip_name \ - --region ${GCLOUD_REGION} --subnet ${GCLOUD_SUBNET} - fi - - - export feast_kafka_1_ip=$(gcloud compute addresses describe $feast_kafka_1_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_kafka_2_ip=$(gcloud compute addresses describe $feast_kafka_2_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_kafka_3_ip=$(gcloud compute addresses describe $feast_kafka_3_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_redis_ip=$(gcloud compute addresses describe $feast_redis_ip_name --region=${GCLOUD_REGION} --format "value(address)") - export feast_statsd_ip=$(gcloud compute addresses describe $feast_statsd_ip_name --region=${GCLOUD_REGION} --format "value(address)") -} - -function createKubeCluster() { - echo " -============================================================ -Creating GKE nodepool for Feast e2e test with DataflowRunner -============================================================ -" - gcloud container clusters create ${K8_CLUSTER_NAME} --region ${GCLOUD_REGION} \ - --enable-cloud-logging \ - --enable-cloud-monitoring \ - --network ${GCLOUD_NETWORK} \ - --subnetwork ${GCLOUD_SUBNET} \ - --scopes https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,\ -https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/service.management.readonly,\ -https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/trace.append,\ -https://www.googleapis.com/auth/bigquery \ - --machine-type n1-standard-2 - - echo " -============================================================ -Create feast-postgres-database Secret in GKE nodepool -============================================================ -" - kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password - - echo " -============================================================ -Create feast-gcp-service-account Secret in GKE nodepool -============================================================ -" - cd $ORIGINAL_DIR/infra/scripts - kubectl create secret generic feast-gcp-service-account --from-file=credentials.json=${GOOGLE_APPLICATION_CREDENTIALS} -} - -function installDependencies() { - echo " -============================================================ -Helm install common parts (kafka, redis, etc) 
-============================================================ -" - cd $ORIGINAL_DIR/infra/charts/feast - - helm install --replace --wait --debug --values="values-end-to-end-batch-dataflow-updated.yaml" \ - --set "feast-core.enabled=false" \ - --set "feast-online-serving.enabled=false" \ - --set "feast-batch-serving.enabled=false" \ - --set "postgresql.enabled=false" - "$HELM_COMMON_NAME" . - -} - -function buildAndPushImage() -{ - echo docker build -t $1:$2 --build-arg REVISION=$2 -f $3 $ORIGINAL_DIR - docker build -t $1:$2 --build-arg REVISION=$2 -f $3 $ORIGINAL_DIR - docker push $1:$2 -} - -function buildTarget() { - buildAndPushImage "gcr.io/kf-feast/feast-core" "$PULL_NUMBER" "$ORIGINAL_DIR/infra/docker/core/Dockerfile" - buildAndPushImage "gcr.io/kf-feast/feast-serving" "$PULL_NUMBER" "$ORIGINAL_DIR/infra/docker/serving/Dockerfile" -} - -function installTarget() { - echo " -============================================================ -Helm install feast -============================================================ -" - cd $ORIGINAL_DIR/infra/charts/feast - - helm install --wait --timeout 300s --debug --values="values-end-to-end-batch-dataflow-updated.yaml" \ - --set "kafka.enabled=false" \ - --set "redis.enabled=false" \ - --set "prometheus-statsd-exporter.enabled=false" \ - --set "prometheus.enabled=false" \ - "$HELM_RELEASE_NAME" . - -} - -function clean() { - echo " - ============================================================ - Cleaning up - ============================================================ - " - cd $ORIGINAL_DIR/tests/e2e - - # Remove BQ Dataset - bq rm -r -f ${GCLOUD_PROJECT}:${DATASET_NAME} - - # Uninstall helm release before clearing PVCs - helm uninstall ${HELM_RELEASE_NAME} - - kubectl delete pvc data-${HELM_RELEASE_NAME}-postgresql-0 - - # Stop Dataflow jobs from retrieved Dataflow job ids in ingesting_jobs.txt - if [ -f ingesting_jobs.txt ]; then - while read line - do - echo $line - gcloud dataflow jobs cancel $line --region=${GCLOUD_REGION} - done < ingesting_jobs.txt - fi -} - -# 1. -existing_cluster=$(gcloud container clusters list --format "list(name)" --filter "name:$K8_CLUSTER_NAME") -if [[ -z $existing_cluster ]]; then - createKubeCluster "$@" -else - gcloud container clusters get-credentials $K8_CLUSTER_NAME --region $GCLOUD_REGION --project $GCLOUD_PROJECT -fi - -# 2. 
-getPublicAddresses "$@" - -echo " -============================================================ -Export required environment variables -============================================================ -" - -export TEMP_BUCKET=$TEMP_BUCKET/$HELM_RELEASE_NAME/$(date +%s) -export DATASET_NAME=$DATASET_NAME -export GCLOUD_PROJECT=$GCLOUD_PROJECT -export GCLOUD_NETWORK=$GCLOUD_NETWORK -export GCLOUD_SUBNET=$GCLOUD_SUBNET -export GCLOUD_REGION=$GCLOUD_REGION -export HELM_COMMON_NAME=$HELM_COMMON_NAME -export IMAGE_TAG=$PULL_PULL_SHA -export SPECS_TOPIC=$SPECS_TOPIC -export FEATURES_TOPIC=$FEATURES_TOPIC - -export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel) -export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts -source ${SCRIPTS_DIR}/setup-common-functions.sh - -wait_for_docker_image gcr.io/kf-feast/feast-core:"${IMAGE_TAG}" -wait_for_docker_image gcr.io/kf-feast/feast-serving:"${IMAGE_TAG}" - -envsubst $'$TEMP_BUCKET $DATASET_NAME $GCLOUD_PROJECT $GCLOUD_NETWORK $SPECS_TOPIC $FEATURES_TOPIC \ - $GCLOUD_SUBNET $GCLOUD_REGION $IMAGE_TAG $HELM_COMMON_NAME $feast_kafka_1_ip - $feast_kafka_2_ip $feast_kafka_3_ip $feast_redis_ip $feast_statsd_ip' < $ORIGINAL_DIR/infra/scripts/test-templates/values-end-to-end-batch-dataflow.yaml > $ORIGINAL_DIR/infra/charts/feast/values-end-to-end-batch-dataflow-updated.yaml - - -# 3. -existing_deps=$(helm list --filter deps -q) -if [[ -z $existing_deps ]]; then - installDependencies "$@" -fi - -# 4. -# buildTarget "$@" - -# 5. -echo " -============================================================ -Creating temp BQ table for Feast Serving -============================================================ -" - -bq --location=US --project_id=${GCLOUD_PROJECT} mk \ - --dataset \ - --default_table_expiration 86400 \ - ${GCLOUD_PROJECT}:${DATASET_NAME} - - -# 6. - -set +e -installTarget "$@" - -# 7. -echo " -============================================================ -Installing Python 3.7 with Miniconda and Feast SDK -============================================================ -" -cd $ORIGINAL_DIR -# Install Python 3.7 with Miniconda -wget -q https://repo.continuum.io/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh \ - -O /tmp/miniconda.sh -bash /tmp/miniconda.sh -b -p /root/miniconda -f -/root/miniconda/bin/conda init -source ~/.bashrc - -# Install Feast Python SDK and test requirements -cd $ORIGINAL_DIR -make compile-protos-python -pip install -qe sdk/python -pip install -qr tests/e2e/requirements.txt - -echo " -============================================================ -Running end-to-end tests with pytest at 'tests/e2e' -============================================================ -" -# Default artifact location setting in Prow jobs -LOGS_ARTIFACT_PATH=/logs/artifacts - -cd $ORIGINAL_DIR/tests/e2e - -core_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-core) -serving_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-batch-serving) -jobcontroller_ip=$(kubectl get -o jsonpath="{.status.loadBalancer.ingress[0].ip}" service ${HELM_RELEASE_NAME}-feast-jobcontroller) - -set +e -pytest -s -v bq/bq-batch-retrieval.py -m dataflow_runner --core_url "$core_ip:6565" --serving_url "$serving_ip:6566" \ - --jobcontroller_url "$jobcontroller_ip:6570" --gcs_path "gs://${TEMP_BUCKET}" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml -TEST_EXIT_CODE=$? 
- -if [[ ${TEST_EXIT_CODE} != 0 ]]; then - echo "[DEBUG] Printing logs" - ls -ltrh /var/log/feast* - cat /var/log/feast-serving-warehouse.log /var/log/feast-core.log - - echo "[DEBUG] Printing Python packages list" - pip list -else - clean "$@" -fi - -exit ${TEST_EXIT_CODE} diff --git a/infra/scripts/test-end-to-end-batch.sh b/infra/scripts/test-end-to-end-batch.sh deleted file mode 100755 index c741fe7168..0000000000 --- a/infra/scripts/test-end-to-end-batch.sh +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env bash - -set -e -set -o pipefail - -PYTEST_MARK='direct_runner' #default - -print_usage() { - printf "Usage: ./test-end-to-end-batch -m pytest_mark" -} - -while getopts 'm:' flag; do - case "${flag}" in - m) PYTEST_MARK="${OPTARG}" ;; - *) print_usage - exit 1 ;; - esac -done - -test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account/service-account.json" -test -z ${SKIP_BUILD_JARS} && SKIP_BUILD_JARS="false" -test -z ${GOOGLE_CLOUD_PROJECT} && GOOGLE_CLOUD_PROJECT="kf-feast" -test -z ${TEMP_BUCKET} && TEMP_BUCKET="feast-templocation-kf-feast" -test -z ${JOBS_STAGING_LOCATION} && JOBS_STAGING_LOCATION="gs://${TEMP_BUCKET}/staging-location/$(date +%s)" - -# Get the current build version using maven (and pom.xml) -export FEAST_BUILD_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) -echo Building version: $FEAST_BUILD_VERSION - -# Get Feast project repository root and scripts directory -export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel) -export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts - -echo " -This script will run end-to-end tests for Feast Core and Batch Serving. - -1. Install gcloud SDK -2. Install Redis as the job store for Feast Batch Serving. -4. Install Postgres for persisting Feast metadata. -5. Install Kafka and Zookeeper as the Source in Feast. -6. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from - tests/e2e via pytest. -" - -source ${SCRIPTS_DIR}/setup-common-functions.sh - -install_test_tools -install_gcloud_sdk -install_and_start_local_redis -install_and_start_local_postgres -install_and_start_local_zookeeper_and_kafka - -if [[ ${SKIP_BUILD_JARS} != "true" ]]; then - build_feast_core_and_serving -else - echo "[DEBUG] Skipping building jars" -fi - -DATASET_NAME=feast_$(date +%s) -bq --location=US --project_id=${GOOGLE_CLOUD_PROJECT} mk \ - --dataset \ - --default_table_expiration 86400 \ - ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} - -# Start Feast Core in background -cat < /tmp/jc.warehouse.application.yml -feast: - core-host: localhost - core-port: 6565 - jobs: - polling_interval_milliseconds: 10000 - active_runner: direct - consolidate-jobs-per-source: true - runners: - - name: direct - type: DirectRunner - options: - tempLocation: gs://${TEMP_BUCKET}/tempLocation - -EOF - -cat < /tmp/serving.warehouse.application.yml -feast: - # GRPC service address for Feast Core - # Feast Serving requires connection to Feast Core to retrieve and reload Feast metadata (e.g. FeatureSpecs, Store information) - core-host: localhost - core-grpc-port: 6565 - - # Indicates the active store. Only a single store in the last can be active at one time. 
In the future this key - # will be deprecated in order to allow multiple stores to be served from a single serving instance - active_store: historical - - # List of store configurations - stores: - - name: historical - type: BIGQUERY - config: - project_id: ${GOOGLE_CLOUD_PROJECT} - dataset_id: ${DATASET_NAME} - staging_location: ${JOBS_STAGING_LOCATION} - initial_retry_delay_seconds: 1 - total_timeout_seconds: 21600 - write_triggering_frequency_seconds: 1 - subscriptions: - - name: "*" - project: "*" - version: "*" - - job_store: - redis_host: localhost - redis_port: 6379 - - tracing: - enabled: false - -server: - port: 8081 - -EOF - -cat /tmp/jc.warehouse.application.yml /tmp/serving.warehouse.application.yml - -start_feast_core -start_feast_jobcontroller /tmp/jc.warehouse.application.yml -start_feast_serving /tmp/serving.warehouse.application.yml - -install_python_with_miniconda_and_feast_sdk - -print_banner "Running end-to-end tests with pytest at 'tests/e2e'" -# Default artifact location setting in Prow jobs -LOGS_ARTIFACT_PATH=/logs/artifacts - -ORIGINAL_DIR=$(pwd) -cd tests/e2e - -set +e -pytest bq/* -v -m ${PYTEST_MARK} --gcs_path ${JOBS_STAGING_LOCATION} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml -TEST_EXIT_CODE=$? - -if [[ ${TEST_EXIT_CODE} != 0 ]]; then - echo "[DEBUG] Printing logs" - ls -ltrh /var/log/feast* - cat /var/log/feast-serving-online.log /var/log/feast-core.log /var/log/feast-jobcontroller.log - - echo "[DEBUG] Printing Python packages list" - pip list -else - print_banner "Cleaning up" - - bq rm -r -f ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} -fi - -exit ${TEST_EXIT_CODE} diff --git a/tests/e2e/bq/bq-batch-retrieval.py b/tests/e2e/bq/bq-batch-retrieval.py deleted file mode 100644 index 2d94d2e6cf..0000000000 --- a/tests/e2e/bq/bq-batch-retrieval.py +++ /dev/null @@ -1,819 +0,0 @@ -import math -import os -import random -import time -import uuid -from datetime import datetime, timedelta -from urllib.parse import urlparse - -import numpy as np -import pandas as pd -import pytest -import pytz -import tensorflow_data_validation as tfdv -from google.cloud import bigquery, storage -from google.cloud.storage import Blob -from google.protobuf.duration_pb2 import Duration -from pandavro import to_avro - -from bq.testutils import assert_stats_equal, clear_unsupported_fields -from feast.client import Client -from feast.contrib.job_controller.client import Client as JCClient -from feast.core.CoreService_pb2 import ListStoresRequest -from feast.core.FeatureSet_pb2 import FeatureSetStatus -from feast.core.IngestionJob_pb2 import IngestionJobStatus -from feast.entity import Entity -from feast.feature import Feature -from feast.feature_set import FeatureSet -from feast.type_map import ValueType -from feast.wait import wait_retry_backoff - -pd.set_option("display.max_columns", None) - -PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] - - -@pytest.fixture(scope="module") -def core_url(pytestconfig): - return pytestconfig.getoption("core_url") - - -@pytest.fixture(scope="module") -def serving_url(pytestconfig): - return pytestconfig.getoption("serving_url") - - -@pytest.fixture(scope="module") -def jobcontroller_url(pytestconfig): - return pytestconfig.getoption("jobcontroller_url") - - -@pytest.fixture(scope="module") -def allow_dirty(pytestconfig): - return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False - - -@pytest.fixture(scope="module") -def gcs_path(pytestconfig): - return pytestconfig.getoption("gcs_path") - - 
-@pytest.fixture(scope="module") -def client(core_url, serving_url, allow_dirty): - # Get client for core and serving - client = Client(core_url=core_url, serving_url=serving_url) - client.create_project(PROJECT_NAME) - client.set_project(PROJECT_NAME) - - # Ensure Feast core is active, but empty - if not allow_dirty: - feature_sets = client.list_feature_sets() - if len(feature_sets) > 0: - raise Exception( - "Feast cannot have existing feature sets registered. Exiting tests." - ) - - return client - - -def wait_for(fn, timeout: timedelta, sleep=5): - until = datetime.now() + timeout - last_exc = BaseException() - - while datetime.now() <= until: - try: - fn() - except Exception as exc: - last_exc = exc - else: - return - time.sleep(sleep) - - raise last_exc - - -@pytest.mark.first -@pytest.mark.direct_runner -@pytest.mark.dataflow_runner -@pytest.mark.run(order=1) -def test_batch_apply_all_featuresets(client): - client.set_project(PROJECT_NAME) - - file_fs1 = FeatureSet( - "file_feature_set", - features=[Feature("feature_value1", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(file_fs1) - - gcs_fs1 = FeatureSet( - "gcs_feature_set", - features=[Feature("feature_value2", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(gcs_fs1) - - proc_time_fs = FeatureSet( - "processing_time", - features=[Feature("feature_value3", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(proc_time_fs) - - add_cols_fs = FeatureSet( - "additional_columns", - features=[Feature("feature_value4", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(add_cols_fs) - - historical_fs = FeatureSet( - "historical", - features=[Feature("feature_value5", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(historical_fs) - - fs1 = FeatureSet( - "feature_set_1", - features=[Feature("feature_value6", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - - fs2 = FeatureSet( - "feature_set_2", - features=[Feature("other_feature_value7", ValueType.INT64)], - entities=[Entity("other_entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(fs1) - client.apply(fs2) - - no_max_age_fs = FeatureSet( - "no_max_age", - features=[Feature("feature_value8", ValueType.INT64)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=0), - ) - client.apply(no_max_age_fs) - - -@pytest.mark.direct_runner -@pytest.mark.dataflow_runner -@pytest.mark.run(order=10) -def test_batch_get_historical_features_with_file(client): - file_fs1 = client.get_feature_set(name="file_feature_set") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value1": [f"{i}" for i in range(N_ROWS)], - } - ) - - # feature set may be ready (direct runner set ready right after job submitted), - # but kafka consumer is not configured - # give some time to warm up ingestion job - wait_retry_backoff( - retry_fn=( - lambda: ( - None, - client.get_feature_set(name="file_feature_set").status - == FeatureSetStatus.STATUS_READY, - ) - ), - timeout_secs=480, - timeout_msg="Wait for 
FeatureSet to be READY", - ) - time.sleep(20) - - client.ingest(file_fs1, features_1_df, timeout=480) - - # Rename column (datetime -> event_timestamp) - features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) - - to_avro( - df=features_1_df[["event_timestamp", "entity_id"]], - file_path_or_buffer="file_feature_set.avro", - ) - - time.sleep(10) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows="file://file_feature_set.avro", - feature_refs=["feature_value1"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["entity_id"].to_list() == [ - int(i) for i in output["feature_value1"].to_list() - ] - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=10)) - - -@pytest.mark.direct_runner -@pytest.mark.dataflow_runner -@pytest.mark.run(order=11) -def test_batch_get_historical_features_with_gs_path(client, gcs_path): - gcs_fs1 = client.get_feature_set(name="gcs_feature_set") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value2": [f"{i}" for i in range(N_ROWS)], - } - ) - client.ingest(gcs_fs1, features_1_df, timeout=360) - - # Rename column (datetime -> event_timestamp) - features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) - - # Output file to local - file_name = "gcs_feature_set.avro" - to_avro( - df=features_1_df[["event_timestamp", "entity_id"]], - file_path_or_buffer=file_name, - ) - - uri = urlparse(gcs_path) - bucket = uri.hostname - ts = int(time.time()) - remote_path = str(uri.path).strip("/") + f"/{ts}/{file_name}" - - # Upload file to gcs - storage_client = storage.Client(project=None) - bucket = storage_client.get_bucket(bucket) - blob = bucket.blob(remote_path) - blob.upload_from_filename(file_name) - - time.sleep(10) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=f"{gcs_path}/{ts}/*", - feature_refs=["feature_value2"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - assert output["entity_id"].to_list() == [ - int(i) for i in output["feature_value2"].to_list() - ] - - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - blob.delete() - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=12) -def test_batch_order_by_creation_time(client): - proc_time_fs = client.get_feature_set(name="processing_time") - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - N_ROWS = 10 - incorrect_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value3": ["WRONG"] * N_ROWS, - } - ) - correct_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value3": ["CORRECT"] * N_ROWS, - } - ) - client.ingest(proc_time_fs, incorrect_df) - time.sleep(15) - client.ingest(proc_time_fs, correct_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=incorrect_df[["datetime", "entity_id"]], - feature_refs=["feature_value3"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["feature_value3"].to_list() == ["CORRECT"] * N_ROWS - - 
clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=13) -def test_batch_additional_columns_in_entity_table(client): - add_cols_fs = client.get_feature_set(name="additional_columns") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value4": ["abc"] * N_ROWS, - } - ) - client.ingest(add_cols_fs, features_df) - - entity_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "additional_string_col": ["hello im extra"] * N_ROWS, - "additional_float_col": [random.random() for i in range(N_ROWS)], - } - ) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value4"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - print(output.head(10)) - - assert np.allclose( - output["additional_float_col"], entity_df["additional_float_col"] - ) - assert ( - output["additional_string_col"].to_list() - == entity_df["additional_string_col"].to_list() - ) - assert ( - output["feature_value4"].to_list() - == features_df["feature_value4"].to_list() - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=14) -def test_batch_point_in_time_correctness_join(client): - historical_fs = client.get_feature_set(name="historical") - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - N_EXAMPLES = 10 - historical_df = pd.DataFrame( - { - "datetime": [ - time_offset - timedelta(seconds=50), - time_offset - timedelta(seconds=30), - time_offset - timedelta(seconds=10), - ] - * N_EXAMPLES, - "entity_id": [i for i in range(N_EXAMPLES) for _ in range(3)], - "feature_value5": ["WRONG", "WRONG", "CORRECT"] * N_EXAMPLES, - } - ) - entity_df = pd.DataFrame( - { - "datetime": [time_offset - timedelta(seconds=10)] * N_EXAMPLES, - "entity_id": [i for i in range(N_EXAMPLES)], - } - ) - - client.ingest(historical_fs, historical_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value5"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["feature_value5"].to_list() == ["CORRECT"] * N_EXAMPLES - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=15) -def test_batch_multiple_featureset_joins(client): - fs1 = client.get_feature_set(name="feature_set_1") - fs2 = client.get_feature_set(name="feature_set_2") - - N_ROWS = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value6": [f"{i}" for i in range(N_ROWS)], - } - ) - client.ingest(fs1, features_1_df) - - features_2_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "other_entity_id": [i for i in range(N_ROWS)], - "other_feature_value7": [i for i in range(N_ROWS)], - } - ) - client.ingest(fs2, features_2_df) - - entity_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in 
range(N_ROWS)], - "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)], - } - ) - - # Test retrieve with different variations of the string feature refs - # ie feature set inference for feature refs without specified feature set - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value6", "feature_set_2:other_feature_value7"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["entity_id"].to_list() == [ - int(i) for i in output["feature_value6"].to_list() - ] - assert ( - output["other_entity_id"].to_list() - == output["feature_set_2__other_feature_value7"].to_list() - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=16) -def test_batch_no_max_age(client): - no_max_age_fs = client.get_feature_set(name="no_max_age") - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - N_ROWS = 10 - features_8_df = pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "entity_id": [i for i in range(N_ROWS)], - "feature_value8": [i for i in range(N_ROWS)], - } - ) - client.ingest(no_max_age_fs, features_8_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=features_8_df[["datetime", "entity_id"]], - feature_refs=["feature_value8"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head()) - - assert output["entity_id"].to_list() == output["feature_value8"].to_list() - - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.fixture(scope="module", autouse=True) -def infra_teardown(pytestconfig, jobcontroller_url): - client = JCClient(jobcontroller_url=jobcontroller_url) - - marker = pytestconfig.getoption("-m") - yield marker - if marker == "dataflow_runner": - ingest_jobs = client.list_ingest_jobs() - ingest_jobs = [ - client.list_ingest_jobs(job.id)[0].external_id - for job in ingest_jobs - if job.status == IngestionJobStatus.RUNNING - ] - - cwd = os.getcwd() - with open(f"{cwd}/ingesting_jobs.txt", "w+") as output: - for job in ingest_jobs: - output.write("%s\n" % job) - else: - print("Cleaning up not required") - - -""" -This suite of tests tests the apply feature set - update feature set - retrieve -event sequence. It ensures that when a feature set is updated, tombstoned features -are no longer retrieved, and added features are null for previously ingested -rows. - -It is marked separately because of the length of time required -to perform this test, due to bigquery schema caching for streaming writes. 
-""" - - -@pytest.fixture(scope="module") -def update_featureset_dataframe(): - n_rows = 10 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - return pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [i for i in range(n_rows)], - "update_feature1": ["a" for i in range(n_rows)], - "update_feature2": [i + 2 for i in range(n_rows)], - "update_feature3": [i for i in range(n_rows)], - "update_feature4": ["b" for i in range(n_rows)], - } - ) - - -@pytest.mark.fs_update -@pytest.mark.run(order=20) -def test_update_featureset_apply_featureset_and_ingest_first_subset( - client, update_featureset_dataframe -): - subset_columns = ["datetime", "entity_id", "update_feature1", "update_feature2"] - subset_df = update_featureset_dataframe.iloc[:5][subset_columns] - update_fs = FeatureSet( - "update_fs", - entities=[Entity(name="entity_id", dtype=ValueType.INT64)], - max_age=Duration(seconds=432000), - ) - update_fs.infer_fields_from_df(subset_df) - client.apply(update_fs) - - client.ingest(feature_set=update_fs, source=subset_df) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5], - feature_refs=["update_feature1", "update_feature2"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - print(output.head()) - - assert ( - output["update_feature1"].to_list() - == subset_df["update_feature1"].to_list() - ) - assert ( - output["update_feature2"].to_list() - == subset_df["update_feature2"].to_list() - ) - - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.fs_update -@pytest.mark.timeout(600) -@pytest.mark.run(order=21) -def test_update_featureset_update_featureset_and_ingest_second_subset( - client, update_featureset_dataframe -): - subset_columns = [ - "datetime", - "entity_id", - "update_feature1", - "update_feature3", - "update_feature4", - ] - subset_df = update_featureset_dataframe.iloc[5:][subset_columns] - update_fs = FeatureSet( - "update_fs", - entities=[Entity(name="entity_id", dtype=ValueType.INT64)], - max_age=Duration(seconds=432000), - ) - update_fs.infer_fields_from_df(subset_df) - client.apply(update_fs) - - # We keep retrying this ingestion until all values make it into the buffer. - # This is a necessary step because bigquery streaming caches table schemas - # and as a result, rows may be lost. - while True: - ingestion_id = client.ingest(feature_set=update_fs, source=subset_df) - time.sleep(15) # wait for rows to get written to bq - rows_ingested = get_rows_ingested(client, update_fs, ingestion_id) - if rows_ingested == len(subset_df): - print(f"Number of rows successfully ingested: {rows_ingested}. Continuing.") - break - print( - f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion." 
- ) - time.sleep(30) - - def check(): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:], - feature_refs=["update_feature1", "update_feature3", "update_feature4"], - project=PROJECT_NAME, - ) - - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - print(output.head()) - - assert ( - output["update_feature1"].to_list() - == subset_df["update_feature1"].to_list() - ) - assert ( - output["update_feature3"].to_list() - == subset_df["update_feature3"].to_list() - ) - assert ( - output["update_feature4"].to_list() - == subset_df["update_feature4"].to_list() - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - wait_for(check, timedelta(minutes=5)) - - -@pytest.mark.fs_update -@pytest.mark.run(order=22) -def test_update_featureset_retrieve_all_fields(client, update_featureset_dataframe): - with pytest.raises(Exception): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]], - feature_refs=[ - "update_feature1", - "update_feature2", - "update_feature3", - "update_feature4", - ], - project=PROJECT_NAME, - ) - feature_retrieval_job.result() - - -@pytest.mark.fs_update -@pytest.mark.run(order=23) -def test_update_featureset_retrieve_valid_fields(client, update_featureset_dataframe): - feature_retrieval_job = client.get_historical_features( - entity_rows=update_featureset_dataframe[["datetime", "entity_id"]], - feature_refs=["update_feature1", "update_feature3", "update_feature4"], - project=PROJECT_NAME, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values( - by=["entity_id"] - ) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - print(output.head(10)) - assert ( - output["update_feature1"].to_list() - == update_featureset_dataframe["update_feature1"].to_list() - ) - # we have to convert to float because the column contains np.NaN - assert [math.isnan(i) for i in output["update_feature3"].to_list()[:5]] == [ - True - ] * 5 - assert output["update_feature3"].to_list()[5:] == [ - float(i) for i in update_featureset_dataframe["update_feature3"].to_list()[5:] - ] - assert ( - output["update_feature4"].to_list() - == [None] * 5 + update_featureset_dataframe["update_feature4"].to_list()[5:] - ) - - -@pytest.mark.direct_runner -@pytest.mark.run(order=31) -@pytest.mark.timeout(600) -def test_batch_dataset_statistics(client): - fs1 = client.get_feature_set(name="feature_set_1") - fs2 = client.get_feature_set(name="feature_set_2") - id_offset = 20 - - n_rows = 21 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - features_1_df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [id_offset + i for i in range(n_rows)], - "feature_value6": ["a" for i in range(n_rows)], - } - ) - ingestion_id1 = client.ingest(fs1, features_1_df) - - features_2_df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "other_entity_id": [id_offset + i for i in range(n_rows)], - "other_feature_value7": [int(i) % 10 for i in range(0, n_rows)], - } - ) - ingestion_id2 = client.ingest(fs2, features_2_df) - - entity_df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [id_offset + i for i in range(n_rows)], - "other_entity_id": [id_offset + i for i in range(n_rows)], - } - ) - - time.sleep(15) # wait for rows to get written to bq - while True: - rows_ingested1 = get_rows_ingested(client, fs1, ingestion_id1) - 
rows_ingested2 = get_rows_ingested(client, fs2, ingestion_id2) - if rows_ingested1 == len(features_1_df) and rows_ingested2 == len( - features_2_df - ): - print( - f"Number of rows successfully ingested: {rows_ingested1}, {rows_ingested2}. Continuing." - ) - break - time.sleep(30) - - feature_retrieval_job = client.get_historical_features( - entity_rows=entity_df, - feature_refs=["feature_value6", "feature_set_2:other_feature_value7"], - project=PROJECT_NAME, - compute_statistics=True, - ) - output = feature_retrieval_job.to_dataframe(timeout_sec=180) - print(output.head(10)) - stats = feature_retrieval_job.statistics(timeout_sec=180) - clear_unsupported_fields(stats) - - expected_stats = tfdv.generate_statistics_from_dataframe( - output[["feature_value6", "feature_set_2__other_feature_value7"]] - ) - clear_unsupported_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = output[name].std() - feature.num_stats.std_dev = std - - assert_stats_equal(expected_stats, stats) - clean_up_remote_files(feature_retrieval_job.get_avro_files()) - - -def get_rows_ingested( - client: Client, feature_set: FeatureSet, ingestion_id: str -) -> int: - response = client._core_service.ListStores( - ListStoresRequest(filter=ListStoresRequest.Filter(name="historical")) - ) - bq_config = response.store[0].bigquery_config - project = bq_config.project_id - dataset = bq_config.dataset_id - table = f"{PROJECT_NAME}_{feature_set.name}" - - bq_client = bigquery.Client(project=project) - rows = bq_client.query( - f'SELECT COUNT(*) as count FROM `{project}.{dataset}.{table}` WHERE ingestion_id = "{ingestion_id}"' - ).result() - - return list(rows)[0]["count"] - - -def clean_up_remote_files(files): - storage_client = storage.Client() - for file_uri in files: - if file_uri.scheme == "gs": - blob = Blob.from_string(file_uri.geturl(), client=storage_client) - blob.delete() diff --git a/tests/e2e/bq/feature-stats.py b/tests/e2e/bq/feature-stats.py deleted file mode 100644 index 226dc358f1..0000000000 --- a/tests/e2e/bq/feature-stats.py +++ /dev/null @@ -1,256 +0,0 @@ -import os -import time -import uuid -from datetime import datetime, timedelta - -import pandas as pd -import pytest -import pytz -import tensorflow_data_validation as tfdv -from google.protobuf.duration_pb2 import Duration - -from bq.testutils import ( - assert_stats_equal, - clear_unsupported_agg_fields, - clear_unsupported_fields, -) -from feast.client import Client -from feast.entity import Entity -from feast.feature import Feature -from feast.feature_set import FeatureSet -from feast.type_map import ValueType - -pd.set_option("display.max_columns", None) - -PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] -STORE_NAME = "historical" -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - - -@pytest.fixture(scope="module") -def core_url(pytestconfig): - return pytestconfig.getoption("core_url") - - -@pytest.fixture(scope="module") -def serving_url(pytestconfig): - return pytestconfig.getoption("serving_url") - - -@pytest.fixture(scope="module") -def allow_dirty(pytestconfig): - return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False - - -@pytest.fixture(scope="module") -def gcs_path(pytestconfig): - return pytestconfig.getoption("gcs_path") - - -@pytest.fixture(scope="module") -def client(core_url, allow_dirty): - # Get client for core and serving - client = Client(core_url=core_url) - 
client.create_project(PROJECT_NAME) - client.set_project(PROJECT_NAME) - - # Ensure Feast core is active, but empty - if not allow_dirty: - feature_sets = client.list_feature_sets() - if len(feature_sets) > 0: - raise Exception( - "Feast cannot have existing feature sets registered. Exiting tests." - ) - - return client - - -@pytest.fixture(scope="module") -def feature_stats_feature_set(client): - fv_fs = FeatureSet( - "feature_stats", - features=[ - Feature("strings", ValueType.STRING), - Feature("ints", ValueType.INT64), - Feature("floats", ValueType.FLOAT), - ], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(fv_fs) - return fv_fs - - -@pytest.fixture(scope="module") -def feature_stats_dataset_basic(client, feature_stats_feature_set): - - n_rows = 20 - - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - df = pd.DataFrame( - { - "datetime": [time_offset] * n_rows, - "entity_id": [i for i in range(n_rows)], - "strings": ["a", "b"] * int(n_rows / 2), - "ints": [int(i) for i in range(n_rows)], - "floats": [10.5 - i for i in range(n_rows)], - } - ) - - expected_stats = tfdv.generate_statistics_from_dataframe( - df[["strings", "ints", "floats"]] - ) - clear_unsupported_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = df[name].std() - feature.num_stats.std_dev = std - - ingestion_id = client.ingest(feature_stats_feature_set, df) - time.sleep(10) - return { - "df": df, - "id": ingestion_id, - "date": datetime(time_offset.year, time_offset.month, time_offset.day).replace( - tzinfo=pytz.utc - ), - "stats": expected_stats, - } - - -@pytest.fixture(scope="module") -def feature_stats_dataset_agg(client, feature_stats_feature_set): - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - start_date = time_offset - timedelta(days=10) - end_date = time_offset - timedelta(days=7) - df1 = pd.DataFrame( - { - "datetime": [start_date] * 5, - "entity_id": [i for i in range(5)], - "strings": ["a", "b", "b", "b", "a"], - "ints": [4, 3, 2, 6, 3], - "floats": [2.1, 5.2, 4.3, 0.6, 0.1], - } - ) - ingestion_id_1 = client.ingest(feature_stats_feature_set, df1) - df2 = pd.DataFrame( - { - "datetime": [start_date + timedelta(days=1)] * 3, - "entity_id": [i for i in range(3)], - "strings": ["a", "b", "c"], - "ints": [2, 6, 7], - "floats": [1.6, 2.4, 2], - } - ) - ingestion_id_2 = client.ingest(feature_stats_feature_set, df2) - - combined_df = pd.concat([df1, df2])[["strings", "ints", "floats"]] - expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) - clear_unsupported_agg_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = combined_df[name].std() - feature.num_stats.std_dev = std - - time.sleep(10) - - return { - "ids": [ingestion_id_1, ingestion_id_2], - "start_date": datetime( - start_date.year, start_date.month, start_date.day - ).replace(tzinfo=pytz.utc), - "end_date": datetime(end_date.year, end_date.month, end_date.day).replace( - tzinfo=pytz.utc - ), - "stats": expected_stats, - } - - -def test_feature_stats_retrieval_by_single_dataset(client, feature_stats_dataset_basic): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - ingestion_ids=[feature_stats_dataset_basic["id"]], - ) 
- - assert_stats_equal(feature_stats_dataset_basic["stats"], stats) - - -def test_feature_stats_by_date(client, feature_stats_dataset_basic): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - start_date=feature_stats_dataset_basic["date"], - end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), - ) - assert_stats_equal(feature_stats_dataset_basic["stats"], stats) - - -def test_feature_stats_agg_over_datasets(client, feature_stats_dataset_agg): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - ingestion_ids=feature_stats_dataset_agg["ids"], - ) - assert_stats_equal(feature_stats_dataset_agg["stats"], stats) - - -def test_feature_stats_agg_over_dates(client, feature_stats_dataset_agg): - stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store=STORE_NAME, - start_date=feature_stats_dataset_agg["start_date"], - end_date=feature_stats_dataset_agg["end_date"], - ) - assert_stats_equal(feature_stats_dataset_agg["stats"], stats) - - -def test_feature_stats_force_refresh( - client, feature_stats_dataset_basic, feature_stats_feature_set -): - df = feature_stats_dataset_basic["df"] - - df2 = pd.DataFrame( - { - "datetime": [df.iloc[0].datetime], - "entity_id": [10], - "strings": ["c"], - "ints": [2], - "floats": [1.3], - } - ) - client.ingest(feature_stats_feature_set, df2) - time.sleep(10) - - actual_stats = client.get_statistics( - "feature_stats", - features=["strings", "ints", "floats"], - store="historical", - start_date=feature_stats_dataset_basic["date"], - end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), - force_refresh=True, - ) - - combined_df = pd.concat([df, df2]) - expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) - - clear_unsupported_fields(expected_stats) - - # Since TFDV computes population std dev - for feature in expected_stats.datasets[0].features: - if feature.HasField("num_stats"): - name = feature.path.step[0] - std = combined_df[name].std() - feature.num_stats.std_dev = std - - assert_stats_equal(expected_stats, actual_stats) diff --git a/tests/e2e/bq/testutils.py b/tests/e2e/bq/testutils.py deleted file mode 100644 index 9ac678bc59..0000000000 --- a/tests/e2e/bq/testutils.py +++ /dev/null @@ -1,55 +0,0 @@ -from deepdiff import DeepDiff -from google.protobuf.json_format import MessageToDict - - -def clear_unsupported_fields(datasets): - dataset = datasets.datasets[0] - for feature in dataset.features: - if feature.HasField("num_stats"): - feature.num_stats.common_stats.ClearField("num_values_histogram") - # Since difference in how BQ and TFDV compute histogram values make them - # approximate but uncomparable - feature.num_stats.ClearField("histograms") - elif feature.HasField("string_stats"): - feature.string_stats.common_stats.ClearField("num_values_histogram") - for bucket in feature.string_stats.rank_histogram.buckets: - bucket.ClearField("low_rank") - bucket.ClearField("high_rank") - elif feature.HasField("struct_stats"): - feature.string_stats.struct_stats.ClearField("num_values_histogram") - elif feature.HasField("bytes_stats"): - feature.string_stats.bytes_stats.ClearField("num_values_histogram") - - -def clear_unsupported_agg_fields(datasets): - dataset = datasets.datasets[0] - for feature in dataset.features: - if feature.HasField("num_stats"): - feature.num_stats.common_stats.ClearField("num_values_histogram") - 
feature.num_stats.ClearField("histograms") - feature.num_stats.ClearField("median") - elif feature.HasField("string_stats"): - feature.string_stats.common_stats.ClearField("num_values_histogram") - feature.string_stats.ClearField("rank_histogram") - feature.string_stats.ClearField("top_values") - feature.string_stats.ClearField("unique") - elif feature.HasField("struct_stats"): - feature.struct_stats.ClearField("num_values_histogram") - elif feature.HasField("bytes_stats"): - feature.bytes_stats.ClearField("num_values_histogram") - feature.bytes_stats.ClearField("unique") - - -def assert_stats_equal(left, right): - left_stats = MessageToDict(left)["datasets"][0] - right_stats = MessageToDict(right)["datasets"][0] - assert ( - left_stats["numExamples"] == right_stats["numExamples"] - ), f"Number of examples do not match. Expected {left_stats['numExamples']}, got {right_stats['numExamples']}" - - left_features = sorted(left_stats["features"], key=lambda k: k["path"]["step"][0]) - right_features = sorted(right_stats["features"], key=lambda k: k["path"]["step"][0]) - diff = DeepDiff(left_features, right_features, significant_digits=3) - assert ( - len(diff) == 0 - ), f"Feature statistics do not match: \nwanted: {left_features}\n got: {right_features}" From 28830deb746f9b28197dd4cd0e67442554e11c3e Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 16:44:25 +0800 Subject: [PATCH 11/34] Fix docker-compose test Signed-off-by: Terence --- infra/scripts/test-docker-compose.sh | 2 +- tests/e2e/redis/parallel-ingest-redis-serving.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/infra/scripts/test-docker-compose.sh b/infra/scripts/test-docker-compose.sh index 45105d4839..173c796eb0 100755 --- a/infra/scripts/test-docker-compose.sh +++ b/infra/scripts/test-docker-compose.sh @@ -63,4 +63,4 @@ export FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .N ${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS}:6566 --timeout=120 # Run e2e tests for Redis -docker exec feast_jupyter_1 bash -c 'cd /feast/tests/e2e/redis && pytest --verbose -rs parallel-ingest-redis-serving.py --core_url core:6565 --serving_url=online_serving:6566 --kafka_brokers=kafka:9092' +docker exec feast_jupyter_1 bash -c 'cd /feast/tests/e2e/redis && pytest -n 1 --dist=loadscope parallel-ingest-redis-serving.py --core_url core:6565 --serving_url=online_serving:6566 --kafka_brokers=kafka:9092' diff --git a/tests/e2e/redis/parallel-ingest-redis-serving.py b/tests/e2e/redis/parallel-ingest-redis-serving.py index dd98f66063..53967aaa74 100644 --- a/tests/e2e/redis/parallel-ingest-redis-serving.py +++ b/tests/e2e/redis/parallel-ingest-redis-serving.py @@ -83,7 +83,9 @@ def test_discovery(self, client): assert actual_get_feature_table == self.basic_ft_spec # ListFeatureTables Check - actual_list_feature_table = client.list_feature_tables()[0] + actual_list_feature_table = [ + ft for ft in client.list_feature_tables() if ft.name == "dev_featuretable" + ][0] assert actual_list_feature_table == self.basic_ft_spec def test_basic_retrieval(self, client): @@ -173,7 +175,9 @@ def test_discovery(self, client): assert actual_get_feature_table == self.alltypes_ft_spec # ListFeatureTables Check - actual_list_feature_table = client.list_feature_tables()[0] + actual_list_feature_table = [ + ft for ft in client.list_feature_tables() if ft.name == "alltypes" + ][0] assert actual_list_feature_table == self.alltypes_ft_spec def test_alltypes_retrieval(self, 
client): From d1c434668d192702fb810a86e3046dc305b64449 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 18:37:20 +0800 Subject: [PATCH 12/34] Address PR comments Signed-off-by: Terence --- Makefile | 2 +- .../scripts/test-end-to-end-redis-cluster.sh | 2 +- infra/scripts/test-end-to-end.sh | 2 +- sdk/python/feast/client.py | 6 +- sdk/python/feast/feature_table.py | 34 +++- sdk/python/feast/loaders/ingest.py | 2 +- sdk/python/tests/test_client.py | 24 ++- sdk/python/tests/test_feature_table.py | 14 +- .../redis/parallel-ingest-redis-serving.py | 185 ------------------ tests/e2e/setup.cfg | 2 +- .../{redis => src}/specifications/dev_ft.yaml | 0 tests/e2e/src/test-register-ingest.py | 158 +++++++++++++++ 12 files changed, 209 insertions(+), 222 deletions(-) delete mode 100644 tests/e2e/redis/parallel-ingest-redis-serving.py rename tests/e2e/{redis => src}/specifications/dev_ft.yaml (100%) create mode 100644 tests/e2e/src/test-register-ingest.py diff --git a/Makefile b/Makefile index f159ad624d..ad755d70d3 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ lint-python: cd ${ROOT_DIR}/sdk/python; flake8 feast/ tests/ cd ${ROOT_DIR}/sdk/python; black --check feast tests - cd ${ROOT_DIR}/tests/e2e; mypy redis/ + cd ${ROOT_DIR}/tests/e2e; mypy src/ cd ${ROOT_DIR}/tests/e2e; isort . --check-only cd ${ROOT_DIR}/tests/e2e; flake8 . cd ${ROOT_DIR}/tests/e2e; black --check . diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index 083079a32b..1b67986742 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -103,7 +103,7 @@ cd tests/e2e set +e CORE_NO=$(nproc --all) -pytest redis/parallel-ingest-redis-serving.py -n ${CORE_NO} --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest src/* -n ${CORE_NO} --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/infra/scripts/test-end-to-end.sh b/infra/scripts/test-end-to-end.sh index a7dadd5a1f..474c98de42 100755 --- a/infra/scripts/test-end-to-end.sh +++ b/infra/scripts/test-end-to-end.sh @@ -120,7 +120,7 @@ cd tests/e2e set +e export GOOGLE_APPLICATION_CREDENTIALS=/etc/gcloud/service-account.json CORE_NO=$(nproc --all) -pytest redis/parallel-ingest-redis-serving.py -n ${CORE_NO} --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest src/* -n ${CORE_NO} --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? 
if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index bc21a6f2e4..d53ccd7599 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -68,7 +68,7 @@ from feast.grpc.grpc import create_grpc_channel from feast.loaders.ingest import ( BATCH_INGESTION_PRODUCTION_TIMEOUT, - check_field_mappings, + _check_field_mappings, ) from feast.serving.ServingService_pb2 import GetFeastServingInfoRequest from feast.serving.ServingService_pb2_grpc import ServingServiceStub @@ -625,7 +625,7 @@ def ingest( >>> ) >>> client.set_project("project1") >>> - >>> driver_ft = client.get_feature_table(name="driver") + >>> driver_ft = client.get_feature_table("driver") >>> client.ingest(driver_ft, ft_df) """ @@ -670,7 +670,7 @@ def ingest( ) # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table - check_field_mappings( + _check_field_mappings( column_names, name, feature_table.batch_source.field_mapping ) diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index eafa4260ef..bb35eb534d 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, MutableMapping, Optional, Union +from typing import Dict, List, MutableMapping, Optional import yaml from google.protobuf import json_format @@ -20,6 +20,7 @@ from google.protobuf.json_format import MessageToDict, MessageToJson from google.protobuf.timestamp_pb2 import Timestamp +from feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto from feast.core.FeatureTable_pb2 import FeatureTableSpec as FeatureTableSpecProto @@ -33,6 +34,7 @@ ) from feast.feature import Feature from feast.loaders import yaml as feast_yaml +from feast.value_type import ValueType class FeatureTable: @@ -43,8 +45,8 @@ class FeatureTable: def __init__( self, name: str, - entities: Union[str, List[str]], - features: Union[Feature, List[Feature]], + entities: List[str], + features: List[Feature], batch_source: Optional[DataSource] = None, stream_source: Optional[DataSource] = None, max_age: Optional[Duration] = None, @@ -52,9 +54,19 @@ def __init__( ): self._name = name self._entities = entities - self._features = features - self._batch_source = batch_source - self._stream_source = stream_source + self._features = [ + feature.to_proto() for feature in features if isinstance(feature, Feature) + ] + self._batch_source = ( + batch_source.to_proto() + if isinstance(batch_source, DataSource) + else batch_source + ) + self._stream_source = ( + stream_source.to_proto() + if isinstance(stream_source, DataSource) + else stream_source + ) if labels is None: self._labels = dict() # type: MutableMapping[str, str] else: @@ -141,7 +153,7 @@ def batch_source(self): return self._batch_source @batch_source.setter - def batch_source(self, batch_source: DataSource): + def batch_source(self, batch_source: DataSourceProto): """ Sets the batch source of this feature table """ @@ -155,7 +167,7 @@ def stream_source(self): return self._stream_source @stream_source.setter - def stream_source(self, stream_source: DataSource): + def stream_source(self, stream_source: DataSourceProto): """ Sets the stream source of this feature table """ @@ -322,7 
+334,11 @@ def from_proto(cls, feature_table_proto: FeatureTableProto): name=feature_table_proto.spec.name, entities=[entity for entity in feature_table_proto.spec.entities], features=[ - Feature.from_proto(feature).to_proto() + Feature( + name=feature.name, + dtype=ValueType(feature.value_type), + labels=feature.labels, + ) for feature in feature_table_proto.spec.features ], labels=feature_table_proto.spec.labels, diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 0d1c3e5e31..56bb839eb8 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -8,7 +8,7 @@ BATCH_INGESTION_PRODUCTION_TIMEOUT = 120 # type: int -def check_field_mappings( +def _check_field_mappings( column_names: List[str], feature_table_name: str, feature_table_field_mappings: Dict[str, str], diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index c152d6d400..4964a84c96 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -39,7 +39,7 @@ from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto from feast.core.FeatureTable_pb2 import FeatureTableSpec as FeatureTableSpecProto -from feast.data_source import DataSource, FileOptions, KafkaOptions, SourceType +from feast.data_source import DataSource, FileOptions, KafkaOptions from feast.entity import Entity from feast.feature import Feature from feast.feature_table import FeatureTable @@ -457,7 +457,7 @@ def test_get_feature_table(self, mocked_client, mocker): ], entities=["my_entity_1"], batch_source=DataSourceProto( - type=SourceType(1).name, + type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -498,7 +498,7 @@ def test_list_feature_tables(self, mocked_client, mocker): ) batch_source = DataSourceProto( - type=SourceType(1).name, + type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -569,7 +569,7 @@ def test_apply_feature_table_success(self, test_client): # Create Feature Tables batch_source = DataSource( - type=SourceType(1).name, + type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -580,7 +580,7 @@ def test_apply_feature_table_success(self, test_client): ) stream_source = DataSource( - type=SourceType(3).name, + type="STREAM_KAFKA", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -596,17 +596,15 @@ def test_apply_feature_table_success(self, test_client): ft1 = FeatureTable( name="my-feature-table-1", features=[ - Feature(name="fs1-my-feature-1", dtype=ValueType.INT64).to_proto(), - Feature(name="fs1-my-feature-2", dtype=ValueType.STRING).to_proto(), - Feature( - name="fs1-my-feature-3", dtype=ValueType.STRING_LIST - ).to_proto(), - Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST).to_proto(), + Feature(name="fs1-my-feature-1", dtype=ValueType.INT64), + Feature(name="fs1-my-feature-2", dtype=ValueType.STRING), + Feature(name="fs1-my-feature-3", dtype=ValueType.STRING_LIST), + Feature(name="fs1-my-feature-4", dtype=ValueType.BYTES_LIST), ], entities=["fs1-my-entity-1"], labels={"team": "matchmaking"}, - batch_source=batch_source.to_proto(), - stream_source=stream_source.to_proto(), + batch_source=batch_source, + stream_source=stream_source, ) # Register Feature Table with Core diff --git a/sdk/python/tests/test_feature_table.py 
b/sdk/python/tests/test_feature_table.py index 8a1059bcb6..8d9891e67d 100644 --- a/sdk/python/tests/test_feature_table.py +++ b/sdk/python/tests/test_feature_table.py @@ -21,7 +21,7 @@ from feast.client import Client from feast.core import CoreService_pb2_grpc as Core -from feast.data_source import DataSource, FileOptions, KafkaOptions, SourceType +from feast.data_source import DataSource, FileOptions, KafkaOptions from feast.feature import Feature from feast.feature_table import FeatureTable from feast.value_type import ValueType @@ -55,7 +55,7 @@ def client(self, server): def test_feature_table_import_export_yaml(self): batch_source = DataSource( - type=SourceType(1).name, + type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -66,7 +66,7 @@ def test_feature_table_import_export_yaml(self): ) stream_source = DataSource( - type=SourceType(3).name, + type="STREAM_KAFKA", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -82,13 +82,13 @@ def test_feature_table_import_export_yaml(self): test_feature_table = FeatureTable( name="car_driver", features=[ - Feature(name="ride_distance", dtype=ValueType.FLOAT).to_proto(), - Feature(name="ride_duration", dtype=ValueType.STRING).to_proto(), + Feature(name="ride_distance", dtype=ValueType.FLOAT), + Feature(name="ride_duration", dtype=ValueType.STRING), ], entities=["car_driver_entity"], labels={"team": "matchmaking"}, - batch_source=batch_source.to_proto(), - stream_source=stream_source.to_proto(), + batch_source=batch_source, + stream_source=stream_source, ) # Create a string YAML representation of the feature table diff --git a/tests/e2e/redis/parallel-ingest-redis-serving.py b/tests/e2e/redis/parallel-ingest-redis-serving.py deleted file mode 100644 index 53967aaa74..0000000000 --- a/tests/e2e/redis/parallel-ingest-redis-serving.py +++ /dev/null @@ -1,185 +0,0 @@ -import os -import uuid -from datetime import datetime - -import pytest -from google.protobuf.duration_pb2 import Duration - -from feast.client import Client -from feast.data_source import DataSource, FileOptions, SourceType -from feast.entity import Entity -from feast.feature import Feature -from feast.feature_table import FeatureTable -from feast.value_type import ValueType - -DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] - - -@pytest.fixture(scope="module") -def client(pytestconfig): - core_url = pytestconfig.getoption("core_url") - serving_url = pytestconfig.getoption("serving_url") - - client = Client(core_url=core_url, serving_url=serving_url,) - - client.set_project(PROJECT_NAME) - - return client - - -@pytest.mark.incremental -class TestBasicIngestionRetrieval: - def setup_class(cls): - prefix = "basic_ingestion" - suffix = str(int(datetime.now().timestamp())) - cls.customer_ft_name = f"{prefix}_customer_{suffix}" - cls.driver_ft_name = f"{prefix}_driver_{suffix}" - - cls.customer_entity = Entity( - name="customer_id", - description="Customer entity for rides", - value_type=ValueType.STRING, - labels={"team": "customer_service", "common_key": "common_val"}, - ) - - cls.driver_entity = Entity( - name="driver_id", - description="Driver entity for car rides", - value_type=ValueType.STRING, - labels={"team": "matchmaking", "common_key": "common_val"}, - ) - - cls.basic_ft_spec = FeatureTable.from_yaml( - f"{DIR_PATH}/specifications/dev_ft.yaml" - ) - - def test_discovery(self, client): - - # ApplyEntity - 
client.apply_entity(self.customer_entity) - client.apply_entity(self.driver_entity) - - # GetEntity Check - assert client.get_entity(name="customer_id") == self.customer_entity - assert client.get_entity(name="driver_id") == self.driver_entity - - # ListEntities Check - common_filtering_labels = {"common_key": "common_val"} - matchmaking_filtering_labels = {"team": "matchmaking"} - - actual_common_entities = client.list_entities(labels=common_filtering_labels) - actual_matchmaking_entities = client.list_entities( - labels=matchmaking_filtering_labels - ) - assert len(actual_common_entities) == 2 - assert len(actual_matchmaking_entities) == 1 - - # ApplyFeatureTable - client.apply_feature_table(self.basic_ft_spec, PROJECT_NAME) - - # GetFeatureTable Check - actual_get_feature_table = client.get_feature_table(name="dev_featuretable") - assert actual_get_feature_table == self.basic_ft_spec - - # ListFeatureTables Check - actual_list_feature_table = [ - ft for ft in client.list_feature_tables() if ft.name == "dev_featuretable" - ][0] - assert actual_list_feature_table == self.basic_ft_spec - - def test_basic_retrieval(self, client): - # TODO: Add ingest and retrieval check - pass - - -@pytest.mark.incremental -class TestAllTypesIngestionRetrieval: - def setup_class(cls): - prefix = "alltypes_ingestion" - suffix = str(int(datetime.now().timestamp())) - batch_source = DataSource( - type=SourceType(1).name, - field_mapping={ - "ride_distance": "ride_distance", - "ride_duration": "ride_duration", - }, - options=FileOptions(file_format="parquet", file_url="file://feast/*"), - timestamp_column="ts_col", - date_partition_column="date_partition_col", - ) - - cls.alltypes_entity = Entity( - name="alltypes_id", - description="Driver entity for car rides", - value_type=ValueType.STRING, - labels={"cat": "alltypes"}, - ) - - cls.alltypes_ft_name = f"{prefix}_alltypes_{suffix}" - cls.alltypes_ft_spec = FeatureTable( - name="alltypes", - entities=["alltypes_id"], - features=[ - Feature(name="float_feature", dtype=ValueType.FLOAT).to_proto(), - Feature(name="int64_feature", dtype=ValueType.INT64).to_proto(), - Feature(name="int32_feature", dtype=ValueType.INT32).to_proto(), - Feature(name="string_feature", dtype=ValueType.STRING).to_proto(), - Feature(name="bytes_feature", dtype=ValueType.BYTES).to_proto(), - Feature(name="bool_feature", dtype=ValueType.BOOL).to_proto(), - Feature(name="double_feature", dtype=ValueType.DOUBLE).to_proto(), - Feature( - name="double_list_feature", dtype=ValueType.DOUBLE_LIST - ).to_proto(), - Feature( - name="float_list_feature", dtype=ValueType.FLOAT_LIST - ).to_proto(), - Feature( - name="int64_list_feature", dtype=ValueType.INT64_LIST - ).to_proto(), - Feature( - name="int32_list_feature", dtype=ValueType.INT32_LIST - ).to_proto(), - Feature( - name="string_list_feature", dtype=ValueType.STRING_LIST - ).to_proto(), - Feature( - name="bytes_list_feature", dtype=ValueType.BYTES_LIST - ).to_proto(), - Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST).to_proto(), - ], - max_age=Duration(seconds=3600), - batch_source=batch_source.to_proto(), - labels={"cat": "alltypes"}, - ) - - def test_discovery(self, client): - # ApplyEntity - client.apply_entity(self.alltypes_entity) - - # GetEntity Check - assert client.get_entity(name="alltypes_id") == self.alltypes_entity - - # ListEntities Check - alltypes_filtering_labels = {"cat": "alltypes"} - actual_alltypes_entities = client.list_entities( - labels=alltypes_filtering_labels - ) - assert len(actual_alltypes_entities) == 1 
- - # ApplyFeatureTable - client.apply_feature_table(self.alltypes_ft_spec, PROJECT_NAME) - - # GetFeatureTable Check - actual_get_feature_table = client.get_feature_table(name="alltypes") - assert actual_get_feature_table == self.alltypes_ft_spec - - # ListFeatureTables Check - actual_list_feature_table = [ - ft for ft in client.list_feature_tables() if ft.name == "alltypes" - ][0] - assert actual_list_feature_table == self.alltypes_ft_spec - - def test_alltypes_retrieval(self, client): - # TODO: Add ingest and retrieval check - pass diff --git a/tests/e2e/setup.cfg b/tests/e2e/setup.cfg index 2e0bf6860b..0c3d9bee74 100644 --- a/tests/e2e/setup.cfg +++ b/tests/e2e/setup.cfg @@ -14,5 +14,5 @@ max-complexity = 20 select = B,C,E,F,W,T4 [mypy] -files=bq,redis +files=src ignore_missing_imports=true \ No newline at end of file diff --git a/tests/e2e/redis/specifications/dev_ft.yaml b/tests/e2e/src/specifications/dev_ft.yaml similarity index 100% rename from tests/e2e/redis/specifications/dev_ft.yaml rename to tests/e2e/src/specifications/dev_ft.yaml diff --git a/tests/e2e/src/test-register-ingest.py b/tests/e2e/src/test-register-ingest.py new file mode 100644 index 0000000000..ef3ac2c7ce --- /dev/null +++ b/tests/e2e/src/test-register-ingest.py @@ -0,0 +1,158 @@ +import os +import uuid + +import pytest +from google.protobuf.duration_pb2 import Duration + +from feast.client import Client +from feast.data_source import DataSource, FileOptions +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_table import FeatureTable +from feast.value_type import ValueType + +DIR_PATH = os.path.dirname(os.path.realpath(__file__)) +PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] + + +@pytest.fixture(scope="module") +def client(pytestconfig): + core_url = pytestconfig.getoption("core_url") + serving_url = pytestconfig.getoption("serving_url") + + client = Client(core_url=core_url, serving_url=serving_url,) + + client.set_project(PROJECT_NAME) + + return client + + +@pytest.fixture +def customer_entity(): + return Entity( + name="customer_id", + description="Customer entity for rides", + value_type=ValueType.STRING, + labels={"team": "customer_service", "common_key": "common_val"}, + ) + + +@pytest.fixture +def driver_entity(): + return Entity( + name="driver_id", + description="Driver entity for car rides", + value_type=ValueType.STRING, + labels={"team": "matchmaking", "common_key": "common_val"}, + ) + + +@pytest.fixture +def alltypes_entity(): + return Entity( + name="alltypes_id", + description="Driver entity for car rides", + value_type=ValueType.STRING, + labels={"cat": "alltypes"}, + ) + + +@pytest.fixture +def alltypes_featuretable(): + batch_source = DataSource( + type="BATCH_FILE", + field_mapping={ + "ride_distance": "ride_distance", + "ride_duration": "ride_duration", + }, + options=FileOptions(file_format="parquet", file_url="file://feast/*"), + timestamp_column="ts_col", + date_partition_column="date_partition_col", + ) + return FeatureTable( + name="alltypes", + entities=["alltypes_id"], + features=[ + Feature(name="float_feature", dtype=ValueType.FLOAT), + Feature(name="int64_feature", dtype=ValueType.INT64), + Feature(name="int32_feature", dtype=ValueType.INT32), + Feature(name="string_feature", dtype=ValueType.STRING), + Feature(name="bytes_feature", dtype=ValueType.BYTES), + Feature(name="bool_feature", dtype=ValueType.BOOL), + Feature(name="double_feature", dtype=ValueType.DOUBLE), + Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST), + 
Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST), + Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST), + Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST), + Feature(name="string_list_feature", dtype=ValueType.STRING_LIST), + Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST), + Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST), + ], + max_age=Duration(seconds=3600), + batch_source=batch_source, + labels={"cat": "alltypes"}, + ) + + +def test_get_list_basic(client: Client, customer_entity: Entity, driver_entity: Entity): + basic_ft_spec = FeatureTable.from_yaml(f"{DIR_PATH}/specifications/dev_ft.yaml") + + # ApplyEntity + client.apply_entity(customer_entity) + client.apply_entity(driver_entity) + + # GetEntity Check + assert client.get_entity(name="customer_id") == customer_entity + assert client.get_entity(name="driver_id") == driver_entity + + # ListEntities Check + common_filtering_labels = {"common_key": "common_val"} + matchmaking_filtering_labels = {"team": "matchmaking"} + + actual_common_entities = client.list_entities(labels=common_filtering_labels) + actual_matchmaking_entities = client.list_entities( + labels=matchmaking_filtering_labels + ) + assert len(actual_common_entities) == 2 + assert len(actual_matchmaking_entities) == 1 + + # ApplyFeatureTable + client.apply_feature_table(basic_ft_spec) + + # GetFeatureTable Check + actual_get_feature_table = client.get_feature_table(name="dev_featuretable") + assert actual_get_feature_table == basic_ft_spec + + # ListFeatureTables Check + actual_list_feature_table = [ + ft for ft in client.list_feature_tables() if ft.name == "dev_featuretable" + ][0] + assert actual_list_feature_table == basic_ft_spec + + +def test_get_list_alltypes( + client: Client, alltypes_entity: Entity, alltypes_featuretable: FeatureTable +): + # ApplyEntity + client.apply_entity(alltypes_entity) + + # GetEntity Check + assert client.get_entity(name="alltypes_id") == alltypes_entity + + # ListEntities Check + alltypes_filtering_labels = {"cat": "alltypes"} + actual_alltypes_entities = client.list_entities(labels=alltypes_filtering_labels) + assert len(actual_alltypes_entities) == 1 + + # ApplyFeatureTable + client.apply_feature_table(alltypes_featuretable) + + # GetFeatureTable Check + actual_get_feature_table = client.get_feature_table(name="alltypes") + assert actual_get_feature_table == alltypes_featuretable + + # ListFeatureTables Check + actual_list_feature_table = [ + ft for ft in client.list_feature_tables() if ft.name == "alltypes" + ][0] + assert actual_list_feature_table == alltypes_featuretable From c917762e7e35f8b2532ebe22a1424e10ee8b228a Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 18:47:46 +0800 Subject: [PATCH 13/34] Remove src folder for e2e tests Signed-off-by: Terence --- Makefile | 2 +- tests/e2e/setup.cfg | 1 - tests/e2e/{src => }/specifications/dev_ft.yaml | 0 tests/e2e/{src => }/test-register-ingest.py | 0 4 files changed, 1 insertion(+), 2 deletions(-) rename tests/e2e/{src => }/specifications/dev_ft.yaml (100%) rename tests/e2e/{src => }/test-register-ingest.py (100%) diff --git a/Makefile b/Makefile index ad755d70d3..85f3fd53e6 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ lint-python: cd ${ROOT_DIR}/sdk/python; flake8 feast/ tests/ cd ${ROOT_DIR}/sdk/python; black --check feast tests - cd ${ROOT_DIR}/tests/e2e; mypy src/ + cd ${ROOT_DIR}/tests/e2e; mypy . cd ${ROOT_DIR}/tests/e2e; isort . --check-only cd ${ROOT_DIR}/tests/e2e; flake8 . 
cd ${ROOT_DIR}/tests/e2e; black --check . diff --git a/tests/e2e/setup.cfg b/tests/e2e/setup.cfg index 0c3d9bee74..3026e38be1 100644 --- a/tests/e2e/setup.cfg +++ b/tests/e2e/setup.cfg @@ -14,5 +14,4 @@ max-complexity = 20 select = B,C,E,F,W,T4 [mypy] -files=src ignore_missing_imports=true \ No newline at end of file diff --git a/tests/e2e/src/specifications/dev_ft.yaml b/tests/e2e/specifications/dev_ft.yaml similarity index 100% rename from tests/e2e/src/specifications/dev_ft.yaml rename to tests/e2e/specifications/dev_ft.yaml diff --git a/tests/e2e/src/test-register-ingest.py b/tests/e2e/test-register-ingest.py similarity index 100% rename from tests/e2e/src/test-register-ingest.py rename to tests/e2e/test-register-ingest.py From e4af10107d3d160fef771ec103ecda008cc2f280 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 20:34:57 +0800 Subject: [PATCH 14/34] Address PR comments Signed-off-by: Terence --- infra/scripts/test-docker-compose.sh | 2 +- infra/scripts/test-end-to-end-redis-cluster.sh | 2 +- infra/scripts/test-end-to-end.sh | 2 +- sdk/python/feast/cli.py | 6 ++++++ sdk/python/feast/client.py | 6 ++---- sdk/python/feast/loaders/ingest.py | 4 +++- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/infra/scripts/test-docker-compose.sh b/infra/scripts/test-docker-compose.sh index 173c796eb0..d669f3b655 100755 --- a/infra/scripts/test-docker-compose.sh +++ b/infra/scripts/test-docker-compose.sh @@ -63,4 +63,4 @@ export FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .N ${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS}:6566 --timeout=120 # Run e2e tests for Redis -docker exec feast_jupyter_1 bash -c 'cd /feast/tests/e2e/redis && pytest -n 1 --dist=loadscope parallel-ingest-redis-serving.py --core_url core:6565 --serving_url=online_serving:6566 --kafka_brokers=kafka:9092' +docker exec feast_jupyter_1 bash -c 'cd /feast/tests/e2e && pytest *.py --core_url core:6565 --serving_url=online_serving:6566 --kafka_brokers=kafka:9092' diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index 1b67986742..0e5aa5879a 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -103,7 +103,7 @@ cd tests/e2e set +e CORE_NO=$(nproc --all) -pytest src/* -n ${CORE_NO} --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest *.py -n ${CORE_NO} --dist=loadscope --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/infra/scripts/test-end-to-end.sh b/infra/scripts/test-end-to-end.sh index 474c98de42..51b55b1763 100755 --- a/infra/scripts/test-end-to-end.sh +++ b/infra/scripts/test-end-to-end.sh @@ -120,7 +120,7 @@ cd tests/e2e set +e export GOOGLE_APPLICATION_CREDENTIALS=/etc/gcloud/service-account.json CORE_NO=$(nproc --all) -pytest src/* -n ${CORE_NO} --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest *.py -n ${CORE_NO} --dist=loadscope --enable_auth=${ENABLE_AUTH} --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? 
if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index f041b4c12f..788541ad2e 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -215,6 +215,12 @@ def feature_table(): def _get_labels_dict(label_str: str) -> Dict[str, str]: """ Converts CLI input labels string to dictionary format if provided string is valid. + + Args: + label_str: A comma-separated string of key-value pairs + + Returns: + Dict of key-value label pairs """ labels_dict: Dict[str, str] = {} labels_kv = label_str.split(",") diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index d53ccd7599..8d4d4b455a 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -588,11 +588,11 @@ def ingest( timeout: int = BATCH_INGESTION_PRODUCTION_TIMEOUT, ) -> None: """ - Batch load feature data into batch source of a specific feature table. + Batch load feature data into a FeatureTable. Args: feature_table (typing.Union[str, feast.feature_table.FeatureTable]): - Feature table object or the string name of the feature table + FeatureTable object or the string name of the feature table source (typing.Union[pd.DataFrame, str]): Either a file path or Pandas Dataframe to ingest into Feast @@ -634,13 +634,11 @@ def ingest( if isinstance(feature_table, FeatureTable): name = feature_table.name - # Read table and get row count dir_path, dest_path, column_names = _read_table_from_source( source, chunk_size, max_workers ) current_time = time.time() - print("Waiting for feature table to be ready for ingestion...") while True: if timeout is not None and time.time() - current_time >= timeout: raise TimeoutError("Timed out waiting for feature table to be ready") diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 56bb839eb8..14ed290f54 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -28,7 +28,9 @@ def _check_field_mappings( f'Provided data source does not contain entity "datetime" in columns {column_names}' ) - specified_field_mappings = [v for k, v in feature_table_field_mappings.items()] + specified_field_mappings = list() + for k, v in feature_table_field_mappings.items(): + specified_field_mappings.append(v) is_valid = all(col_name in column_names for col_name in specified_field_mappings) From 69bda0290871cd1c48d9a4b972d9a3e537fe77d7 Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 22:58:25 +0800 Subject: [PATCH 15/34] Expose Source instead of Options native class Signed-off-by: Terence --- sdk/python/feast/__init__.py | 16 +- sdk/python/feast/client.py | 21 +- sdk/python/feast/data_source.py | 268 ++++++++++++++++++------- sdk/python/feast/feature_table.py | 54 ++--- sdk/python/tests/test_client.py | 17 +- sdk/python/tests/test_feature_table.py | 17 +- 6 files changed, 260 insertions(+), 133 deletions(-) diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index 298b8ac975..adf2aaf181 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -2,11 +2,11 @@ from .client import Client from .data_source import ( - BigQueryOptions, + BigQuerySource, DataSource, - FileOptions, - KafkaOptions, - KinesisOptions, + FileSource, + KafkaSource, + KinesisSource, SourceType, ) from .entity import Entity @@ -24,10 +24,10 @@ "Client", "Entity", "DataSource", - "BigQueryOptions", - "FileOptions", - "KafkaOptions", - "KinesisOptions", + "BigQuerySource", + "FileSource", + "KafkaSource", + "KinesisSource", "Feature", 
"FeatureTable", "SourceType", diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 8d4d4b455a..283f878074 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -638,20 +638,13 @@ def ingest( source, chunk_size, max_workers ) - current_time = time.time() - while True: - if timeout is not None and time.time() - current_time >= timeout: - raise TimeoutError("Timed out waiting for feature table to be ready") - fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( - name, project - ) - if fetched_feature_table is not None: - feature_table = fetched_feature_table - break - time.sleep(3) - - if timeout is not None: - timeout = timeout - int(time.time() - current_time) + fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( + name, project + ) + if fetched_feature_table is not None: + feature_table = fetched_feature_table + else: + raise Exception(f"FeatureTable, {name} cannot be found.") # Check 1) Only parquet file format for FeatureTable batch source is supported if ( diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 59020f8ec9..38e9e695e0 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -14,7 +14,7 @@ import enum -from typing import Dict, Optional, Union +from typing import Dict, Optional from feast.core.DataSource_pb2 import DataSource as DataSourceProto @@ -354,13 +354,11 @@ def __init__( self, type: str, field_mapping: Dict[str, str], - options: Union[BigQueryOptions, FileOptions, KafkaOptions, KinesisOptions], timestamp_column: str, date_partition_column: Optional[str] = "", ): self._type = type self._field_mapping = field_mapping - self._options = options self._timestamp_column = timestamp_column self._date_partition_column = date_partition_column @@ -392,20 +390,6 @@ def field_mapping(self, field_mapping): """ self._field_mapping = field_mapping - @property - def options(self): - """ - Returns the options of this data source - """ - return self._options - - @options.setter - def options(self, options): - """ - Sets the options of this data source - """ - self._options = options - @property def timestamp_column(self): """ @@ -437,31 +421,50 @@ def date_partition_column(self, date_partition_column): @classmethod def from_proto(cls, data_source_proto: DataSourceProto): """ - Creates a DataSource from a protobuf representation of an data source + Creates a DataSource from a protobuf representation of a data source + """ + raise NotImplementedError - Args: - data_source_proto: A protobuf representation of a DataSource + def to_proto(self) -> DataSourceProto: + """ + Converts an DataSourceProto object to its protobuf representation. + """ + raise NotImplementedError - Returns: - Returns a DataSource object based on the data_source protobuf - """ - - if isinstance(cls.options, FileOptions): - data_source = cls(file_options=data_source_proto.options,) - if isinstance(cls.options, BigQueryOptions): - data_source = cls(bigquery_options=data_source_proto.options,) - if isinstance(cls.options, KafkaOptions): - data_source = cls(kafka_options=data_source_proto.options,) - if isinstance(cls.options, KinesisOptions): - data_source = cls(kinesis_options=data_source_proto.options,) - else: - raise TypeError( - "DataSource.from_proto: Provided DataSource option is invalid. Only FileOptions, BigQueryOptions, KafkaOptions and KinesisOptions are supported currently." 
- ) + +class FileSource(DataSource): + def __init__( + self, + type, + field_mapping, + timestamp_column, + file_format, + file_url, + date_partition_column="", + ): + super().__init__(type, field_mapping, timestamp_column, date_partition_column) + self._file_options = FileOptions(file_format=file_format, file_url=file_url) + + @property + def file_options(self): + """ + Returns the file options of this data source + """ + return self._file_options + + @file_options.setter + def file_options(self, file_options): + """ + Sets the file options of this data source + """ + self._file_options = file_options + + def from_proto(cls, data_source_proto): data_source = cls( type=data_source_proto.type, field_mapping=data_source_proto.field_mapping, + file_options=cls.file_options, timestamp_column=data_source_proto.timestamp_column, date_partition_column=data_source_proto.date_partition_column, ) @@ -469,42 +472,167 @@ def from_proto(cls, data_source_proto: DataSourceProto): return data_source def to_proto(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + type=self.type, + field_mapping=self.field_mapping, + file_options=self.file_options.to_proto(), + ) + + data_source_proto.timestamp_column = self.timestamp_column + data_source_proto.date_partition_column = self.date_partition_column + + return data_source_proto + + +class BigQuerySource(DataSource): + def __init__( + self, type, field_mapping, timestamp_column, table_ref, date_partition_column="" + ): + super().__init__(type, field_mapping, timestamp_column, date_partition_column) + self._bigquery_options = BigQueryOptions(table_ref=table_ref,) + + @property + def bigquery_options(self): """ - Converts an DataSourceProto object to its protobuf representation. - Used when passing DataSourceProto object to Feast request. + Returns the bigquery options of this data source + """ + return self._bigquery_options - Returns: - DataSourceProto protobuf - """ - - if isinstance(self.options, FileOptions): - data_source_proto = DataSourceProto( - type=self.type, - field_mapping=self.field_mapping, - file_options=self.options.to_proto(), - ) - elif isinstance(self.options, BigQueryOptions): - data_source_proto = DataSourceProto( - type=self.type, - field_mapping=self.field_mapping, - bigquery_options=self.options.to_proto(), - ) - elif isinstance(self.options, KafkaOptions): - data_source_proto = DataSourceProto( - type=self.type, - field_mapping=self.field_mapping, - kafka_options=self.options.to_proto(), - ) - elif isinstance(self.options, KinesisOptions): - data_source_proto = DataSourceProto( - type=self.type, - field_mapping=self.field_mapping, - kinesis_options=self.options.to_proto(), - ) - else: - raise TypeError( - "DataSource.to_proto: Provided DataSource option is invalid. Only FileOptions, BigQueryOptions, KafkaOptions and KinesisOptions are supported currently." 
- ) + @bigquery_options.setter + def bigquery_options(self, bigquery_options): + """ + Sets the bigquery options of this data source + """ + self._bigquery_options = bigquery_options + + def from_proto(cls, data_source_proto): + + data_source = cls( + type=data_source_proto.type, + field_mapping=data_source_proto.field_mapping, + bigquery_options=cls.bigquery_options, + timestamp_column=data_source_proto.timestamp_column, + date_partition_column=data_source_proto.date_partition_column, + ) + + return data_source + + def to_proto(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + type=self.type, + field_mapping=self.field_mapping, + bigquery_options=self.bigquery_options.to_proto(), + ) + + data_source_proto.timestamp_column = self.timestamp_column + data_source_proto.date_partition_column = self.date_partition_column + + return data_source_proto + + +class KafkaSource(DataSource): + def __init__( + self, + type, + field_mapping, + timestamp_column, + bootstrap_servers, + class_path, + topic, + date_partition_column="", + ): + super().__init__(type, field_mapping, timestamp_column, date_partition_column) + self._kafka_options = KafkaOptions( + bootstrap_servers=bootstrap_servers, class_path=class_path, topic=topic + ) + + @property + def kafka_options(self): + """ + Returns the kafka options of this data source + """ + return self._kafka_options + + @kafka_options.setter + def kafka_options(self, kafka_options): + """ + Sets the kafka options of this data source + """ + self._kafka_options = kafka_options + + def from_proto(cls, data_source_proto): + + data_source = cls( + type=data_source_proto.type, + field_mapping=data_source_proto.field_mapping, + kafka_options=cls.kafka_options, + timestamp_column=data_source_proto.timestamp_column, + date_partition_column=data_source_proto.date_partition_column, + ) + + return data_source + + def to_proto(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + type=self.type, + field_mapping=self.field_mapping, + kafka_options=self.kafka_options.to_proto(), + ) + + data_source_proto.timestamp_column = self.timestamp_column + data_source_proto.date_partition_column = self.date_partition_column + + return data_source_proto + + +class KinesisSource(DataSource): + def __init__( + self, + type, + field_mapping, + timestamp_column, + class_path, + region, + stream_name, + date_partition_column="", + ): + super().__init__(type, field_mapping, timestamp_column, date_partition_column) + self._kinesis_options = KinesisOptions( + class_path=class_path, region=region, stream_name=stream_name + ) + + @property + def kinesis_options(self): + """ + Returns the kinesis options of this data source + """ + return self._kinesis_options + + @kinesis_options.setter + def kinesis_options(self, kinesis_options): + """ + Sets the kinesis options of this data source + """ + self._kinesis_options = kinesis_options + + def from_proto(cls, data_source_proto): + + data_source = cls( + type=data_source_proto.type, + field_mapping=data_source_proto.field_mapping, + kinesis_options=cls.kinesis_options, + timestamp_column=data_source_proto.timestamp_column, + date_partition_column=data_source_proto.date_partition_column, + ) + + return data_source + + def to_proto(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + type=self.type, + field_mapping=self.field_mapping, + kinesis_options=self.kinesis_options.to_proto(), + ) data_source_proto.timestamp_column = self.timestamp_column data_source_proto.date_partition_column = 
self.date_partition_column diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index bb35eb534d..b1f722c40d 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, MutableMapping, Optional +from typing import Dict, List, MutableMapping, Optional, Union import yaml from google.protobuf import json_format @@ -25,11 +25,11 @@ from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto from feast.core.FeatureTable_pb2 import FeatureTableSpec as FeatureTableSpecProto from feast.data_source import ( - BigQueryOptions, + BigQuerySource, DataSource, - FileOptions, - KafkaOptions, - KinesisOptions, + FileSource, + KafkaSource, + KinesisSource, SourceType, ) from feast.feature import Feature @@ -47,8 +47,8 @@ def __init__( name: str, entities: List[str], features: List[Feature], - batch_source: Optional[DataSource] = None, - stream_source: Optional[DataSource] = None, + batch_source: Union[BigQuerySource, FileSource] = None, + stream_source: Optional[Union[KafkaSource, KinesisSource]] = None, max_age: Optional[Duration] = None, labels: Optional[MutableMapping[str, str]] = None, ): @@ -275,47 +275,55 @@ def _to_data_source(cls, data_source): and data_source.file_options.file_format and data_source.file_options.file_url ): - data_source_options = FileOptions( + data_source_proto = FileSource( + type=data_source.type, + field_mapping=data_source.field_mapping, file_format=data_source.file_options.file_format, file_url=data_source.file_options.file_url, - ) + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ).to_proto() elif source_type == "BATCH_BIGQUERY" and data_source.bigquery_options.table_ref: - data_source_options = BigQueryOptions( + data_source_proto = BigQuerySource( + type=data_source.type, + field_mapping=data_source.field_mapping, table_ref=data_source.bigquery_options.table_ref, - ) + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ).to_proto() elif ( source_type == "STREAM_KAFKA" and data_source.kafka_options.bootstrap_servers and data_source.kafka_options.topic and data_source.kafka_options.class_path ): - data_source_options = KafkaOptions( + data_source_proto = KafkaSource( + type=data_source.type, + field_mapping=data_source.field_mapping, bootstrap_servers=data_source.kafka_options.bootstrap_servers, class_path=data_source.kafka_options.class_path, topic=data_source.kafka_options.topic, - ) + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ).to_proto() elif ( source_type == "STREAM_KINESIS" and data_source.kinesis_options.class_path and data_source.kinesis_options.region and data_source.kinesis_options.stream_name ): - data_source_options = KinesisOptions( + data_source_proto = KinesisSource( + type=data_source.type, + field_mapping=data_source.field_mapping, class_path=data_source.kinesis_options.class_path, region=data_source.kinesis_options.region, stream_name=data_source.kinesis_options.stream_name, - ) + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ).to_proto() else: raise ValueError("Could not identify the source type being added") - data_source_proto = DataSource( - type=data_source.type, - 
field_mapping=data_source.field_mapping, - options=data_source_options, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ).to_proto() - return data_source_proto @classmethod diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index 4964a84c96..cd83d10fcd 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -39,7 +39,7 @@ from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto from feast.core.FeatureTable_pb2 import FeatureTableSpec as FeatureTableSpecProto -from feast.data_source import DataSource, FileOptions, KafkaOptions +from feast.data_source import FileSource, KafkaSource from feast.entity import Entity from feast.feature import Feature from feast.feature_table import FeatureTable @@ -568,28 +568,27 @@ def test_apply_feature_table_success(self, test_client): test_client.set_project("project1") # Create Feature Tables - batch_source = DataSource( + batch_source = FileSource( type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", }, - options=FileOptions(file_format="avro", file_url="data/test.avro"), + file_format="parquet", + file_url="file://feast/*", timestamp_column="ts_col", date_partition_column="date_partition_col", ) - stream_source = DataSource( + stream_source = KafkaSource( type="STREAM_KAFKA", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", }, - options=KafkaOptions( - bootstrap_servers="localhost:9094", - class_path="random/path/to/class", - topic="test_topic", - ), + bootstrap_servers="localhost:9094", + class_path="random/path/to/class", + topic="test_topic", timestamp_column="ts_col", ) diff --git a/sdk/python/tests/test_feature_table.py b/sdk/python/tests/test_feature_table.py index 8d9891e67d..d4085b61e0 100644 --- a/sdk/python/tests/test_feature_table.py +++ b/sdk/python/tests/test_feature_table.py @@ -21,7 +21,7 @@ from feast.client import Client from feast.core import CoreService_pb2_grpc as Core -from feast.data_source import DataSource, FileOptions, KafkaOptions +from feast.data_source import FileSource, KafkaSource from feast.feature import Feature from feast.feature_table import FeatureTable from feast.value_type import ValueType @@ -54,28 +54,27 @@ def client(self, server): def test_feature_table_import_export_yaml(self): - batch_source = DataSource( + batch_source = FileSource( type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", }, - options=FileOptions(file_format="avro", file_url="data/test.avro"), + file_format="parquet", + file_url="file://feast/*", timestamp_column="ts_col", date_partition_column="date_partition_col", ) - stream_source = DataSource( + stream_source = KafkaSource( type="STREAM_KAFKA", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", }, - options=KafkaOptions( - bootstrap_servers="localhost:9094", - class_path="random/path/to/class", - topic="test_topic", - ), + bootstrap_servers="localhost:9094", + class_path="random/path/to/class", + topic="test_topic", timestamp_column="ts_col", ) From 7f794c3c9e0d9c176c3b2d4470c7ecdd4ffa712b Mon Sep 17 00:00:00 2001 From: Terence Date: Mon, 5 Oct 2020 22:58:52 +0800 Subject: [PATCH 16/34] Refactor e2e tests without specification Signed-off-by: Terence --- tests/e2e/specifications/dev_ft.yaml | 38 ----------- 
...st-register-ingest.py => test-register.py} | 65 ++++++++++++++++--- 2 files changed, 55 insertions(+), 48 deletions(-) delete mode 100644 tests/e2e/specifications/dev_ft.yaml rename tests/e2e/{test-register-ingest.py => test-register.py} (71%) diff --git a/tests/e2e/specifications/dev_ft.yaml b/tests/e2e/specifications/dev_ft.yaml deleted file mode 100644 index 59072b73b9..0000000000 --- a/tests/e2e/specifications/dev_ft.yaml +++ /dev/null @@ -1,38 +0,0 @@ -spec: - name: dev_featuretable - entities: - - driver_id - - customer_id - features: - - name: dev_feature_float - valueType: FLOAT - - name: dev_feature_string - valueType: STRING - labels: - feature_key1: feature_val1 - batchSource: - type: BATCH_FILE - fieldMapping: - dev_entity: dev_entity_field - dev_feature_float: dev_feature_float_field - dev_feature_string: dev_feature_string_field - timestampColumn: datetime_col - datePartitionColumn: datetime - file_options: - file_format: PARQUET - file_url: gs://example/feast/* - streamSource: - type: STREAM_KAFKA - field_mapping: - dev_entity: dev_entity_field - dev_feature_float: dev_feature_float_field - dev_feature_string: dev_feature_string_field - timestampColumn: datetime_col - kafka_options: - bootstrap_servers: "localhost:9094" - topic: test_topic - class_path: random/path/to/test - maxAge: 14400s - labels: - key1: val1 - key2: val2 \ No newline at end of file diff --git a/tests/e2e/test-register-ingest.py b/tests/e2e/test-register.py similarity index 71% rename from tests/e2e/test-register-ingest.py rename to tests/e2e/test-register.py index ef3ac2c7ce..c587a15879 100644 --- a/tests/e2e/test-register-ingest.py +++ b/tests/e2e/test-register.py @@ -5,7 +5,7 @@ from google.protobuf.duration_pb2 import Duration from feast.client import Client -from feast.data_source import DataSource, FileOptions +from feast.data_source import FileSource, KafkaSource from feast.entity import Entity from feast.feature import Feature from feast.feature_table import FeatureTable @@ -47,6 +47,46 @@ def driver_entity(): ) +@pytest.fixture +def basic_featuretable(): + batch_source = FileSource( + type="BATCH_FILE", + field_mapping={ + "dev_entity": "dev_entity_field", + "dev_feature_float": "dev_feature_float_field", + "dev_feature_string": "dev_feature_string_field", + }, + file_format="PARQUET", + file_url="gs://example/feast/*", + timestamp_column="datetime_col", + date_partition_column="datetime", + ) + stream_source = KafkaSource( + type="STREAM_KAFKA", + field_mapping={ + "dev_entity": "dev_entity_field", + "dev_feature_float": "dev_feature_float_field", + "dev_feature_string": "dev_feature_string_field", + }, + bootstrap_servers="localhost:9094", + class_path="random/path/to/class", + topic="test_topic", + timestamp_column="datetime_col", + ) + return FeatureTable( + name="basic_featuretable", + entities=["driver_id", "customer_id"], + features=[ + Feature(name="dev_feature_float", dtype=ValueType.FLOAT), + Feature(name="dev_feature_string", dtype=ValueType.STRING), + ], + max_age=Duration(seconds=3600), + batch_source=batch_source, + stream_source=stream_source, + labels={"key1": "val1", "key2": "val2"}, + ) + + @pytest.fixture def alltypes_entity(): return Entity( @@ -59,13 +99,14 @@ def alltypes_entity(): @pytest.fixture def alltypes_featuretable(): - batch_source = DataSource( + batch_source = FileSource( type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", }, - options=FileOptions(file_format="parquet", file_url="file://feast/*"), + 
file_format="parquet", + file_url="file://feast/*", timestamp_column="ts_col", date_partition_column="date_partition_col", ) @@ -94,8 +135,12 @@ def alltypes_featuretable(): ) -def test_get_list_basic(client: Client, customer_entity: Entity, driver_entity: Entity): - basic_ft_spec = FeatureTable.from_yaml(f"{DIR_PATH}/specifications/dev_ft.yaml") +def test_get_list_basic( + client: Client, + customer_entity: Entity, + driver_entity: Entity, + basic_featuretable: FeatureTable, +): # ApplyEntity client.apply_entity(customer_entity) @@ -117,17 +162,17 @@ def test_get_list_basic(client: Client, customer_entity: Entity, driver_entity: assert len(actual_matchmaking_entities) == 1 # ApplyFeatureTable - client.apply_feature_table(basic_ft_spec) + client.apply_feature_table(basic_featuretable) # GetFeatureTable Check - actual_get_feature_table = client.get_feature_table(name="dev_featuretable") - assert actual_get_feature_table == basic_ft_spec + actual_get_feature_table = client.get_feature_table(name="basic_featuretable") + assert actual_get_feature_table == basic_featuretable # ListFeatureTables Check actual_list_feature_table = [ - ft for ft in client.list_feature_tables() if ft.name == "dev_featuretable" + ft for ft in client.list_feature_tables() if ft.name == "basic_featuretable" ][0] - assert actual_list_feature_table == basic_ft_spec + assert actual_list_feature_table == basic_featuretable def test_get_list_alltypes( From a3d5d94192e69e52a13b55e4c4a27402ffab1f61 Mon Sep 17 00:00:00 2001 From: Terence Date: Tue, 6 Oct 2020 11:43:26 +0800 Subject: [PATCH 17/34] Fix data partitioning for ingest method Signed-off-by: Terence --- sdk/python/feast/client.py | 58 ++++++++++++++---------- sdk/python/feast/loaders/ingest.py | 72 ++++++++++++++++++++++++++---- 2 files changed, 99 insertions(+), 31 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 283f878074..4da2854734 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import datetime import logging import multiprocessing import os @@ -69,6 +68,7 @@ from feast.loaders.ingest import ( BATCH_INGESTION_PRODUCTION_TIMEOUT, _check_field_mappings, + _partition_by_date, ) from feast.serving.ServingService_pb2 import GetFeastServingInfoRequest from feast.serving.ServingService_pb2_grpc import ServingServiceStub @@ -634,10 +634,6 @@ def ingest( if isinstance(feature_table, FeatureTable): name = feature_table.name - dir_path, dest_path, column_names = _read_table_from_source( - source, chunk_size, max_workers - ) - fetched_feature_table: Optional[FeatureTable] = self.get_feature_table( name, project ) @@ -646,6 +642,10 @@ def ingest( else: raise Exception(f"FeatureTable, {name} cannot be found.") + dir_path, dest_path, column_names = _read_table_from_source( + source, chunk_size, max_workers + ) + # Check 1) Only parquet file format for FeatureTable batch source is supported if ( feature_table.batch_source @@ -662,7 +662,14 @@ def ingest( # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table _check_field_mappings( - column_names, name, feature_table.batch_source.field_mapping + column_names, + name, + feature_table.batch_source.timestamp_column, + feature_table.batch_source.field_mapping, + ) + # Partition dataset by date + date_partition_dest_path = _partition_by_date( + column_names, feature_table, dest_path, ) batch_source_type = SourceType(feature_table.batch_source.type).name @@ -675,36 +682,41 @@ def ingest( uri = urlparse(file_url) staging_client = get_staging_client(uri.scheme) - file_name = dest_path.split("/")[-1] - date_today = datetime.datetime.today().strftime("%Y-%m-%d") - - staging_client.upload_file( - dest_path, - uri.hostname, - str(uri.path).strip("/") + "/" + f"date={date_today}/" + file_name, - ) + file_paths = list() + for (dirpath, dirnames, filenames) in os.walk(date_partition_dest_path): + file_paths += [os.path.join(dirpath, file) for file in filenames] + for path in file_paths: + file_name = path.split("/")[-1] + partition_date = path.split("/")[-2].split("=")[-1] + staging_client.upload_file( + path, + uri.hostname, + str(uri.path).strip("/") + + "/" + + f"date={partition_date}/" + + file_name, + ) if batch_source_type == "BATCH_BIGQUERY": from google.cloud import bigquery bq_table_ref = feature_table.batch_source.bigquery_options.table_ref gcp_project, dataset_table = bq_table_ref.split(":") - dataset, table = dataset_table.split(".") client = bigquery.Client(project=gcp_project) - table_ref = client.dataset(dataset).table(table) + bq_table_ref = bq_table_ref.replace(":", ".") + table = bigquery.table.Table(bq_table_ref) + job_config = bigquery.LoadJobConfig() job_config.source_format = bigquery.SourceFormat.PARQUET - # Check for date partitioning column in FeatureTable spec - if feature_table.batch_source.date_partition_column: - time_partitioning_obj = bigquery.table.TimePartitioning( - field=feature_table.batch_source.date_partition_column - ) - job_config.time_partitioning = time_partitioning_obj + time_partitioning_obj = bigquery.table.TimePartitioning( + field=feature_table.batch_source.timestamp_column + ) + job_config.time_partitioning = time_partitioning_obj with open(dest_path, "rb") as source_file: client.load_table_from_file( - source_file, table_ref, job_config=job_config + source_file, table, job_config=job_config ) finally: # Remove parquet file(s) that were created earlier diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 
14ed290f54..a6407f6e15 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -1,5 +1,12 @@ +import tempfile from typing import Dict, List +import pandas as pd +import pyarrow as pa +from pyarrow import parquet as pq + +from feast.feature_table import FeatureTable + GRPC_CONNECTION_TIMEOUT_DEFAULT = 3 # type: int GRPC_CONNECTION_TIMEOUT_APPLY = 300 # type: int FEAST_SERVING_URL_ENV_KEY = "FEAST_SERVING_URL" # type: str @@ -11,21 +18,22 @@ def _check_field_mappings( column_names: List[str], feature_table_name: str, + feature_table_timestamp_column: str, feature_table_field_mappings: Dict[str, str], ) -> None: """ - Checks that all specified field mappings in FeatureTable can be found in - column names of specified ingestion source. + Checks that all specified field mappings in FeatureTable can be found in + column names of specified ingestion source. - Args: - column_names: Column names in provided ingestion source - feature_table_name: Name of FeatureTable - feature_table_field_mappings: Field mappings of FeatureTable + Args: + column_names: Column names in provided ingestion source + feature_table_name: Name of FeatureTable + feature_table_field_mappings: Field mappings of FeatureTable """ - if "datetime" not in column_names: + if feature_table_timestamp_column not in column_names: raise ValueError( - f'Provided data source does not contain entity "datetime" in columns {column_names}' + f"Provided data source does not contain timestamp column {feature_table_timestamp_column} in columns {column_names}" ) specified_field_mappings = list() @@ -39,3 +47,51 @@ def _check_field_mappings( f"Provided data source does not contain all field mappings previously " f"defined for FeatureTable, {feature_table_name}." ) + + +def _partition_by_date( + column_names: List[str], feature_table: FeatureTable, file_path: str, +) -> str: + """ + Partitions dataset by date based on timestamp_column. + Assumes date_partition_column is in date format if provided. + + Args: + column_names: Column names in provided ingestion source + feature_table: FeatureTable + file_path: File path to existing parquet file that's not yet partitioned + + Returns: + str: + Root directory which contains date partitioned files. 
+ """ + df = pd.read_parquet(file_path) + # Date-partitioned dataset temp path + dir_path = tempfile.mkdtemp() + + # Case: date_partition_column is provided and dataset contains it + if ( + feature_table.batch_source.date_partition_column + and feature_table.batch_source.date_partition_column in column_names + ): + table = pa.Table.from_pandas(df) + pq.write_to_dataset( + table=table, + root_path=dir_path, + partition_cols=[feature_table.batch_source.date_partition_column], + ) + return dir_path + + # Case: date_partition_column is provided and dataset does not contain it + if feature_table.batch_source.date_partition_column: + feast_partition_col = feature_table.batch_source.date_partition_column + else: + feast_partition_col = "feast_partition_col" + + df[feast_partition_col] = df[feature_table.batch_source.timestamp_column].dt.date + table = pa.Table.from_pandas(df) + pq.write_to_dataset( + table=table, root_path=dir_path, partition_cols=[feast_partition_col] + ) + + return dir_path From 6ce6e4169e3569dd5f8c554a5c800407bbc4a923 Mon Sep 17 00:00:00 2001 From: Terence Date: Tue, 6 Oct 2020 16:33:07 +0800 Subject: [PATCH 18/34] Cleanup date partition logic and add ingest test Signed-off-by: Terence --- sdk/python/feast/client.py | 46 +++++++---- sdk/python/feast/loaders/ingest.py | 33 +++----- sdk/python/feast/staging/storage_client.py | 8 +- sdk/python/tests/test_client.py | 90 ++++++++++++++++++++++ 4 files changed, 139 insertions(+), 38 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 4da2854734..b4e060f607 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -668,9 +668,14 @@ def ingest( feature_table.batch_source.field_mapping, ) # Partition dataset by date - date_partition_dest_path = _partition_by_date( - column_names, feature_table, dest_path, - ) + date_partition_dest_path = None + if feature_table.batch_source.date_partition_column: + date_partition_dest_path = _partition_by_date( + column_names, + feature_table.batch_source.date_partition_column, + feature_table.batch_source.timestamp_column, + dest_path, + ) batch_source_type = SourceType(feature_table.batch_source.type).name @@ -682,19 +687,32 @@ def ingest( uri = urlparse(file_url) staging_client = get_staging_client(uri.scheme) - file_paths = list() - for (dirpath, dirnames, filenames) in os.walk(date_partition_dest_path): - file_paths += [os.path.join(dirpath, file) for file in filenames] - for path in file_paths: - file_name = path.split("/")[-1] - partition_date = path.split("/")[-2].split("=")[-1] + if date_partition_dest_path is not None: + file_paths = list() + for (dirpath, dirnames, filenames) in os.walk( + date_partition_dest_path + ): + file_paths += [ + os.path.join(dirpath, file) for file in filenames + ] + for path in file_paths: + file_name = path.split("/")[-1] + partition_col = path.split("/")[-2] + staging_client.upload_file( + path, + uri.hostname, + str(uri.path).strip("/") + + "/" + + partition_col + + "/" + + file_name, + ) + else: + file_name = dest_path.split("/")[-1] staging_client.upload_file( - path, + dest_path, uri.hostname, - str(uri.path).strip("/") - + "/" - + f"date={partition_date}/" - + file_name, + str(uri.path).strip("/") + "/" + file_name, ) if batch_source_type == "BATCH_BIGQUERY": from google.cloud import bigquery diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index a6407f6e15..f817a27cd3 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -5,8 +5,6 
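Before the new ingest test below, a short sketch (assumed paths and column names, not taken from the patch) of how a dataset written with write_to_dataset reads back: pq.read_table walks the partition folders and returns the partition column as string/categorical values, which is why the test re-parses that column with pd.to_datetime before comparing frames.

# Minimal sketch (assumptions only): reading back a date-partitioned parquet
# dataset that was written with pq.write_to_dataset.
import pandas as pd
from pyarrow import parquet as pq

pq_df = pq.read_table("feast/").to_pandas()  # collects every date_partition_col=*/ file
# Partition values come back as strings/categories, so normalise the dtype
# before comparing against the original frame.
pq_df["date_partition_col"] = pd.to_datetime(
    pq_df["date_partition_col"].astype(str), utc=True
)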
@@ import pyarrow as pa from pyarrow import parquet as pq -from feast.feature_table import FeatureTable - GRPC_CONNECTION_TIMEOUT_DEFAULT = 3 # type: int GRPC_CONNECTION_TIMEOUT_APPLY = 300 # type: int FEAST_SERVING_URL_ENV_KEY = "FEAST_SERVING_URL" # type: str @@ -50,7 +48,10 @@ def _check_field_mappings( def _partition_by_date( - column_names: List[str], feature_table: FeatureTable, file_path: str, + column_names: List[str], + feature_table_date_partition_column: str, + feature_table_timestamp_column: str, + file_path: str, ) -> str: """ Partitions dataset by date based on timestamp_column. @@ -69,29 +70,17 @@ def _partition_by_date( # Date-partitioned dataset temp path dir_path = tempfile.mkdtemp() - # Case: date_partition_column is provided and dataset contains it - if ( - feature_table.batch_source.date_partition_column - and feature_table.batch_source.date_partition_column in column_names - ): - table = pa.Table.from_pandas(df) - pq.write_to_dataset( - table=table, - root_path=dir_path, - partition_cols=[feature_table.batch_source.date_partition_column], - ) - return dir_path - # Case: date_partition_column is provided and dataset does not contain it - if feature_table.batch_source.date_partition_column: - feast_partition_col = feature_table.batch_source.date_partition_column - else: - feast_partition_col = "feast_partition_col" + if feature_table_date_partition_column not in column_names: + df[feature_table_date_partition_column] = df[ + feature_table_timestamp_column + ].dt.date - df[feast_partition_col] = df[feature_table.batch_source.timestamp_column].dt.date table = pa.Table.from_pandas(df) pq.write_to_dataset( - table=table, root_path=dir_path, partition_cols=[feast_partition_col] + table=table, + root_path=dir_path, + partition_cols=[feature_table_date_partition_column], ) return dir_path diff --git a/sdk/python/feast/staging/storage_client.py b/sdk/python/feast/staging/storage_client.py index 3b391410b2..a10558b38c 100644 --- a/sdk/python/feast/staging/storage_client.py +++ b/sdk/python/feast/staging/storage_client.py @@ -14,7 +14,9 @@ # limitations under the License. +import os import re +import shutil from abc import ABC, ABCMeta, abstractmethod from tempfile import TemporaryFile from typing import List @@ -227,8 +229,10 @@ def download_file(self, uri: ParseResult) -> IO[bytes]: def list_files(self, bucket: str, path: str) -> List[str]: raise NotImplementedError("list files not implemented for Local file") - def upload_file(self, local_path: str, bucket: str, remote_path: str): - pass # For test cases + def upload_file(self, local_path: str, folder: str, remote_path: str): + dest_fpath = os.path.join(folder + "/" + remote_path) + os.makedirs(os.path.dirname(dest_fpath), exist_ok=True) + shutil.copy(local_path, dest_fpath) storage_clients = {GS: GCSClient, S3: S3Client, LOCAL_FILE: LocalFSClient} diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index cd83d10fcd..b38b1102e3 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -11,15 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import os import pkgutil import socket from concurrent import futures +from datetime import datetime, timedelta from unittest import mock import grpc +import numpy as np +import pandas as pd import pytest +import pytz from google.protobuf.duration_pb2 import Duration from mock import MagicMock, patch +from pandas.util.testing import assert_frame_equal +from pyarrow import parquet as pq from pytest_lazyfixture import lazy_fixture from feast.client import Client @@ -628,6 +635,89 @@ def test_apply_feature_table_success(self, test_client): and feature_tables[0].entities[0] == "fs1-my-entity-1" ) + @pytest.mark.parametrize( + "mocked_client", [lazy_fixture("mock_client")], + ) + def test_ingest(self, mocked_client, mocker): + mocked_client._core_service_stub = Core.CoreServiceStub( + grpc.insecure_channel("") + ) + + N_ROWS = 100 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + final_offset = ( + [time_offset] * 33 + + [time_offset - timedelta(days=1)] * 33 + + [time_offset - timedelta(days=2)] * 34 + ) + final_part_offset = ( + [time_offset - timedelta(days=99)] * 33 + + [time_offset - timedelta(days=100)] * 33 + + [time_offset - timedelta(days=101)] * 34 + ) + ft_df = pd.DataFrame( + { + "datetime": final_offset, + "datetime_col": final_part_offset, + "dev_feature_float": [np.float(row) for row in range(N_ROWS)], + "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], + } + ) + + mocker.patch.object( + mocked_client._core_service_stub, + "GetFeatureTable", + return_value=GetFeatureTableResponse( + table=FeatureTableProto( + spec=FeatureTableSpecProto( + name="ingest_featuretable", + max_age=Duration(seconds=3600), + features=[ + FeatureSpecProto( + name="dev_feature_float", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpecProto( + name="dev_feature_string", + value_type=ValueProto.ValueType.STRING, + ), + ], + entities=["dev_entity"], + batch_source=DataSourceProto( + type="BATCH_FILE", + field_mapping={ + "dev_feature_float": "dev_feature_float", + "dev_feature_string": "dev_feature_string", + }, + file_options=DataSourceProto.FileOptions( + file_format="parquet", file_url="file://feast/*" + ), + timestamp_column="datetime", + date_partition_column="datetime_col", + ), + ), + meta=FeatureTableMetaProto(), + ) + ), + ) + + mocked_client.set_project("my_project") + ft = mocked_client.get_feature_table("ingest_featuretable") + mocked_client.ingest(ft, ft_df, timeout=600) + + dest_fpath = os.path.join("feast/") + pq_df = pq.read_table(dest_fpath).to_pandas() + + ft_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) + ft_df = ft_df.reindex(sorted(ft_df.columns), axis=1) + ft_df.reset_index(drop=True, inplace=True) + pq_df.reset_index(drop=True, inplace=True) + pq_df["datetime_col"] = pd.to_datetime(pq_df.datetime_col).dt.tz_convert("UTC") + + assert_frame_equal(ft_df, pq_df) + @patch("grpc.channel_ready_future") def test_secure_channel_creation_with_secure_client( self, _mocked_obj, core_server, serving_server From 4ec077cac6a93f1d3777763c3a0362e8324a69f1 Mon Sep 17 00:00:00 2001 From: Terence Date: Tue, 6 Oct 2020 17:30:24 +0800 Subject: [PATCH 19/34] Remove type field from datasource classes Signed-off-by: Terence --- sdk/python/feast/client.py | 17 +++++------ sdk/python/feast/data_source.py | 41 ++++++-------------------- sdk/python/feast/feature_table.py | 21 +++---------- sdk/python/tests/test_client.py | 2 -- 
sdk/python/tests/test_feature_table.py | 2 -- tests/e2e/test-register.py | 3 -- 6 files changed, 21 insertions(+), 65 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index b4e060f607..7f502afe19 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -60,7 +60,7 @@ ListProjectsResponse, ) from feast.core.CoreService_pb2_grpc import CoreServiceStub -from feast.data_source import SourceType +from feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.entity import Entity from feast.feature_table import FeatureTable from feast.grpc import auth as feast_auth @@ -649,7 +649,7 @@ def ingest( # Check 1) Only parquet file format for FeatureTable batch source is supported if ( feature_table.batch_source - and SourceType(feature_table.batch_source.type).name == "BATCH_FILE" + and feature_table.batch_source.type == DataSourceProto.BATCH_FILE and "".join( feature_table.batch_source.file_options.file_format.split() ).lower() @@ -677,10 +677,11 @@ def ingest( dest_path, ) - batch_source_type = SourceType(feature_table.batch_source.type).name - try: - if batch_source_type == "BATCH_FILE": + if ( + feature_table.batch_source.file_options.file_format + and feature_table.batch_source.file_options.file_url + ): from urllib.parse import urlparse file_url = feature_table.batch_source.file_options.file_url[:-1] @@ -714,7 +715,7 @@ def ingest( uri.hostname, str(uri.path).strip("/") + "/" + file_name, ) - if batch_source_type == "BATCH_BIGQUERY": + if feature_table.batch_source.bigquery_options.table_ref: from google.cloud import bigquery bq_table_ref = feature_table.batch_source.bigquery_options.table_ref @@ -741,9 +742,7 @@ def ingest( print("Removing temporary file(s)...") shutil.rmtree(dir_path) - print( - f"Data has been successfully ingested into FeatureTable {batch_source_type} batch source." 
- ) + print("Data has been successfully ingested into FeatureTable batch source.") def _get_grpc_metadata(self): """ diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 38e9e695e0..35954b8ca3 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -352,30 +352,14 @@ class DataSource: def __init__( self, - type: str, field_mapping: Dict[str, str], timestamp_column: str, date_partition_column: Optional[str] = "", ): - self._type = type self._field_mapping = field_mapping self._timestamp_column = timestamp_column self._date_partition_column = date_partition_column - @property - def type(self): - """ - Returns the type of this data source - """ - return self._type - - @type.setter - def type(self, type): - """ - Sets the type of this data source - """ - self._type = type - @property def field_mapping(self): """ @@ -435,14 +419,13 @@ def to_proto(self) -> DataSourceProto: class FileSource(DataSource): def __init__( self, - type, field_mapping, timestamp_column, file_format, file_url, date_partition_column="", ): - super().__init__(type, field_mapping, timestamp_column, date_partition_column) + super().__init__(field_mapping, timestamp_column, date_partition_column) self._file_options = FileOptions(file_format=file_format, file_url=file_url) @property @@ -462,7 +445,6 @@ def file_options(self, file_options): def from_proto(cls, data_source_proto): data_source = cls( - type=data_source_proto.type, field_mapping=data_source_proto.field_mapping, file_options=cls.file_options, timestamp_column=data_source_proto.timestamp_column, @@ -473,7 +455,7 @@ def from_proto(cls, data_source_proto): def to_proto(self) -> DataSourceProto: data_source_proto = DataSourceProto( - type=self.type, + type=DataSourceProto.BATCH_FILE, field_mapping=self.field_mapping, file_options=self.file_options.to_proto(), ) @@ -486,9 +468,9 @@ def to_proto(self) -> DataSourceProto: class BigQuerySource(DataSource): def __init__( - self, type, field_mapping, timestamp_column, table_ref, date_partition_column="" + self, field_mapping, timestamp_column, table_ref, date_partition_column="" ): - super().__init__(type, field_mapping, timestamp_column, date_partition_column) + super().__init__(field_mapping, timestamp_column, date_partition_column) self._bigquery_options = BigQueryOptions(table_ref=table_ref,) @property @@ -508,7 +490,6 @@ def bigquery_options(self, bigquery_options): def from_proto(cls, data_source_proto): data_source = cls( - type=data_source_proto.type, field_mapping=data_source_proto.field_mapping, bigquery_options=cls.bigquery_options, timestamp_column=data_source_proto.timestamp_column, @@ -519,7 +500,7 @@ def from_proto(cls, data_source_proto): def to_proto(self) -> DataSourceProto: data_source_proto = DataSourceProto( - type=self.type, + type=DataSourceProto.BATCH_BIGQUERY, field_mapping=self.field_mapping, bigquery_options=self.bigquery_options.to_proto(), ) @@ -533,7 +514,6 @@ def to_proto(self) -> DataSourceProto: class KafkaSource(DataSource): def __init__( self, - type, field_mapping, timestamp_column, bootstrap_servers, @@ -541,7 +521,7 @@ def __init__( topic, date_partition_column="", ): - super().__init__(type, field_mapping, timestamp_column, date_partition_column) + super().__init__(field_mapping, timestamp_column, date_partition_column) self._kafka_options = KafkaOptions( bootstrap_servers=bootstrap_servers, class_path=class_path, topic=topic ) @@ -563,7 +543,6 @@ def kafka_options(self, kafka_options): def from_proto(cls, 
data_source_proto): data_source = cls( - type=data_source_proto.type, field_mapping=data_source_proto.field_mapping, kafka_options=cls.kafka_options, timestamp_column=data_source_proto.timestamp_column, @@ -574,7 +553,7 @@ def from_proto(cls, data_source_proto): def to_proto(self) -> DataSourceProto: data_source_proto = DataSourceProto( - type=self.type, + type=DataSourceProto.STREAM_KAFKA, field_mapping=self.field_mapping, kafka_options=self.kafka_options.to_proto(), ) @@ -588,7 +567,6 @@ def to_proto(self) -> DataSourceProto: class KinesisSource(DataSource): def __init__( self, - type, field_mapping, timestamp_column, class_path, @@ -596,7 +574,7 @@ def __init__( stream_name, date_partition_column="", ): - super().__init__(type, field_mapping, timestamp_column, date_partition_column) + super().__init__(field_mapping, timestamp_column, date_partition_column) self._kinesis_options = KinesisOptions( class_path=class_path, region=region, stream_name=stream_name ) @@ -618,7 +596,6 @@ def kinesis_options(self, kinesis_options): def from_proto(cls, data_source_proto): data_source = cls( - type=data_source_proto.type, field_mapping=data_source_proto.field_mapping, kinesis_options=cls.kinesis_options, timestamp_column=data_source_proto.timestamp_column, @@ -629,7 +606,7 @@ def from_proto(cls, data_source_proto): def to_proto(self) -> DataSourceProto: data_source_proto = DataSourceProto( - type=self.type, + type=DataSourceProto.STREAM_KINESIS, field_mapping=self.field_mapping, kinesis_options=self.kinesis_options.to_proto(), ) diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index b1f722c40d..1448791627 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -30,7 +30,6 @@ FileSource, KafkaSource, KinesisSource, - SourceType, ) from feast.feature import Feature from feast.loaders import yaml as feast_yaml @@ -268,37 +267,27 @@ def _to_data_source(cls, data_source): Convert dict to data source. 
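For illustration, a minimal sketch of what declaring a batch source looks like once the explicit type argument is gone; the keyword names follow the FileSource signature in this patch, the file URL is a made-up example, and the proto enum is assumed to be pinned inside to_proto() as in the hunks above:

    from feast.core.DataSource_pb2 import DataSource as DataSourceProto
    from feast.data_source import FileSource

    # No type= argument any more; using FileSource itself implies BATCH_FILE.
    batch_source = FileSource(
        field_mapping={"ride_distance": "ride_distance"},
        timestamp_column="ts_col",
        file_format="parquet",
        file_url="file://feast/*",
    )

    assert batch_source.to_proto().type == DataSourceProto.BATCH_FILE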
""" - source_type = SourceType(data_source.type).name - - if ( - source_type == "BATCH_FILE" - and data_source.file_options.file_format - and data_source.file_options.file_url - ): + if data_source.file_options.file_format and data_source.file_options.file_url: data_source_proto = FileSource( - type=data_source.type, field_mapping=data_source.field_mapping, file_format=data_source.file_options.file_format, file_url=data_source.file_options.file_url, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, ).to_proto() - elif source_type == "BATCH_BIGQUERY" and data_source.bigquery_options.table_ref: + elif data_source.bigquery_options.table_ref: data_source_proto = BigQuerySource( - type=data_source.type, field_mapping=data_source.field_mapping, table_ref=data_source.bigquery_options.table_ref, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, ).to_proto() elif ( - source_type == "STREAM_KAFKA" - and data_source.kafka_options.bootstrap_servers + data_source.kafka_options.bootstrap_servers and data_source.kafka_options.topic and data_source.kafka_options.class_path ): data_source_proto = KafkaSource( - type=data_source.type, field_mapping=data_source.field_mapping, bootstrap_servers=data_source.kafka_options.bootstrap_servers, class_path=data_source.kafka_options.class_path, @@ -307,13 +296,11 @@ def _to_data_source(cls, data_source): date_partition_column=data_source.date_partition_column, ).to_proto() elif ( - source_type == "STREAM_KINESIS" - and data_source.kinesis_options.class_path + data_source.kinesis_options.class_path and data_source.kinesis_options.region and data_source.kinesis_options.stream_name ): data_source_proto = KinesisSource( - type=data_source.type, field_mapping=data_source.field_mapping, class_path=data_source.kinesis_options.class_path, region=data_source.kinesis_options.region, diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index b38b1102e3..70ba3fc22a 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -576,7 +576,6 @@ def test_apply_feature_table_success(self, test_client): # Create Feature Tables batch_source = FileSource( - type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -588,7 +587,6 @@ def test_apply_feature_table_success(self, test_client): ) stream_source = KafkaSource( - type="STREAM_KAFKA", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", diff --git a/sdk/python/tests/test_feature_table.py b/sdk/python/tests/test_feature_table.py index d4085b61e0..7a50b7e58f 100644 --- a/sdk/python/tests/test_feature_table.py +++ b/sdk/python/tests/test_feature_table.py @@ -55,7 +55,6 @@ def client(self, server): def test_feature_table_import_export_yaml(self): batch_source = FileSource( - type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", @@ -67,7 +66,6 @@ def test_feature_table_import_export_yaml(self): ) stream_source = KafkaSource( - type="STREAM_KAFKA", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", diff --git a/tests/e2e/test-register.py b/tests/e2e/test-register.py index c587a15879..665936eb5d 100644 --- a/tests/e2e/test-register.py +++ b/tests/e2e/test-register.py @@ -50,7 +50,6 @@ def driver_entity(): @pytest.fixture def basic_featuretable(): batch_source = FileSource( - type="BATCH_FILE", field_mapping={ "dev_entity": 
"dev_entity_field", "dev_feature_float": "dev_feature_float_field", @@ -62,7 +61,6 @@ def basic_featuretable(): date_partition_column="datetime", ) stream_source = KafkaSource( - type="STREAM_KAFKA", field_mapping={ "dev_entity": "dev_entity_field", "dev_feature_float": "dev_feature_float_field", @@ -100,7 +98,6 @@ def alltypes_entity(): @pytest.fixture def alltypes_featuretable(): batch_source = FileSource( - type="BATCH_FILE", field_mapping={ "ride_distance": "ride_distance", "ride_duration": "ride_duration", From e6cb88f0d9d9b68713b53e142ac2d2336a6fad21 Mon Sep 17 00:00:00 2001 From: Terence Date: Tue, 6 Oct 2020 17:37:13 +0800 Subject: [PATCH 20/34] Remove not so useful test Signed-off-by: Terence --- sdk/python/tests/test_client.py | 226 -------------------------------- 1 file changed, 226 deletions(-) diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index 70ba3fc22a..a8c9a71fd4 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -32,16 +32,10 @@ from feast.client import Client from feast.core import CoreService_pb2_grpc as Core from feast.core.CoreService_pb2 import ( - GetEntityResponse, GetFeastCoreVersionResponse, GetFeatureTableResponse, - ListEntitiesResponse, - ListFeatureTablesResponse, ) from feast.core.DataSource_pb2 import DataSource as DataSourceProto -from feast.core.Entity_pb2 import Entity as EntityProto -from feast.core.Entity_pb2 import EntityMeta as EntityMetaProto -from feast.core.Entity_pb2 import EntitySpecV2 as EntitySpecProto from feast.core.Feature_pb2 import FeatureSpecV2 as FeatureSpecProto from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto @@ -319,91 +313,6 @@ def test_get_online_features(self, mocked_client, auth_metadata, mocker): def test_get_historical_features(self, mocked_client, mocker): assert 1 == 1 - @pytest.mark.parametrize( - "mocked_client", - [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], - ) - def test_get_entity(self, mocked_client, mocker): - mocked_client._core_service_stub = Core.CoreServiceStub( - grpc.insecure_channel("") - ) - - entity_proto = EntityProto( - spec=EntitySpecProto( - name="driver_car_id", - description="Car driver id", - value_type=ValueProto.ValueType.STRING, - labels={"key1": "val1", "key2": "val2"}, - ), - meta=EntityMetaProto(), - ) - - mocker.patch.object( - mocked_client._core_service_stub, - "GetEntity", - return_value=GetEntityResponse(entity=entity_proto), - ) - mocked_client.set_project("my_project") - entity = mocked_client.get_entity("my_entity") - - assert ( - entity.name == "driver_car_id" - and entity.description == "Car driver id" - and entity.value_type == ValueType(ValueProto.ValueType.STRING).name - and "key1" in entity.labels - and entity.labels["key1"] == "val1" - ) - - @pytest.mark.parametrize( - "mocked_client", - [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], - ) - def test_list_entities(self, mocked_client, mocker): - mocker.patch.object( - mocked_client, - "_core_service_stub", - return_value=Core.CoreServiceStub(grpc.insecure_channel("")), - ) - - entity_1_proto = EntityProto( - spec=EntitySpecProto( - name="driver_car_id", - description="Car driver id", - value_type=ValueProto.ValueType.INT64, - labels={"key1": "val1", "key2": "val2"}, - ) - ) - entity_2_proto = EntityProto( - spec=EntitySpecProto( - name="driver_ride_id", - description="Ride driver id", - 
value_type=ValueProto.ValueType.STRING, - labels={"key3": "val3", "key4": "val4"}, - ) - ) - - mocker.patch.object( - mocked_client._core_service_stub, - "ListEntities", - return_value=ListEntitiesResponse( - entities=[entity_1_proto, entity_2_proto] - ), - ) - - entities = mocked_client.list_entities(labels={"key1": "val1"}) - assert len(entities) == 2 - - entity = entities[1] - assert ( - entity.name == "driver_ride_id" - and entity.description == "Ride driver id" - and entity.value_type == ValueType(ValueProto.ValueType.STRING).name - and "key3" in entity.labels - and entity.labels["key3"] == "val3" - and "key4" in entity.labels - and entity.labels["key4"] == "val4" - ) - @pytest.mark.parametrize( "test_client", [lazy_fixture("client"), lazy_fixture("secure_client")], ) @@ -432,141 +341,6 @@ def test_apply_entity_success(self, test_client): and entity.labels["team"] == "matchmaking" ) - @pytest.mark.parametrize( - "mocked_client", - [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], - ) - def test_get_feature_table(self, mocked_client, mocker): - mocked_client._core_service_stub = Core.CoreServiceStub( - grpc.insecure_channel("") - ) - - from google.protobuf.duration_pb2 import Duration - - mocker.patch.object( - mocked_client._core_service_stub, - "GetFeatureTable", - return_value=GetFeatureTableResponse( - table=FeatureTableProto( - spec=FeatureTableSpecProto( - name="my_feature_table", - max_age=Duration(seconds=3600), - labels={"key1": "val1", "key2": "val2"}, - features=[ - FeatureSpecProto( - name="my_feature_1", - value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpecProto( - name="my_feature_2", - value_type=ValueProto.ValueType.FLOAT, - ), - ], - entities=["my_entity_1"], - batch_source=DataSourceProto( - type="BATCH_FILE", - field_mapping={ - "ride_distance": "ride_distance", - "ride_duration": "ride_duration", - }, - file_options=DataSourceProto.FileOptions( - file_format="avro", file_url="data/test.avro" - ), - timestamp_column="ts_col", - date_partition_column="date_partition_col", - ), - ), - meta=FeatureTableMetaProto(), - ) - ), - ) - mocked_client.set_project("my_project") - feature_table = mocked_client.get_feature_table("my_feature_table") - - assert ( - feature_table.name == "my_feature_table" - and "key1" in feature_table.labels - and feature_table.labels["key1"] == "val1" - and "key2" in feature_table.labels - and feature_table.labels["key2"] == "val2" - and len(feature_table.features) == 2 - and len(feature_table.entities) == 1 - ) - - @pytest.mark.parametrize( - "mocked_client", - [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], - ) - def test_list_feature_tables(self, mocked_client, mocker): - mocker.patch.object( - mocked_client, - "_core_service_stub", - return_value=Core.CoreServiceStub(grpc.insecure_channel("")), - ) - - batch_source = DataSourceProto( - type="BATCH_FILE", - field_mapping={ - "ride_distance": "ride_distance", - "ride_duration": "ride_duration", - }, - file_options=DataSourceProto.FileOptions( - file_format="avro", file_url="data/test.avro" - ), - timestamp_column="ts_col", - date_partition_column="date_partition_col", - ) - - feature_table_1_proto = FeatureTableProto( - spec=FeatureTableSpecProto( - name="driver_car", - max_age=Duration(seconds=3600), - labels={"key1": "val1", "key2": "val2"}, - features=[ - FeatureSpecProto( - name="feature_1", value_type=ValueProto.ValueType.FLOAT - ) - ], - entities=["driver_car_id"], - batch_source=batch_source, - ) - ) - feature_table_2_proto = FeatureTableProto( - 
spec=FeatureTableSpecProto( - name="driver_ride", - max_age=Duration(seconds=3600), - labels={"key1": "val1"}, - features=[ - FeatureSpecProto( - name="feature_1", value_type=ValueProto.ValueType.FLOAT - ) - ], - entities=["driver_ride_id"], - batch_source=batch_source, - ) - ) - - mocker.patch.object( - mocked_client._core_service_stub, - "ListFeatureTables", - return_value=ListFeatureTablesResponse( - tables=[feature_table_1_proto, feature_table_2_proto] - ), - ) - - feature_tables = mocked_client.list_feature_tables(labels={"key1": "val1"}) - assert len(feature_tables) == 2 - - feature_table = feature_tables[0] - assert ( - feature_table.name == "driver_car" - and "key1" in feature_table.labels - and feature_table.labels["key1"] == "val1" - and "key2" in feature_table.labels - and feature_table.labels["key2"] == "val2" - and len(feature_table.features) == 1 - ) - @pytest.mark.parametrize( "test_client", [lazy_fixture("client"), lazy_fixture("secure_client")], ) From f09c65e9746202d79c6f058181917bcbe16d0a02 Mon Sep 17 00:00:00 2001 From: Terence Date: Tue, 6 Oct 2020 22:12:31 +0800 Subject: [PATCH 21/34] Address PR comments Signed-off-by: Terence --- sdk/python/feast/data_source.py | 52 ++++++++++++++++-------------- sdk/python/feast/feature_table.py | 8 ++--- sdk/python/feast/loaders/ingest.py | 1 + sdk/python/tests/test_client.py | 12 ------- tests/e2e/test-register.py | 4 --- 5 files changed, 33 insertions(+), 44 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 35954b8ca3..21014c8008 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -352,12 +352,12 @@ class DataSource: def __init__( self, - field_mapping: Dict[str, str], timestamp_column: str, + field_mapping: Optional[Dict[str, str]] = dict(), date_partition_column: Optional[str] = "", ): - self._field_mapping = field_mapping self._timestamp_column = timestamp_column + self._field_mapping = field_mapping self._date_partition_column = date_partition_column @property @@ -419,13 +419,13 @@ def to_proto(self) -> DataSourceProto: class FileSource(DataSource): def __init__( self, - field_mapping, - timestamp_column, - file_format, - file_url, - date_partition_column="", + timestamp_column: str, + file_format: str, + file_url: str, + field_mapping: Optional[Dict[str, str]] = dict(), + date_partition_column: Optional[str] = "", ): - super().__init__(field_mapping, timestamp_column, date_partition_column) + super().__init__(timestamp_column, field_mapping, date_partition_column) self._file_options = FileOptions(file_format=file_format, file_url=file_url) @property @@ -468,9 +468,13 @@ def to_proto(self) -> DataSourceProto: class BigQuerySource(DataSource): def __init__( - self, field_mapping, timestamp_column, table_ref, date_partition_column="" + self, + timestamp_column: str, + table_ref: str, + field_mapping: Optional[Dict[str, str]] = dict(), + date_partition_column: Optional[str] = "", ): - super().__init__(field_mapping, timestamp_column, date_partition_column) + super().__init__(timestamp_column, field_mapping, date_partition_column) self._bigquery_options = BigQueryOptions(table_ref=table_ref,) @property @@ -514,14 +518,14 @@ def to_proto(self) -> DataSourceProto: class KafkaSource(DataSource): def __init__( self, - field_mapping, - timestamp_column, - bootstrap_servers, - class_path, - topic, - date_partition_column="", + timestamp_column: str, + bootstrap_servers: str, + class_path: str, + topic: str, + field_mapping: Optional[Dict[str, str]] = 
dict(), + date_partition_column: Optional[str] = "", ): - super().__init__(field_mapping, timestamp_column, date_partition_column) + super().__init__(timestamp_column, field_mapping, date_partition_column) self._kafka_options = KafkaOptions( bootstrap_servers=bootstrap_servers, class_path=class_path, topic=topic ) @@ -567,14 +571,14 @@ def to_proto(self) -> DataSourceProto: class KinesisSource(DataSource): def __init__( self, - field_mapping, - timestamp_column, - class_path, - region, - stream_name, - date_partition_column="", + timestamp_column: str, + class_path: str, + region: str, + stream_name: str, + field_mapping: Optional[Dict[str, str]] = dict(), + date_partition_column: Optional[str] = "", ): - super().__init__(field_mapping, timestamp_column, date_partition_column) + super().__init__(timestamp_column, field_mapping, date_partition_column) self._kinesis_options = KinesisOptions( class_path=class_path, region=region, stream_name=stream_name ) diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index 1448791627..3312b7845c 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -262,9 +262,9 @@ def from_dict(cls, ft_dict): return cls.from_proto(feature_table_proto) @classmethod - def _to_data_source(cls, data_source): + def _get_data_source_proto(cls, data_source): """ - Convert dict to data source. + Convert data source config in FeatureTable spec to a DataSource proto. """ if data_source.file_options.file_format and data_source.file_options.file_url: @@ -346,12 +346,12 @@ def from_proto(cls, feature_table_proto: FeatureTableProto): batch_source=( None if not feature_table_proto.spec.batch_source.ByteSize() - else cls._to_data_source(feature_table_proto.spec.batch_source) + else cls._get_data_source_proto(feature_table_proto.spec.batch_source) ), stream_source=( None if not feature_table_proto.spec.stream_source.ByteSize() - else cls._to_data_source(feature_table_proto.spec.stream_source) + else cls._get_data_source_proto(feature_table_proto.spec.stream_source) ), ) diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index f817a27cd3..8cd658c2ef 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -26,6 +26,7 @@ def _check_field_mappings( Args: column_names: Column names in provided ingestion source feature_table_name: Name of FeatureTable + feature_table_timestamp_column: Timestamp column of FeatureTable feature_table_field_mappings: Field mappings of FeatureTable """ diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index a8c9a71fd4..a5f2198416 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -350,10 +350,6 @@ def test_apply_feature_table_success(self, test_client): # Create Feature Tables batch_source = FileSource( - field_mapping={ - "ride_distance": "ride_distance", - "ride_duration": "ride_duration", - }, file_format="parquet", file_url="file://feast/*", timestamp_column="ts_col", @@ -361,10 +357,6 @@ def test_apply_feature_table_success(self, test_client): ) stream_source = KafkaSource( - field_mapping={ - "ride_distance": "ride_distance", - "ride_duration": "ride_duration", - }, bootstrap_servers="localhost:9094", class_path="random/path/to/class", topic="test_topic", @@ -457,10 +449,6 @@ def test_ingest(self, mocked_client, mocker): entities=["dev_entity"], batch_source=DataSourceProto( type="BATCH_FILE", - field_mapping={ - "dev_feature_float": "dev_feature_float", - 
"dev_feature_string": "dev_feature_string", - }, file_options=DataSourceProto.FileOptions( file_format="parquet", file_url="file://feast/*" ), diff --git a/tests/e2e/test-register.py b/tests/e2e/test-register.py index 665936eb5d..11f64fb854 100644 --- a/tests/e2e/test-register.py +++ b/tests/e2e/test-register.py @@ -98,10 +98,6 @@ def alltypes_entity(): @pytest.fixture def alltypes_featuretable(): batch_source = FileSource( - field_mapping={ - "ride_distance": "ride_distance", - "ride_duration": "ride_duration", - }, file_format="parquet", file_url="file://feast/*", timestamp_column="ts_col", From ab47c408ddf300f9bd41594b176dbd0ec39a612c Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 09:13:56 +0800 Subject: [PATCH 22/34] Cleanup the way protos are converted Signed-off-by: Terence --- sdk/python/feast/client.py | 11 ++-- sdk/python/feast/data_source.py | 67 +++++++++++++++++++++++ sdk/python/feast/feature_table.py | 91 ++++++++++++++++++------------- sdk/python/tests/test_client.py | 10 ++-- 4 files changed, 127 insertions(+), 52 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 7f502afe19..8635940580 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -60,7 +60,7 @@ ListProjectsResponse, ) from feast.core.CoreService_pb2_grpc import CoreServiceStub -from feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.data_source import BigQuerySource, FileSource from feast.entity import Entity from feast.feature_table import FeatureTable from feast.grpc import auth as feast_auth @@ -649,7 +649,7 @@ def ingest( # Check 1) Only parquet file format for FeatureTable batch source is supported if ( feature_table.batch_source - and feature_table.batch_source.type == DataSourceProto.BATCH_FILE + and issubclass(type(feature_table.batch_source), FileSource) and "".join( feature_table.batch_source.file_options.file_format.split() ).lower() @@ -678,10 +678,7 @@ def ingest( ) try: - if ( - feature_table.batch_source.file_options.file_format - and feature_table.batch_source.file_options.file_url - ): + if issubclass(type(feature_table.batch_source), FileSource): from urllib.parse import urlparse file_url = feature_table.batch_source.file_options.file_url[:-1] @@ -715,7 +712,7 @@ def ingest( uri.hostname, str(uri.path).strip("/") + "/" + file_name, ) - if feature_table.batch_source.bigquery_options.table_ref: + if issubclass(type(feature_table.batch_source), BigQuerySource): from google.cloud import bigquery bq_table_ref = feature_table.batch_source.bigquery_options.table_ref diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 21014c8008..40dfc8e3b2 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -360,6 +360,19 @@ def __init__( self._field_mapping = field_mapping self._date_partition_column = date_partition_column + def __eq__(self, other): + if not isinstance(other, DataSource): + raise TypeError("Comparisons should only involve DataSource class objects.") + + if ( + self.timestamp_column != other.timestamp_column + or self.field_mapping != other.field_mapping + or self.date_partition_column != other.date_partition_column + ): + return False + + return True + @property def field_mapping(self): """ @@ -428,6 +441,18 @@ def __init__( super().__init__(timestamp_column, field_mapping, date_partition_column) self._file_options = FileOptions(file_format=file_format, file_url=file_url) + def __eq__(self, other): + if not isinstance(other, FileSource): + 
raise TypeError("Comparisons should only involve FileSource class objects.") + + if ( + self.file_options.file_url != other.file_options.file_url + or self.file_options.file_format != other.file_options.file_format + ): + return False + + return True + @property def file_options(self): """ @@ -477,6 +502,17 @@ def __init__( super().__init__(timestamp_column, field_mapping, date_partition_column) self._bigquery_options = BigQueryOptions(table_ref=table_ref,) + def __eq__(self, other): + if not isinstance(other, BigQuerySource): + raise TypeError( + "Comparisons should only involve BigQuerySource class objects." + ) + + if self.bigquery_options.table_ref != other.bigquery_options.table_ref: + return False + + return True + @property def bigquery_options(self): """ @@ -530,6 +566,22 @@ def __init__( bootstrap_servers=bootstrap_servers, class_path=class_path, topic=topic ) + def __eq__(self, other): + if not isinstance(other, KafkaSource): + raise TypeError( + "Comparisons should only involve KafkaSource class objects." + ) + + if ( + self.kafka_options.bootstrap_servers + != other.kafka_options.bootstrap_servers + or self.kafka_options.class_path != other.kafka_options.class_path + or self.kafka_options.topic != other.kafka_options.topic + ): + return False + + return True + @property def kafka_options(self): """ @@ -583,6 +635,21 @@ def __init__( class_path=class_path, region=region, stream_name=stream_name ) + def __eq__(self, other): + if not isinstance(other, KinesisSource): + raise TypeError( + "Comparisons should only involve KinesisSource class objects." + ) + + if ( + self.kinesis_options.class_path != other.kinesis_options.class_path + or self.kinesis_options.region != other.kinesis_options.region + or self.kinesis_options.stream_name != other.kinesis_options.stream_name + ): + return False + + return True + @property def kinesis_options(self): """ diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index 3312b7845c..3f10003182 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -20,7 +20,6 @@ from google.protobuf.json_format import MessageToDict, MessageToJson from google.protobuf.timestamp_pb2 import Timestamp -from feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.core.FeatureTable_pb2 import FeatureTable as FeatureTableProto from feast.core.FeatureTable_pb2 import FeatureTableMeta as FeatureTableMetaProto from feast.core.FeatureTable_pb2 import FeatureTableSpec as FeatureTableSpecProto @@ -53,19 +52,9 @@ def __init__( ): self._name = name self._entities = entities - self._features = [ - feature.to_proto() for feature in features if isinstance(feature, Feature) - ] - self._batch_source = ( - batch_source.to_proto() - if isinstance(batch_source, DataSource) - else batch_source - ) - self._stream_source = ( - stream_source.to_proto() - if isinstance(stream_source, DataSource) - else stream_source - ) + self._features = features + self._batch_source = batch_source + self._stream_source = stream_source if labels is None: self._labels = dict() # type: MutableMapping[str, str] else: @@ -110,7 +99,7 @@ def name(self): return self._name @name.setter - def name(self, name): + def name(self, name: str): """ Sets the name of this feature table """ @@ -124,7 +113,7 @@ def entities(self): return self._entities @entities.setter - def entities(self, entities): + def entities(self, entities: List[str]): """ Sets the entities of this feature table """ @@ -138,7 +127,7 @@ def features(self): return 
self._features @features.setter - def features(self, features): + def features(self, features: List[Feature]): """ Sets the features of this feature table """ @@ -152,7 +141,7 @@ def batch_source(self): return self._batch_source @batch_source.setter - def batch_source(self, batch_source: DataSourceProto): + def batch_source(self, batch_source: Union[BigQuerySource, FileSource]): """ Sets the batch source of this feature table """ @@ -166,7 +155,7 @@ def stream_source(self): return self._stream_source @stream_source.setter - def stream_source(self, stream_source: DataSourceProto): + def stream_source(self, stream_source: Union[KafkaSource, KinesisSource]): """ Sets the stream source of this feature table """ @@ -182,7 +171,7 @@ def max_age(self): return self._max_age @max_age.setter - def max_age(self, max_age): + def max_age(self, max_age: Duration): """ Set the maximum age for this feature table """ @@ -262,56 +251,56 @@ def from_dict(cls, ft_dict): return cls.from_proto(feature_table_proto) @classmethod - def _get_data_source_proto(cls, data_source): + def _get_data_source(cls, data_source): """ - Convert data source config in FeatureTable spec to a DataSource proto. + Convert data source config in FeatureTable spec to a DataSource class object. """ if data_source.file_options.file_format and data_source.file_options.file_url: - data_source_proto = FileSource( + data_source_obj = FileSource( field_mapping=data_source.field_mapping, file_format=data_source.file_options.file_format, file_url=data_source.file_options.file_url, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, - ).to_proto() + ) elif data_source.bigquery_options.table_ref: - data_source_proto = BigQuerySource( + data_source_obj = BigQuerySource( field_mapping=data_source.field_mapping, table_ref=data_source.bigquery_options.table_ref, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, - ).to_proto() + ) elif ( data_source.kafka_options.bootstrap_servers and data_source.kafka_options.topic and data_source.kafka_options.class_path ): - data_source_proto = KafkaSource( + data_source_obj = KafkaSource( field_mapping=data_source.field_mapping, bootstrap_servers=data_source.kafka_options.bootstrap_servers, class_path=data_source.kafka_options.class_path, topic=data_source.kafka_options.topic, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, - ).to_proto() + ) elif ( data_source.kinesis_options.class_path and data_source.kinesis_options.region and data_source.kinesis_options.stream_name ): - data_source_proto = KinesisSource( + data_source_obj = KinesisSource( field_mapping=data_source.field_mapping, class_path=data_source.kinesis_options.class_path, region=data_source.kinesis_options.region, stream_name=data_source.kinesis_options.stream_name, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, - ).to_proto() + ) else: raise ValueError("Could not identify the source type being added") - return data_source_proto + return data_source_obj @classmethod def from_proto(cls, feature_table_proto: FeatureTableProto): @@ -346,12 +335,12 @@ def from_proto(cls, feature_table_proto: FeatureTableProto): batch_source=( None if not feature_table_proto.spec.batch_source.ByteSize() - else cls._get_data_source_proto(feature_table_proto.spec.batch_source) + else cls._get_data_source(feature_table_proto.spec.batch_source) ), stream_source=( 
None if not feature_table_proto.spec.stream_source.ByteSize() - else cls._get_data_source_proto(feature_table_proto.spec.stream_source) + else cls._get_data_source(feature_table_proto.spec.stream_source) ), ) @@ -375,11 +364,23 @@ def to_proto(self) -> FeatureTableProto: spec = FeatureTableSpecProto( name=self.name, entities=self.entities, - features=self.features, + features=[ + feature.to_proto() + for feature in self.features + if type(feature) == Feature + ], labels=self.labels, max_age=self.max_age, - batch_source=self.batch_source, - stream_source=self.stream_source, + batch_source=( + self.batch_source.to_proto() + if issubclass(type(self.batch_source), DataSource) + else self.batch_source + ), + stream_source=( + self.stream_source.to_proto() + if issubclass(type(self.stream_source), DataSource) + else self.stream_source + ), ) return FeatureTableProto(spec=spec, meta=meta) @@ -396,11 +397,23 @@ def to_spec_proto(self) -> FeatureTableSpecProto: spec = FeatureTableSpecProto( name=self.name, entities=self.entities, - features=self.features, + features=[ + feature.to_proto() + for feature in self.features + if type(feature) == Feature + ], labels=self.labels, max_age=self.max_age, - batch_source=self.batch_source, - stream_source=self.stream_source, + batch_source=( + self.batch_source.to_proto() + if issubclass(type(self.batch_source), DataSource) + else self.batch_source + ), + stream_source=( + self.stream_source.to_proto() + if issubclass(type(self.stream_source), DataSource) + else self.stream_source + ), ) return spec diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index a5f2198416..8daa5db5db 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -387,15 +387,13 @@ def test_apply_feature_table_success(self, test_client): len(feature_tables) == 1 and feature_tables[0].name == "my-feature-table-1" and feature_tables[0].features[0].name == "fs1-my-feature-1" - and feature_tables[0].features[0].value_type == ValueProto.ValueType.INT64 + and feature_tables[0].features[0].dtype == ValueType.INT64 and feature_tables[0].features[1].name == "fs1-my-feature-2" - and feature_tables[0].features[1].value_type == ValueProto.ValueType.STRING + and feature_tables[0].features[1].dtype == ValueType.STRING and feature_tables[0].features[2].name == "fs1-my-feature-3" - and feature_tables[0].features[2].value_type - == ValueProto.ValueType.STRING_LIST + and feature_tables[0].features[2].dtype == ValueType.STRING_LIST and feature_tables[0].features[3].name == "fs1-my-feature-4" - and feature_tables[0].features[3].value_type - == ValueProto.ValueType.BYTES_LIST + and feature_tables[0].features[3].dtype == ValueType.BYTES_LIST and feature_tables[0].entities[0] == "fs1-my-entity-1" ) From 40c3e29740cc9fac824e172225acb09e83e33b4b Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 16:06:00 +0800 Subject: [PATCH 23/34] Split ingest function and add more tests Signed-off-by: Terence --- sdk/python/feast/client.py | 168 +++------------- sdk/python/feast/loaders/ingest.py | 164 +++++++++++++++- sdk/python/tests/data/dev_featuretable.csv | 101 ++++++++++ sdk/python/tests/test_client.py | 215 ++++++++++++++++++--- 4 files changed, 474 insertions(+), 174 deletions(-) create mode 100644 sdk/python/tests/data/dev_featuretable.csv diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 8635940580..96ed8e7538 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -13,17 +13,11 @@ # limitations under the 
License. import logging import multiprocessing -import os import shutil -import tempfile -import time -from math import ceil -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union import grpc import pandas as pd -import pyarrow as pa -from pyarrow import parquet as pq from feast.config import Config from feast.constants import ( @@ -68,11 +62,14 @@ from feast.loaders.ingest import ( BATCH_INGESTION_PRODUCTION_TIMEOUT, _check_field_mappings, - _partition_by_date, + _read_table_from_source, + _upload_to_bq_source, + _upload_to_file_source, + _write_non_partitioned_table_from_source, + _write_partitioned_table_from_source, ) from feast.serving.ServingService_pb2 import GetFeastServingInfoRequest from feast.serving.ServingService_pb2_grpc import ServingServiceStub -from feast.staging.storage_client import get_staging_client _logger = logging.getLogger(__name__) @@ -642,10 +639,6 @@ def ingest( else: raise Exception(f"FeatureTable, {name} cannot be found.") - dir_path, dest_path, column_names = _read_table_from_source( - source, chunk_size, max_workers - ) - # Check 1) Only parquet file format for FeatureTable batch source is supported if ( feature_table.batch_source @@ -660,6 +653,7 @@ def ingest( f"Only BATCH_FILE source with parquet format is supported for batch ingestion." ) + pyarrow_table, column_names = _read_table_from_source(source) # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table _check_field_mappings( column_names, @@ -667,77 +661,42 @@ def ingest( feature_table.batch_source.timestamp_column, feature_table.batch_source.field_mapping, ) - # Partition dataset by date - date_partition_dest_path = None - if feature_table.batch_source.date_partition_column: - date_partition_dest_path = _partition_by_date( + + dir_path = None + to_partition = False + if feature_table.batch_source.date_partition_column and issubclass( + type(feature_table.batch_source), FileSource + ): + to_partition = True + dest_path = _write_partitioned_table_from_source( column_names, + pyarrow_table, feature_table.batch_source.date_partition_column, feature_table.batch_source.timestamp_column, - dest_path, + ) + else: + dir_path, dest_path = _write_non_partitioned_table_from_source( + column_names, pyarrow_table, chunk_size, max_workers, ) try: if issubclass(type(feature_table.batch_source), FileSource): - from urllib.parse import urlparse - file_url = feature_table.batch_source.file_options.file_url[:-1] - uri = urlparse(file_url) - staging_client = get_staging_client(uri.scheme) - - if date_partition_dest_path is not None: - file_paths = list() - for (dirpath, dirnames, filenames) in os.walk( - date_partition_dest_path - ): - file_paths += [ - os.path.join(dirpath, file) for file in filenames - ] - for path in file_paths: - file_name = path.split("/")[-1] - partition_col = path.split("/")[-2] - staging_client.upload_file( - path, - uri.hostname, - str(uri.path).strip("/") - + "/" - + partition_col - + "/" - + file_name, - ) - else: - file_name = dest_path.split("/")[-1] - staging_client.upload_file( - dest_path, - uri.hostname, - str(uri.path).strip("/") + "/" + file_name, - ) + _upload_to_file_source(file_url, to_partition, dest_path) if issubclass(type(feature_table.batch_source), BigQuerySource): - from google.cloud import bigquery - bq_table_ref = feature_table.batch_source.bigquery_options.table_ref - gcp_project, dataset_table = bq_table_ref.split(":") - - client = bigquery.Client(project=gcp_project) - - 
bq_table_ref = bq_table_ref.replace(":", ".") - table = bigquery.table.Table(bq_table_ref) - - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.PARQUET + feature_table_timestamp_column = ( + feature_table.batch_source.timestamp_column + ) - time_partitioning_obj = bigquery.table.TimePartitioning( - field=feature_table.batch_source.timestamp_column + _upload_to_bq_source( + bq_table_ref, feature_table_timestamp_column, dest_path ) - job_config.time_partitioning = time_partitioning_obj - with open(dest_path, "rb") as source_file: - client.load_table_from_file( - source_file, table, job_config=job_config - ) finally: # Remove parquet file(s) that were created earlier print("Removing temporary file(s)...") - shutil.rmtree(dir_path) + if dir_path: + shutil.rmtree(dir_path) print("Data has been successfully ingested into FeatureTable batch source.") @@ -751,74 +710,3 @@ def _get_grpc_metadata(self): if self._config.getboolean(CONFIG_ENABLE_AUTH_KEY) and self._auth_metadata: return self._auth_metadata.get_signed_meta() return () - - -def _read_table_from_source( - source: Union[pd.DataFrame, str], chunk_size: int, max_workers: int -) -> Tuple[str, str, List[str]]: - """ - Infers a data source type (path or Pandas DataFrame) and reads it in as - a PyArrow Table. - - The PyArrow Table that is read will be written to a parquet file with row - group size determined by the minimum of: - * (table.num_rows / max_workers) - * chunk_size - - The parquet file that is created will be passed as file path to the - multiprocessing pool workers. - - Args: - source (Union[pd.DataFrame, str]): - Either a string path or Pandas DataFrame. - - chunk_size (int): - Number of worker processes to use to encode values. - - max_workers (int): - Amount of rows to load and ingest at a time. - - Returns: - Tuple[str, str, List[str]]: - Tuple containing parent directory path, destination path to - parquet file and column names of pyarrow table. 
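Taken together, the refactored path can be exercised end to end roughly as in the sketch below. This is only an illustrative sequence, not code from the patch: it assumes a running Feast core, a project and a FeatureTable named as shown that is already registered with a parquet FileSource batch source, and made-up column names.

    import pandas as pd

    from feast.client import Client

    client = Client(core_url="localhost:6565")
    client.set_project("my_project")

    # Fetch the registered table, then ingest a DataFrame into its batch source.
    driver_ft = client.get_feature_table("driver_trips")
    df = pd.DataFrame(
        {
            "datetime": pd.to_datetime(["2020-10-05T06:39:35Z"] * 3, utc=True),
            "driver_id": [1, 2, 3],
            "trips_today": [5.0, 1.0, 3.0],
        }
    )
    client.ingest(driver_ft, df)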
- """ - - # Pandas DataFrame detected - if isinstance(source, pd.DataFrame): - table = pa.Table.from_pandas(df=source) - - # Inferring a string path - elif isinstance(source, str): - file_path = source - filename, file_ext = os.path.splitext(file_path) - - if ".csv" in file_ext: - from pyarrow import csv - - table = csv.read_csv(filename) - elif ".json" in file_ext: - from pyarrow import json - - table = json.read_json(filename) - else: - table = pq.read_table(file_path) - else: - raise ValueError(f"Unknown data source provided for ingestion: {source}") - - # Ensure that PyArrow table is initialised - assert isinstance(table, pa.lib.Table) - - # Write table as parquet file with a specified row_group_size - dir_path = tempfile.mkdtemp() - tmp_table_name = f"{int(time.time())}.parquet" - dest_path = f"{dir_path}/{tmp_table_name}" - row_group_size = min(ceil(table.num_rows / max_workers), chunk_size) - pq.write_table(table=table, where=dest_path, row_group_size=row_group_size) - - column_names = table.column_names - - # Remove table from memory - del table - - return dir_path, dest_path, column_names diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 8cd658c2ef..528e47fa7c 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -1,10 +1,15 @@ +import os import tempfile -from typing import Dict, List +import time +from math import ceil +from typing import Dict, List, Tuple, Union import pandas as pd import pyarrow as pa from pyarrow import parquet as pq +from feast.staging.storage_client import get_staging_client + GRPC_CONNECTION_TIMEOUT_DEFAULT = 3 # type: int GRPC_CONNECTION_TIMEOUT_APPLY = 300 # type: int FEAST_SERVING_URL_ENV_KEY = "FEAST_SERVING_URL" # type: str @@ -48,11 +53,42 @@ def _check_field_mappings( ) -def _partition_by_date( +def _write_non_partitioned_table_from_source( + column_names: List[str], table: pa.Table, chunk_size: int, max_workers: int +) -> Tuple[str, str]: + """ + Partitions dataset by date based on timestamp_column. + Assumes date_partition_column is in date format if provided. + + Args: + column_names: Column names in provided ingestion source + table: PyArrow table of Dataset + chunk_size: Number of worker processes to use to encode values. + max_workers: Amount of rows to load and ingest at a time. + Returns: + Tuple[str,str]: + Tuple containing parent directory path, destination path to + parquet file. + """ + dir_path = tempfile.mkdtemp() + + # Write table as parquet file with a specified row_group_size + tmp_table_name = f"{int(time.time())}.parquet" + dest_path = f"{dir_path}/{tmp_table_name}" + row_group_size = min(ceil(table.num_rows / max_workers), chunk_size) + pq.write_table(table=table, where=dest_path, row_group_size=row_group_size) + + # Remove table from memory + del table + + return dir_path, dest_path + + +def _write_partitioned_table_from_source( column_names: List[str], + table: pa.Table, feature_table_date_partition_column: str, feature_table_timestamp_column: str, - file_path: str, ) -> str: """ Partitions dataset by date based on timestamp_column. 
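The row group sizing used by the non-partitioned writer above is simply min(ceil(num_rows / max_workers), chunk_size). A tiny worked example with hypothetical values (these numbers are not defaults from the patch):

    from math import ceil

    num_rows, max_workers, chunk_size = 100, 4, 10000
    row_group_size = min(ceil(num_rows / max_workers), chunk_size)
    assert row_group_size == 25  # the temporary parquet file gets four 25-row groups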
@@ -60,28 +96,138 @@ def _partition_by_date( Args: column_names: Column names in provided ingestion source - feature_table: FeatureTable - file_path: File path to existing parquet file that's not yet partitioned - + table: PyArrow table of Dataset + feature_table_date_partition_column: Date-partition column of FeatureTable + feature_table_timestamp_column: Timestamp column of FeatureTable Returns: str: Root directory which contains date partitioned files. """ - df = pd.read_parquet(file_path) - # Date-partitioned dataset temp path dir_path = tempfile.mkdtemp() # Case: date_partition_column is provided and dataset does not contain it if feature_table_date_partition_column not in column_names: + df = table.to_pandas() df[feature_table_date_partition_column] = df[ feature_table_timestamp_column ].dt.date + table = pa.Table.from_pandas(df) - table = pa.Table.from_pandas(df) pq.write_to_dataset( table=table, root_path=dir_path, partition_cols=[feature_table_date_partition_column], ) + # Remove table from memory + del table + return dir_path + + +def _read_table_from_source( + source: Union[pd.DataFrame, str] +) -> Tuple[pa.Table, List[str]]: + """ + Infers a data source type (path or Pandas DataFrame) and reads it in as + a PyArrow Table. + + Args: + source (Union[pd.DataFrame, str]): + Either a string path or Pandas DataFrame. + + Returns: + Tuple[pa.Table, List[str]]: + Tuple containing PyArrow table of dataset, and column names of PyArrow table. + """ + + # Pandas DataFrame detected + if isinstance(source, pd.DataFrame): + table = pa.Table.from_pandas(df=source) + + # Inferring a string path + elif isinstance(source, str): + file_path = source + filename, file_ext = os.path.splitext(file_path) + + if ".csv" in file_ext: + from pyarrow import csv + + table = csv.read_csv(filename) + elif ".json" in file_ext: + from pyarrow import json + + table = json.read_json(filename) + else: + table = pq.read_table(file_path) + else: + raise ValueError(f"Unknown data source provided for ingestion: {source}") + + # Ensure that PyArrow table is initialised + assert isinstance(table, pa.lib.Table) + + column_names = table.column_names + + return table, column_names + + +def _upload_to_file_source(file_url: str, to_partition: bool, dest_path: str) -> None: + """ + Uploads data into a FileSource. Currently supports GCS, S3 and Local FS. + + Args: + file_url: file url of FileSource defined for FeatureTable + """ + from urllib.parse import urlparse + + uri = urlparse(file_url) + staging_client = get_staging_client(uri.scheme) + + if to_partition: + file_paths = list() + for (dirpath, dirnames, filenames) in os.walk(dest_path): + file_paths += [os.path.join(dirpath, file) for file in filenames] + for path in file_paths: + file_name = path.split("/")[-1] + partition_col = path.split("/")[-2] + staging_client.upload_file( + path, + uri.hostname, + str(uri.path).strip("/") + "/" + partition_col + "/" + file_name, + ) + else: + file_name = dest_path.split("/")[-1] + staging_client.upload_file( + dest_path, uri.hostname, str(uri.path).strip("/") + "/" + file_name, + ) + + +def _upload_to_bq_source( + bq_table_ref: str, feature_table_timestamp_column: str, dest_path: str +) -> None: + """ + Uploads data into a BigQuerySource. 
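For a standalone feel of what the partitioned write produces, a small sketch using throwaway column names: pyarrow lays the files out hive-style under the temporary root, and the file upload helper above then mirrors each partition directory under the batch source's file_url.

    import os
    import tempfile

    import pandas as pd
    import pyarrow as pa
    from pyarrow import parquet as pq

    df = pd.DataFrame(
        {
            "datetime": pd.to_datetime(["2020-10-05", "2020-10-06"], utc=True),
            "feature": [1.0, 2.0],
        }
    )
    df["date_col"] = df["datetime"].dt.date  # derived partition column, as in the helper above
    table = pa.Table.from_pandas(df)

    root = tempfile.mkdtemp()
    pq.write_to_dataset(table=table, root_path=root, partition_cols=["date_col"])

    # Prints paths such as <root>/date_col=2020-10-05/<uuid>.parquet
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            print(os.path.join(dirpath, name))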
+ + Args: + bq_table_ref: BigQuery table reference of format "project:dataset_name.table_name" defined for FeatureTable + feature_table_timestamp_column: Timestamp column of FeatureTable + dest_path: File path to existing parquet file + """ + from google.cloud import bigquery + + gcp_project, _ = bq_table_ref.split(":") + + bq_client = bigquery.Client(project=gcp_project) + + bq_table_ref = bq_table_ref.replace(":", ".") + table = bigquery.table.Table(bq_table_ref) + + job_config = bigquery.LoadJobConfig() + job_config.source_format = bigquery.SourceFormat.PARQUET + + time_partitioning_obj = bigquery.table.TimePartitioning( + field=feature_table_timestamp_column + ) + job_config.time_partitioning = time_partitioning_obj + with open(dest_path, "rb") as source_file: + bq_client.load_table_from_file(source_file, table, job_config=job_config) diff --git a/sdk/python/tests/data/dev_featuretable.csv b/sdk/python/tests/data/dev_featuretable.csv new file mode 100644 index 0000000000..61fc785299 --- /dev/null +++ b/sdk/python/tests/data/dev_featuretable.csv @@ -0,0 +1,101 @@ +datetime,datetime_col,dev_feature_float,dev_feature_string +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,0.0,feat_0 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,1.0,feat_1 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,2.0,feat_2 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,3.0,feat_3 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,4.0,feat_4 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,5.0,feat_5 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,6.0,feat_6 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,7.0,feat_7 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,8.0,feat_8 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,9.0,feat_9 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,10.0,feat_10 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,11.0,feat_11 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,12.0,feat_12 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,13.0,feat_13 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,14.0,feat_14 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,15.0,feat_15 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,16.0,feat_16 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,17.0,feat_17 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,18.0,feat_18 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,19.0,feat_19 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,20.0,feat_20 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,21.0,feat_21 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,22.0,feat_22 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,23.0,feat_23 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,24.0,feat_24 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,25.0,feat_25 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,26.0,feat_26 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,27.0,feat_27 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,28.0,feat_28 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,29.0,feat_29 +2020-10-07 
06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,30.0,feat_30 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,31.0,feat_31 +2020-10-07 06:39:35.998951+00:00,2020-06-30 06:39:35.998951+00:00,32.0,feat_32 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,33.0,feat_33 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,34.0,feat_34 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,35.0,feat_35 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,36.0,feat_36 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,37.0,feat_37 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,38.0,feat_38 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,39.0,feat_39 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,40.0,feat_40 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,41.0,feat_41 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,42.0,feat_42 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,43.0,feat_43 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,44.0,feat_44 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,45.0,feat_45 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,46.0,feat_46 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,47.0,feat_47 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,48.0,feat_48 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,49.0,feat_49 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,50.0,feat_50 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,51.0,feat_51 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,52.0,feat_52 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,53.0,feat_53 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,54.0,feat_54 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,55.0,feat_55 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,56.0,feat_56 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,57.0,feat_57 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,58.0,feat_58 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,59.0,feat_59 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,60.0,feat_60 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,61.0,feat_61 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,62.0,feat_62 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,63.0,feat_63 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,64.0,feat_64 +2020-10-06 06:39:35.998951+00:00,2020-06-29 06:39:35.998951+00:00,65.0,feat_65 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,66.0,feat_66 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,67.0,feat_67 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,68.0,feat_68 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,69.0,feat_69 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,70.0,feat_70 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,71.0,feat_71 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,72.0,feat_72 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,73.0,feat_73 +2020-10-05 06:39:35.998951+00:00,2020-06-28 
06:39:35.998951+00:00,74.0,feat_74 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,75.0,feat_75 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,76.0,feat_76 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,77.0,feat_77 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,78.0,feat_78 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,79.0,feat_79 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,80.0,feat_80 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,81.0,feat_81 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,82.0,feat_82 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,83.0,feat_83 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,84.0,feat_84 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,85.0,feat_85 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,86.0,feat_86 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,87.0,feat_87 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,88.0,feat_88 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,89.0,feat_89 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,90.0,feat_90 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,91.0,feat_91 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,92.0,feat_92 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,93.0,feat_93 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,94.0,feat_94 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,95.0,feat_95 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,96.0,feat_96 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,97.0,feat_97 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,98.0,feat_98 +2020-10-05 06:39:35.998951+00:00,2020-06-28 06:39:35.998951+00:00,99.0,feat_99 diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index 8daa5db5db..a9f6c628e5 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -251,6 +251,43 @@ def client(self, core_server, serving_server): serving_url=f"localhost:{serving_server}", ) + @pytest.fixture + def partitioned_df(self): + # Partitioned DataFrame + N_ROWS = 100 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + final_offset = ( + [time_offset] * 33 + + [time_offset - timedelta(days=1)] * 33 + + [time_offset - timedelta(days=2)] * 34 + ) + final_part_offset = ( + [time_offset - timedelta(days=99)] * 33 + + [time_offset - timedelta(days=100)] * 33 + + [time_offset - timedelta(days=101)] * 34 + ) + return pd.DataFrame( + { + "datetime": final_offset, + "datetime_col": final_part_offset, + "dev_feature_float": [np.float(row) for row in range(N_ROWS)], + "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], + } + ) + + @pytest.fixture + def non_partitioned_df(self): + # Non-Partitioned DataFrame + N_ROWS = 100 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + return pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "dev_feature_float": [np.float(row) for row in range(N_ROWS)], + "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], + } + ) + @pytest.mark.parametrize( "mocked_client", [lazy_fixture("mock_client"), lazy_fixture("secure_mock_client")], @@ -400,30 +437,147 @@ def test_apply_feature_table_success(self, 
test_client): @pytest.mark.parametrize( "mocked_client", [lazy_fixture("mock_client")], ) - def test_ingest(self, mocked_client, mocker): + def test_ingest_dataframe_partition(self, mocked_client, mocker, partitioned_df): + """ + Test ingestion with local FileSource, using DataFrame. + Partition column stated but not provided in Dataset. + """ mocked_client._core_service_stub = Core.CoreServiceStub( grpc.insecure_channel("") ) - N_ROWS = 100 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - final_offset = ( - [time_offset] * 33 - + [time_offset - timedelta(days=1)] * 33 - + [time_offset - timedelta(days=2)] * 34 + mocker.patch.object( + mocked_client._core_service_stub, + "GetFeatureTable", + return_value=GetFeatureTableResponse( + table=FeatureTableProto( + spec=FeatureTableSpecProto( + name="ingest_featuretable", + max_age=Duration(seconds=3600), + features=[ + FeatureSpecProto( + name="dev_feature_float", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpecProto( + name="dev_feature_string", + value_type=ValueProto.ValueType.STRING, + ), + ], + entities=["dev_entity"], + batch_source=DataSourceProto( + type="BATCH_FILE", + file_options=DataSourceProto.FileOptions( + file_format="parquet", file_url="file://feast/*" + ), + timestamp_column="datetime", + date_partition_column="datetime_col", + ), + ), + meta=FeatureTableMetaProto(), + ) + ), ) - final_part_offset = ( - [time_offset - timedelta(days=99)] * 33 - + [time_offset - timedelta(days=100)] * 33 - + [time_offset - timedelta(days=101)] * 34 + + mocked_client.set_project("my_project") + ft = mocked_client.get_feature_table("ingest_featuretable") + mocked_client.ingest(ft, partitioned_df, timeout=600) + + dest_fpath = os.path.join("feast/") + pq_df = pq.read_table(dest_fpath).to_pandas() + + # Format Dataframes before comparing them + partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) + partitioned_df = partitioned_df.reindex(sorted(partitioned_df.columns), axis=1) + partitioned_df.reset_index(drop=True, inplace=True) + pq_df.reset_index(drop=True, inplace=True) + pq_df["datetime_col"] = pd.to_datetime(pq_df.datetime_col).dt.tz_convert("UTC") + + assert_frame_equal(partitioned_df, pq_df) + + @pytest.mark.parametrize( + "mocked_client", [lazy_fixture("mock_client")], + ) + def test_ingest_dataframe_no_partition( + self, mocked_client, mocker, non_partitioned_df + ): + """ + Test ingestion with local FileSource, using DataFrame. + Partition column not stated. 
+ """ + mocked_client._core_service_stub = Core.CoreServiceStub( + grpc.insecure_channel("") ) - ft_df = pd.DataFrame( - { - "datetime": final_offset, - "datetime_col": final_part_offset, - "dev_feature_float": [np.float(row) for row in range(N_ROWS)], - "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], - } + + mocker.patch.object( + mocked_client._core_service_stub, + "GetFeatureTable", + return_value=GetFeatureTableResponse( + table=FeatureTableProto( + spec=FeatureTableSpecProto( + name="ingest_featuretable", + max_age=Duration(seconds=3600), + features=[ + FeatureSpecProto( + name="dev_feature_float", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpecProto( + name="dev_feature_string", + value_type=ValueProto.ValueType.STRING, + ), + ], + entities=["dev_entity"], + batch_source=DataSourceProto( + type="BATCH_FILE", + file_options=DataSourceProto.FileOptions( + file_format="parquet", file_url="file://feast2/*" + ), + timestamp_column="datetime", + ), + ), + meta=FeatureTableMetaProto(), + ) + ), + ) + + mocked_client.set_project("my_project") + ft = mocked_client.get_feature_table("ingest_featuretable") + mocked_client.ingest(ft, non_partitioned_df, timeout=600) + + # Since not partitioning, we're only looking for single file + dest_fpath = os.path.join("feast2/") + single_file = [ + f + for f in os.listdir(dest_fpath) + if os.path.isfile(os.path.join(dest_fpath, f)) + ][0] + pq_df = pq.read_table(dest_fpath + single_file).to_pandas() + + # Format Dataframes before comparing them + non_partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) + non_partitioned_df = non_partitioned_df.reindex( + sorted(non_partitioned_df.columns), axis=1 + ) + non_partitioned_df.reset_index(drop=True, inplace=True) + pq_df.reset_index(drop=True, inplace=True) + + assert_frame_equal(non_partitioned_df, pq_df) + + @pytest.mark.parametrize( + "mocked_client", [lazy_fixture("mock_client")], + ) + def test_ingest_csv(self, mocked_client, mocker): + """ + Test ingestion with local FileSource, using CSV file. + Partition column is provided. 
+ """ + mocked_client._core_service_stub = Core.CoreServiceStub( + grpc.insecure_channel("") ) mocker.patch.object( @@ -448,7 +602,7 @@ def test_ingest(self, mocked_client, mocker): batch_source=DataSourceProto( type="BATCH_FILE", file_options=DataSourceProto.FileOptions( - file_format="parquet", file_url="file://feast/*" + file_format="parquet", file_url="file://feast3/*" ), timestamp_column="datetime", date_partition_column="datetime_col", @@ -459,22 +613,33 @@ def test_ingest(self, mocked_client, mocker): ), ) + partitioned_df = pd.read_csv( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "./data/dev_featuretable.csv", + ) + ) + mocked_client.set_project("my_project") ft = mocked_client.get_feature_table("ingest_featuretable") - mocked_client.ingest(ft, ft_df, timeout=600) + mocked_client.ingest(ft, partitioned_df, timeout=600) - dest_fpath = os.path.join("feast/") + dest_fpath = os.path.join("feast3/") pq_df = pq.read_table(dest_fpath).to_pandas() - ft_df.sort_values(by=["dev_feature_float"], inplace=True) + # Format Dataframes before comparing them + partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) pq_df.sort_values(by=["dev_feature_float"], inplace=True) pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) - ft_df = ft_df.reindex(sorted(ft_df.columns), axis=1) - ft_df.reset_index(drop=True, inplace=True) + partitioned_df = partitioned_df.reindex(sorted(partitioned_df.columns), axis=1) + partitioned_df.reset_index(drop=True, inplace=True) pq_df.reset_index(drop=True, inplace=True) + partitioned_df["datetime_col"] = pd.to_datetime( + partitioned_df.datetime_col + ).dt.tz_convert("UTC") pq_df["datetime_col"] = pd.to_datetime(pq_df.datetime_col).dt.tz_convert("UTC") - assert_frame_equal(ft_df, pq_df) + assert_frame_equal(partitioned_df, pq_df) @patch("grpc.channel_ready_future") def test_secure_channel_creation_with_secure_client( From c9506c3c4364d16969f6a119a5fc32fd8078e27a Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 16:07:28 +0800 Subject: [PATCH 24/34] Cleanup FeatureTable and DataSource Signed-off-by: Terence --- sdk/python/feast/__init__.py | 2 - sdk/python/feast/data_source.py | 44 ++++++++++++++++++++ sdk/python/feast/feature_table.py | 69 +++---------------------------- 3 files changed, 50 insertions(+), 65 deletions(-) diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index adf2aaf181..5ac3658d18 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -3,7 +3,6 @@ from .client import Client from .data_source import ( BigQuerySource, - DataSource, FileSource, KafkaSource, KinesisSource, @@ -23,7 +22,6 @@ __all__ = [ "Client", "Entity", - "DataSource", "BigQuerySource", "FileSource", "KafkaSource", diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 40dfc8e3b2..bf6a0c0157 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -686,3 +686,47 @@ def to_proto(self) -> DataSourceProto: data_source_proto.date_partition_column = self.date_partition_column return data_source_proto + + +def _get_data_source(data_source): + """ + Convert data source config in FeatureTable spec to a DataSource class object. 
+ """ + + if issubclass(type(data_source), FileSource): + data_source_obj = FileSource( + field_mapping=data_source.field_mapping, + file_format=data_source.file_options.file_format, + file_url=data_source.file_options.file_url, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + elif issubclass(type(data_source), BigQuerySource): + data_source_obj = BigQuerySource( + field_mapping=data_source.field_mapping, + table_ref=data_source.bigquery_options.table_ref, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + elif issubclass(type(data_source), KafkaSource): + data_source_obj = KafkaSource( + field_mapping=data_source.field_mapping, + bootstrap_servers=data_source.kafka_options.bootstrap_servers, + class_path=data_source.kafka_options.class_path, + topic=data_source.kafka_options.topic, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + elif issubclass(type(data_source), KinesisSource): + data_source_obj = KinesisSource( + field_mapping=data_source.field_mapping, + class_path=data_source.kinesis_options.class_path, + region=data_source.kinesis_options.region, + stream_name=data_source.kinesis_options.stream_name, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + else: + raise ValueError("Could not identify the source type being added") + + return data_source_obj diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index 3f10003182..692894e077 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -29,6 +29,7 @@ FileSource, KafkaSource, KinesisSource, + _get_data_source, ) from feast.feature import Feature from feast.loaders import yaml as feast_yaml @@ -80,7 +81,7 @@ def __eq__(self, other): ): return False - if sorted(self.entities) != sorted(other.entities): + if self.entities != other.entities: return False if self.features != other.features: return False @@ -250,58 +251,6 @@ def from_dict(cls, ft_dict): return cls.from_proto(feature_table_proto) - @classmethod - def _get_data_source(cls, data_source): - """ - Convert data source config in FeatureTable spec to a DataSource class object. 
- """ - - if data_source.file_options.file_format and data_source.file_options.file_url: - data_source_obj = FileSource( - field_mapping=data_source.field_mapping, - file_format=data_source.file_options.file_format, - file_url=data_source.file_options.file_url, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - elif data_source.bigquery_options.table_ref: - data_source_obj = BigQuerySource( - field_mapping=data_source.field_mapping, - table_ref=data_source.bigquery_options.table_ref, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - elif ( - data_source.kafka_options.bootstrap_servers - and data_source.kafka_options.topic - and data_source.kafka_options.class_path - ): - data_source_obj = KafkaSource( - field_mapping=data_source.field_mapping, - bootstrap_servers=data_source.kafka_options.bootstrap_servers, - class_path=data_source.kafka_options.class_path, - topic=data_source.kafka_options.topic, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - elif ( - data_source.kinesis_options.class_path - and data_source.kinesis_options.region - and data_source.kinesis_options.stream_name - ): - data_source_obj = KinesisSource( - field_mapping=data_source.field_mapping, - class_path=data_source.kinesis_options.class_path, - region=data_source.kinesis_options.region, - stream_name=data_source.kinesis_options.stream_name, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - else: - raise ValueError("Could not identify the source type being added") - - return data_source_obj - @classmethod def from_proto(cls, feature_table_proto: FeatureTableProto): """ @@ -332,15 +281,11 @@ def from_proto(cls, feature_table_proto: FeatureTableProto): and feature_table_proto.spec.max_age.nanos == 0 else feature_table_proto.spec.max_age ), - batch_source=( - None - if not feature_table_proto.spec.batch_source.ByteSize() - else cls._get_data_source(feature_table_proto.spec.batch_source) - ), + batch_source=_get_data_source(feature_table_proto.spec.batch_source), stream_source=( None if not feature_table_proto.spec.stream_source.ByteSize() - else cls._get_data_source(feature_table_proto.spec.stream_source) + else _get_data_source(feature_table_proto.spec.stream_source) ), ) @@ -365,9 +310,8 @@ def to_proto(self) -> FeatureTableProto: name=self.name, entities=self.entities, features=[ - feature.to_proto() + feature.to_proto() if type(feature) == Feature else feature for feature in self.features - if type(feature) == Feature ], labels=self.labels, max_age=self.max_age, @@ -398,9 +342,8 @@ def to_spec_proto(self) -> FeatureTableSpecProto: name=self.name, entities=self.entities, features=[ - feature.to_proto() + feature.to_proto() if type(feature) == Feature else feature for feature in self.features - if type(feature) == Feature ], labels=self.labels, max_age=self.max_age, From f1fa274a664aadea5cf438395cd4446db90b5267 Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 16:20:49 +0800 Subject: [PATCH 25/34] Some fixes Signed-off-by: Terence --- sdk/python/feast/data_source.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index bf6a0c0157..346a5b883f 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -693,7 +693,7 @@ def 
_get_data_source(data_source): Convert data source config in FeatureTable spec to a DataSource class object. """ - if issubclass(type(data_source), FileSource): + if data_source.file_options.file_format and data_source.file_options.file_url: data_source_obj = FileSource( field_mapping=data_source.field_mapping, file_format=data_source.file_options.file_format, @@ -701,14 +701,18 @@ def _get_data_source(data_source): timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, ) - elif issubclass(type(data_source), BigQuerySource): + elif data_source.bigquery_options.table_ref: data_source_obj = BigQuerySource( field_mapping=data_source.field_mapping, table_ref=data_source.bigquery_options.table_ref, timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, ) - elif issubclass(type(data_source), KafkaSource): + elif ( + data_source.kafka_options.bootstrap_servers + and data_source.kafka_options.topic + and data_source.kafka_options.class_path + ): data_source_obj = KafkaSource( field_mapping=data_source.field_mapping, bootstrap_servers=data_source.kafka_options.bootstrap_servers, @@ -717,7 +721,11 @@ def _get_data_source(data_source): timestamp_column=data_source.timestamp_column, date_partition_column=data_source.date_partition_column, ) - elif issubclass(type(data_source), KinesisSource): + elif ( + data_source.kinesis_options.class_path + and data_source.kinesis_options.region + and data_source.kinesis_options.stream_name + ): data_source_obj = KinesisSource( field_mapping=data_source.field_mapping, class_path=data_source.kinesis_options.class_path, From d43bb44e53d934f3fac0fce2bbd9610e96157f20 Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 17:48:22 +0800 Subject: [PATCH 26/34] Add BQ source test Signed-off-by: Terence --- tests/e2e/test-register.py | 56 +++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/e2e/test-register.py b/tests/e2e/test-register.py index 11f64fb854..99ecd1b339 100644 --- a/tests/e2e/test-register.py +++ b/tests/e2e/test-register.py @@ -1,11 +1,15 @@ import os import uuid +from datetime import datetime +import numpy as np +import pandas as pd import pytest +import pytz from google.protobuf.duration_pb2 import Duration from feast.client import Client -from feast.data_source import FileSource, KafkaSource +from feast.data_source import BigQuerySource, FileSource, KafkaSource from feast.entity import Entity from feast.feature import Feature from feast.feature_table import FeatureTable @@ -13,6 +17,7 @@ DIR_PATH = os.path.dirname(os.path.realpath(__file__)) PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] +SUFFIX = str(int(datetime.now().timestamp())) @pytest.fixture(scope="module") @@ -85,6 +90,36 @@ def basic_featuretable(): ) +@pytest.fixture +def bq_dataset(): + N_ROWS = 100 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + return pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "dev_feature_float": [np.float(row) for row in range(N_ROWS)], + "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], + } + ) + + +@pytest.fixture +def bq_featuretable(): + batch_source = BigQuerySource( + table_ref=f"kf-feast:feaste2e.table{SUFFIX}", timestamp_column="datetime", + ) + return FeatureTable( + name="basic_featuretable", + entities=["driver_id", "customer_id"], + features=[ + Feature(name="dev_feature_float", dtype=ValueType.FLOAT), + Feature(name="dev_feature_string", 
dtype=ValueType.STRING), + ], + max_age=Duration(seconds=3600), + batch_source=batch_source, + ) + + @pytest.fixture def alltypes_entity(): return Entity( @@ -194,3 +229,22 @@ def test_get_list_alltypes( ft for ft in client.list_feature_tables() if ft.name == "alltypes" ][0] assert actual_list_feature_table == alltypes_featuretable + + +def test_ingest( + client: Client, bq_featuretable: FeatureTable, bq_dataset: pd.DataFrame +): + # ApplyFeatureTable + client.apply_feature_table(bq_featuretable) + client.ingest(bq_featuretable, bq_dataset, timeout=600) + + from google.cloud import bigquery + + bq_client = bigquery.Client(project="kf-feast") + query_string = f"SELECT COUNT(*) FROM `kf-feast.feaste2e.table{SUFFIX}`" + + job = bq_client.query(query_string) + results = job.result() + + for row in results: + assert row[0] == 100 From 33bd9dba56b29498d99ff9791770e03b723e27cb Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 18:26:57 +0800 Subject: [PATCH 27/34] Revert "Add BQ source test" This reverts commit d567937eaf80190cde59128c19af4644c810e7d9. Signed-off-by: Terence --- tests/e2e/test-register.py | 56 +------------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/tests/e2e/test-register.py b/tests/e2e/test-register.py index 99ecd1b339..11f64fb854 100644 --- a/tests/e2e/test-register.py +++ b/tests/e2e/test-register.py @@ -1,15 +1,11 @@ import os import uuid -from datetime import datetime -import numpy as np -import pandas as pd import pytest -import pytz from google.protobuf.duration_pb2 import Duration from feast.client import Client -from feast.data_source import BigQuerySource, FileSource, KafkaSource +from feast.data_source import FileSource, KafkaSource from feast.entity import Entity from feast.feature import Feature from feast.feature_table import FeatureTable @@ -17,7 +13,6 @@ DIR_PATH = os.path.dirname(os.path.realpath(__file__)) PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] -SUFFIX = str(int(datetime.now().timestamp())) @pytest.fixture(scope="module") @@ -90,36 +85,6 @@ def basic_featuretable(): ) -@pytest.fixture -def bq_dataset(): - N_ROWS = 100 - time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) - return pd.DataFrame( - { - "datetime": [time_offset] * N_ROWS, - "dev_feature_float": [np.float(row) for row in range(N_ROWS)], - "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], - } - ) - - -@pytest.fixture -def bq_featuretable(): - batch_source = BigQuerySource( - table_ref=f"kf-feast:feaste2e.table{SUFFIX}", timestamp_column="datetime", - ) - return FeatureTable( - name="basic_featuretable", - entities=["driver_id", "customer_id"], - features=[ - Feature(name="dev_feature_float", dtype=ValueType.FLOAT), - Feature(name="dev_feature_string", dtype=ValueType.STRING), - ], - max_age=Duration(seconds=3600), - batch_source=batch_source, - ) - - @pytest.fixture def alltypes_entity(): return Entity( @@ -229,22 +194,3 @@ def test_get_list_alltypes( ft for ft in client.list_feature_tables() if ft.name == "alltypes" ][0] assert actual_list_feature_table == alltypes_featuretable - - -def test_ingest( - client: Client, bq_featuretable: FeatureTable, bq_dataset: pd.DataFrame -): - # ApplyFeatureTable - client.apply_feature_table(bq_featuretable) - client.ingest(bq_featuretable, bq_dataset, timeout=600) - - from google.cloud import bigquery - - bq_client = bigquery.Client(project="kf-feast") - query_string = f"SELECT COUNT(*) FROM `kf-feast.feaste2e.table{SUFFIX}`" - - job = bq_client.query(query_string) - 
results = job.result() - - for row in results: - assert row[0] == 100 From 99821c5dedb3f691bf3ad665cf0fd27c1cc89b97 Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 22:14:48 +0800 Subject: [PATCH 28/34] Add BQ source test Signed-off-by: Terence --- tests/e2e/test-register.py | 77 +++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/tests/e2e/test-register.py b/tests/e2e/test-register.py index 11f64fb854..3581ae8891 100644 --- a/tests/e2e/test-register.py +++ b/tests/e2e/test-register.py @@ -1,11 +1,17 @@ import os +import time import uuid +from datetime import datetime +import numpy as np +import pandas as pd import pytest +import pytz from google.protobuf.duration_pb2 import Duration +from pandas.testing import assert_frame_equal from feast.client import Client -from feast.data_source import FileSource, KafkaSource +from feast.data_source import BigQuerySource, FileSource, KafkaSource from feast.entity import Entity from feast.feature import Feature from feast.feature_table import FeatureTable @@ -13,6 +19,7 @@ DIR_PATH = os.path.dirname(os.path.realpath(__file__)) PROJECT_NAME = "basic_" + uuid.uuid4().hex.upper()[0:6] +SUFFIX = str(int(datetime.now().timestamp())) @pytest.fixture(scope="module") @@ -27,6 +34,11 @@ def client(pytestconfig): return client +@pytest.fixture +def bq_table_id(): + return f"kf-feast:feaste2e.table{SUFFIX}" + + @pytest.fixture def customer_entity(): return Entity( @@ -85,6 +97,34 @@ def basic_featuretable(): ) +@pytest.fixture +def bq_dataset(): + N_ROWS = 100 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + return pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "dev_feature_float": [np.float(row) for row in range(N_ROWS)], + "dev_feature_string": ["feat_" + str(row) for row in range(N_ROWS)], + } + ) + + +@pytest.fixture +def bq_featuretable(bq_table_id): + batch_source = BigQuerySource(table_ref=bq_table_id, timestamp_column="datetime",) + return FeatureTable( + name="basic_featuretable", + entities=["driver_id", "customer_id"], + features=[ + Feature(name="dev_feature_float", dtype=ValueType.FLOAT), + Feature(name="dev_feature_string", dtype=ValueType.STRING), + ], + max_age=Duration(seconds=3600), + batch_source=batch_source, + ) + + @pytest.fixture def alltypes_entity(): return Entity( @@ -194,3 +234,38 @@ def test_get_list_alltypes( ft for ft in client.list_feature_tables() if ft.name == "alltypes" ][0] assert actual_list_feature_table == alltypes_featuretable + + +def test_ingest( + client: Client, + customer_entity: Entity, + driver_entity: Entity, + bq_featuretable: FeatureTable, + bq_dataset: pd.DataFrame, + bq_table_id: str, +): + gcp_project, _ = bq_table_id.split(":") + bq_table_id = bq_table_id.replace(":", ".") + + # ApplyEntity + client.apply_entity(customer_entity) + client.apply_entity(driver_entity) + + # ApplyFeatureTable + client.apply_feature_table(bq_featuretable) + client.ingest(bq_featuretable, bq_dataset, timeout=120) + + # Give time to allow data to propagate to BQ table + time.sleep(15) + + from google.cloud import bigquery + + bq_client = bigquery.Client(project=gcp_project) + query_string = f"SELECT * FROM `{bq_table_id}`" + + job = bq_client.query(query_string) + query_df = job.to_dataframe() + + assert_frame_equal(query_df, bq_dataset) + + bq_client.delete_table(bq_table_id, not_found_ok=True) From 5367a480201acf3ae87d4072330fc2f27210cb30 Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 22:32:47 +0800 Subject: [PATCH 29/34] Update perms 
for bq test Signed-off-by: Terence --- .prow/config.yaml | 4 ++-- infra/scripts/test-end-to-end-redis-cluster.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.prow/config.yaml b/.prow/config.yaml index 39c275603d..4155adad91 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -147,7 +147,7 @@ presubmits: spec: containers: - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end.sh"] + command: ["infra/scripts/test-end-to-end.sh", "True"] resources: requests: cpu: "6" @@ -199,7 +199,7 @@ presubmits: spec: containers: - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end-redis-cluster.sh"] + command: ["infra/scripts/test-end-to-end-redis-cluster.sh", "True"] resources: requests: cpu: "6" diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index 0e5aa5879a..ffb4c21aca 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -2,6 +2,8 @@ set -e set -o pipefail +[[ $1 == "True" ]] && ENABLE_AUTH="true" || ENABLE_AUTH="false" +echo "Authenication enabled : ${ENABLE_AUTH}" test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/gcloud/service-account.json" test -z ${SKIP_BUILD_JARS} && SKIP_BUILD_JARS="false" From d8ade3765835c1cc86aaf9ea9d648b861e0f47ad Mon Sep 17 00:00:00 2001 From: Terence Date: Wed, 7 Oct 2020 23:16:59 +0800 Subject: [PATCH 30/34] Revert "Update perms for bq test" This reverts commit 7e74e9069f97af9c0e108aba8f4bd1197ba5c3ed. Signed-off-by: Terence --- .prow/config.yaml | 40 ++++--------------- .../scripts/test-end-to-end-redis-cluster.sh | 5 +-- 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/.prow/config.yaml b/.prow/config.yaml index 4155adad91..381fcccea8 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -147,23 +147,11 @@ presubmits: spec: containers: - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end.sh", "True"] + command: ["infra/scripts/test-end-to-end.sh"] resources: requests: cpu: "6" memory: "6144Mi" - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /etc/gcloud/service-account.json - volumeMounts: - - mountPath: /etc/gcloud/service-account.json - name: service-account - readOnly: true - subPath: service-account.json - volumes: - - name: service-account - secret: - secretName: feast-service-account skip_branches: - ^v0\.(3|4)-branch$ @@ -198,26 +186,14 @@ presubmits: always_run: true spec: containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end-redis-cluster.sh", "True"] - resources: - requests: - cpu: "6" - memory: "6144Mi" - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /etc/gcloud/service-account.json - volumeMounts: - - mountPath: /etc/gcloud/service-account.json - name: service-account - readOnly: true - subPath: service-account.json - volumes: - - name: service-account - secret: - secretName: feast-service-account + - image: maven:3.6-jdk-11 + command: ["infra/scripts/test-end-to-end-redis-cluster.sh"] + resources: + requests: + cpu: "6" + memory: "6144Mi" skip_branches: - - ^v0\.(3|4)-branch$ + - ^v0\.(3|4)-branch$ - name: test-end-to-end-java-8 decorate: true diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index ffb4c21aca..12e7dc8b45 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -2,10 +2,8 @@ set -e set -o pipefail -[[ $1 == "True" ]] && 
ENABLE_AUTH="true" || ENABLE_AUTH="false" -echo "Authenication enabled : ${ENABLE_AUTH}" -test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/gcloud/service-account.json" +test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account/service-account.json" test -z ${SKIP_BUILD_JARS} && SKIP_BUILD_JARS="false" test -z ${GOOGLE_CLOUD_PROJECT} && GOOGLE_CLOUD_PROJECT="kf-feast" test -z ${TEMP_BUCKET} && TEMP_BUCKET="feast-templocation-kf-feast" @@ -32,7 +30,6 @@ This script will run end-to-end tests for Feast Core and Online Serving. source ${SCRIPTS_DIR}/setup-common-functions.sh install_test_tools -install_gcloud_sdk install_and_start_local_redis_cluster install_and_start_local_postgres install_and_start_local_zookeeper_and_kafka From 20cd4d93b538af4ddd003468c3487879d9e3afed Mon Sep 17 00:00:00 2001 From: Terence Date: Thu, 8 Oct 2020 09:39:57 +0800 Subject: [PATCH 31/34] Cleanup datasource Signed-off-by: Terence --- sdk/python/feast/data_source.py | 109 +++++++++++++++--------------- sdk/python/feast/feature_table.py | 5 +- 2 files changed, 55 insertions(+), 59 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 346a5b883f..04f4752c37 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -415,12 +415,57 @@ def date_partition_column(self, date_partition_column): """ self._date_partition_column = date_partition_column - @classmethod - def from_proto(cls, data_source_proto: DataSourceProto): + @staticmethod + def from_proto(data_source): """ - Creates a DataSource from a protobuf representation of a data source + Convert data source config in FeatureTable spec to a DataSource class object. """ - raise NotImplementedError + + if data_source.file_options.file_format and data_source.file_options.file_url: + data_source_obj = FileSource( + field_mapping=data_source.field_mapping, + file_format=data_source.file_options.file_format, + file_url=data_source.file_options.file_url, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + elif data_source.bigquery_options.table_ref: + data_source_obj = BigQuerySource( + field_mapping=data_source.field_mapping, + table_ref=data_source.bigquery_options.table_ref, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + elif ( + data_source.kafka_options.bootstrap_servers + and data_source.kafka_options.topic + and data_source.kafka_options.class_path + ): + data_source_obj = KafkaSource( + field_mapping=data_source.field_mapping, + bootstrap_servers=data_source.kafka_options.bootstrap_servers, + class_path=data_source.kafka_options.class_path, + topic=data_source.kafka_options.topic, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + elif ( + data_source.kinesis_options.class_path + and data_source.kinesis_options.region + and data_source.kinesis_options.stream_name + ): + data_source_obj = KinesisSource( + field_mapping=data_source.field_mapping, + class_path=data_source.kinesis_options.class_path, + region=data_source.kinesis_options.region, + stream_name=data_source.kinesis_options.stream_name, + timestamp_column=data_source.timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + else: + raise ValueError("Could not identify the source type being added") + + return data_source_obj def to_proto(self) -> 
DataSourceProto: """ @@ -467,6 +512,7 @@ def file_options(self, file_options): """ self._file_options = file_options + @classmethod def from_proto(cls, data_source_proto): data_source = cls( @@ -527,6 +573,7 @@ def bigquery_options(self, bigquery_options): """ self._bigquery_options = bigquery_options + @classmethod def from_proto(cls, data_source_proto): data_source = cls( @@ -596,6 +643,7 @@ def kafka_options(self, kafka_options): """ self._kafka_options = kafka_options + @classmethod def from_proto(cls, data_source_proto): data_source = cls( @@ -664,6 +712,7 @@ def kinesis_options(self, kinesis_options): """ self._kinesis_options = kinesis_options + @classmethod def from_proto(cls, data_source_proto): data_source = cls( @@ -686,55 +735,3 @@ def to_proto(self) -> DataSourceProto: data_source_proto.date_partition_column = self.date_partition_column return data_source_proto - - -def _get_data_source(data_source): - """ - Convert data source config in FeatureTable spec to a DataSource class object. - """ - - if data_source.file_options.file_format and data_source.file_options.file_url: - data_source_obj = FileSource( - field_mapping=data_source.field_mapping, - file_format=data_source.file_options.file_format, - file_url=data_source.file_options.file_url, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - elif data_source.bigquery_options.table_ref: - data_source_obj = BigQuerySource( - field_mapping=data_source.field_mapping, - table_ref=data_source.bigquery_options.table_ref, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - elif ( - data_source.kafka_options.bootstrap_servers - and data_source.kafka_options.topic - and data_source.kafka_options.class_path - ): - data_source_obj = KafkaSource( - field_mapping=data_source.field_mapping, - bootstrap_servers=data_source.kafka_options.bootstrap_servers, - class_path=data_source.kafka_options.class_path, - topic=data_source.kafka_options.topic, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - elif ( - data_source.kinesis_options.class_path - and data_source.kinesis_options.region - and data_source.kinesis_options.stream_name - ): - data_source_obj = KinesisSource( - field_mapping=data_source.field_mapping, - class_path=data_source.kinesis_options.class_path, - region=data_source.kinesis_options.region, - stream_name=data_source.kinesis_options.stream_name, - timestamp_column=data_source.timestamp_column, - date_partition_column=data_source.date_partition_column, - ) - else: - raise ValueError("Could not identify the source type being added") - - return data_source_obj diff --git a/sdk/python/feast/feature_table.py b/sdk/python/feast/feature_table.py index 692894e077..b1401ec97a 100644 --- a/sdk/python/feast/feature_table.py +++ b/sdk/python/feast/feature_table.py @@ -29,7 +29,6 @@ FileSource, KafkaSource, KinesisSource, - _get_data_source, ) from feast.feature import Feature from feast.loaders import yaml as feast_yaml @@ -281,11 +280,11 @@ def from_proto(cls, feature_table_proto: FeatureTableProto): and feature_table_proto.spec.max_age.nanos == 0 else feature_table_proto.spec.max_age ), - batch_source=_get_data_source(feature_table_proto.spec.batch_source), + batch_source=DataSource.from_proto(feature_table_proto.spec.batch_source), stream_source=( None if not feature_table_proto.spec.stream_source.ByteSize() - else 
_get_data_source(feature_table_proto.spec.stream_source) + else DataSource.from_proto(feature_table_proto.spec.stream_source) ), ) From 404cc364c3ce8b302bafa3bf9c65c4fa6a3a2d11 Mon Sep 17 00:00:00 2001 From: Terence Date: Thu, 8 Oct 2020 10:23:27 +0800 Subject: [PATCH 32/34] Renaming and optimize file search loop Signed-off-by: Terence --- sdk/python/feast/client.py | 11 ++++++----- sdk/python/feast/loaders/ingest.py | 12 ++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 96ed8e7538..0b418e303d 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -663,11 +663,12 @@ def ingest( ) dir_path = None - to_partition = False - if feature_table.batch_source.date_partition_column and issubclass( - type(feature_table.batch_source), FileSource + with_partitions = False + if ( + issubclass(type(feature_table.batch_source), FileSource) + and feature_table.batch_source.date_partition_column ): - to_partition = True + with_partitions = True dest_path = _write_partitioned_table_from_source( column_names, pyarrow_table, @@ -682,7 +683,7 @@ def ingest( try: if issubclass(type(feature_table.batch_source), FileSource): file_url = feature_table.batch_source.file_options.file_url[:-1] - _upload_to_file_source(file_url, to_partition, dest_path) + _upload_to_file_source(file_url, with_partitions, dest_path) if issubclass(type(feature_table.batch_source), BigQuerySource): bq_table_ref = feature_table.batch_source.bigquery_options.table_ref feature_table_timestamp_column = ( diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 528e47fa7c..581958f5c0 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -1,3 +1,4 @@ +import glob import os import tempfile import time @@ -171,7 +172,9 @@ def _read_table_from_source( return table, column_names -def _upload_to_file_source(file_url: str, to_partition: bool, dest_path: str) -> None: +def _upload_to_file_source( + file_url: str, with_partitions: bool, dest_path: str +) -> None: """ Uploads data into a FileSource. Currently supports GCS, S3 and Local FS. 
@@ -183,11 +186,8 @@ def _upload_to_file_source(file_url: str, to_partition: bool, dest_path: str) -> uri = urlparse(file_url) staging_client = get_staging_client(uri.scheme) - if to_partition: - file_paths = list() - for (dirpath, dirnames, filenames) in os.walk(dest_path): - file_paths += [os.path.join(dirpath, file) for file in filenames] - for path in file_paths: + if with_partitions: + for path in glob.glob(os.path.join(dest_path, "**/*")): file_name = path.split("/")[-1] partition_col = path.split("/")[-2] staging_client.upload_file( From 6990f16ef0f535ec9dd128369aa98704deb72aa4 Mon Sep 17 00:00:00 2001 From: Terence Date: Thu, 8 Oct 2020 11:02:00 +0800 Subject: [PATCH 33/34] Remove duplicated code in pytest Signed-off-by: Terence --- sdk/python/tests/test_client.py | 185 +++++++++++++------------------- 1 file changed, 77 insertions(+), 108 deletions(-) diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index a9f6c628e5..05e598ec34 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -16,6 +16,7 @@ import socket from concurrent import futures from datetime import datetime, timedelta +from typing import Tuple from unittest import mock import grpc @@ -449,33 +450,8 @@ def test_ingest_dataframe_partition(self, mocked_client, mocker, partitioned_df) mocker.patch.object( mocked_client._core_service_stub, "GetFeatureTable", - return_value=GetFeatureTableResponse( - table=FeatureTableProto( - spec=FeatureTableSpecProto( - name="ingest_featuretable", - max_age=Duration(seconds=3600), - features=[ - FeatureSpecProto( - name="dev_feature_float", - value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpecProto( - name="dev_feature_string", - value_type=ValueProto.ValueType.STRING, - ), - ], - entities=["dev_entity"], - batch_source=DataSourceProto( - type="BATCH_FILE", - file_options=DataSourceProto.FileOptions( - file_format="parquet", file_url="file://feast/*" - ), - timestamp_column="datetime", - date_partition_column="datetime_col", - ), - ), - meta=FeatureTableMetaProto(), - ) + return_value=_ingest_test_getfeaturetable_mocked_resp( + "file://feast/*", "datetime_col" ), ) @@ -486,14 +462,9 @@ def test_ingest_dataframe_partition(self, mocked_client, mocker, partitioned_df) dest_fpath = os.path.join("feast/") pq_df = pq.read_table(dest_fpath).to_pandas() - # Format Dataframes before comparing them - partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) - pq_df.sort_values(by=["dev_feature_float"], inplace=True) - pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) - partitioned_df = partitioned_df.reindex(sorted(partitioned_df.columns), axis=1) - partitioned_df.reset_index(drop=True, inplace=True) - pq_df.reset_index(drop=True, inplace=True) - pq_df["datetime_col"] = pd.to_datetime(pq_df.datetime_col).dt.tz_convert("UTC") + partitioned_df, pq_df = _ingest_test_format_dataframes( + partitioned_df, pq_df, True + ) assert_frame_equal(partitioned_df, pq_df) @@ -514,33 +485,7 @@ def test_ingest_dataframe_no_partition( mocker.patch.object( mocked_client._core_service_stub, "GetFeatureTable", - return_value=GetFeatureTableResponse( - table=FeatureTableProto( - spec=FeatureTableSpecProto( - name="ingest_featuretable", - max_age=Duration(seconds=3600), - features=[ - FeatureSpecProto( - name="dev_feature_float", - value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpecProto( - name="dev_feature_string", - value_type=ValueProto.ValueType.STRING, - ), - ], - entities=["dev_entity"], - batch_source=DataSourceProto( - 
type="BATCH_FILE", - file_options=DataSourceProto.FileOptions( - file_format="parquet", file_url="file://feast2/*" - ), - timestamp_column="datetime", - ), - ), - meta=FeatureTableMetaProto(), - ) - ), + return_value=_ingest_test_getfeaturetable_mocked_resp("file://feast2/*"), ) mocked_client.set_project("my_project") @@ -556,15 +501,9 @@ def test_ingest_dataframe_no_partition( ][0] pq_df = pq.read_table(dest_fpath + single_file).to_pandas() - # Format Dataframes before comparing them - non_partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) - pq_df.sort_values(by=["dev_feature_float"], inplace=True) - pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) - non_partitioned_df = non_partitioned_df.reindex( - sorted(non_partitioned_df.columns), axis=1 + non_partitioned_df, pq_df = _ingest_test_format_dataframes( + non_partitioned_df, pq_df ) - non_partitioned_df.reset_index(drop=True, inplace=True) - pq_df.reset_index(drop=True, inplace=True) assert_frame_equal(non_partitioned_df, pq_df) @@ -583,33 +522,8 @@ def test_ingest_csv(self, mocked_client, mocker): mocker.patch.object( mocked_client._core_service_stub, "GetFeatureTable", - return_value=GetFeatureTableResponse( - table=FeatureTableProto( - spec=FeatureTableSpecProto( - name="ingest_featuretable", - max_age=Duration(seconds=3600), - features=[ - FeatureSpecProto( - name="dev_feature_float", - value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpecProto( - name="dev_feature_string", - value_type=ValueProto.ValueType.STRING, - ), - ], - entities=["dev_entity"], - batch_source=DataSourceProto( - type="BATCH_FILE", - file_options=DataSourceProto.FileOptions( - file_format="parquet", file_url="file://feast3/*" - ), - timestamp_column="datetime", - date_partition_column="datetime_col", - ), - ), - meta=FeatureTableMetaProto(), - ) + return_value=_ingest_test_getfeaturetable_mocked_resp( + "file://feast3/*", "datetime_col" ), ) @@ -627,17 +541,9 @@ def test_ingest_csv(self, mocked_client, mocker): dest_fpath = os.path.join("feast3/") pq_df = pq.read_table(dest_fpath).to_pandas() - # Format Dataframes before comparing them - partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) - pq_df.sort_values(by=["dev_feature_float"], inplace=True) - pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) - partitioned_df = partitioned_df.reindex(sorted(partitioned_df.columns), axis=1) - partitioned_df.reset_index(drop=True, inplace=True) - pq_df.reset_index(drop=True, inplace=True) - partitioned_df["datetime_col"] = pd.to_datetime( - partitioned_df.datetime_col - ).dt.tz_convert("UTC") - pq_df["datetime_col"] = pd.to_datetime(pq_df.datetime_col).dt.tz_convert("UTC") + partitioned_df, pq_df = _ingest_test_format_dataframes( + partitioned_df, pq_df, True + ) assert_frame_equal(partitioned_df, pq_df) @@ -710,3 +616,66 @@ def test_no_auth_sent_when_auth_disabled( ): client = Client(core_url=f"localhost:{insecure_core_server_that_blocks_auth}") client.list_feature_tables() + + +def _ingest_test_getfeaturetable_mocked_resp( + file_url: str, date_partition_col: str = None +): + return GetFeatureTableResponse( + table=FeatureTableProto( + spec=FeatureTableSpecProto( + name="ingest_featuretable", + max_age=Duration(seconds=3600), + features=[ + FeatureSpecProto( + name="dev_feature_float", value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpecProto( + name="dev_feature_string", + value_type=ValueProto.ValueType.STRING, + ), + ], + entities=["dev_entity"], + batch_source=DataSourceProto( + file_options=DataSourceProto.FileOptions( + 
file_format="parquet", file_url=file_url + ), + timestamp_column="datetime", + date_partition_column=date_partition_col + if date_partition_col is not None + else None, + ), + ), + meta=FeatureTableMetaProto(), + ) + ) + + +def _ingest_test_format_dataframes( + partitioned_df: pd.DataFrame, pq_df: pd.DataFrame, with_partitions: bool = False +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Format Dataframes before comparing them through assertion. + + Args: + partitioned_df: DataFrame from pytest fixture + pq_df: DataFrame from parquet files + with_partitions: Flag to indicate if data has been partitioned + + Returns: + Formatted DataFrames for comparison + """ + partitioned_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df.sort_values(by=["dev_feature_float"], inplace=True) + pq_df = pq_df.reindex(sorted(pq_df.columns), axis=1) + partitioned_df = partitioned_df.reindex(sorted(partitioned_df.columns), axis=1) + partitioned_df.reset_index(drop=True, inplace=True) + pq_df.reset_index(drop=True, inplace=True) + + if with_partitions: + partitioned_df["datetime_col"] = pd.to_datetime( + partitioned_df.datetime_col + ).dt.tz_convert("UTC") + pq_df["datetime_col"] = pd.to_datetime(pq_df.datetime_col).dt.tz_convert("UTC") + + return partitioned_df, pq_df From b6ee24bded0f0a055f3129485ec77d87d26dd9ce Mon Sep 17 00:00:00 2001 From: Terence Date: Thu, 8 Oct 2020 11:10:45 +0800 Subject: [PATCH 34/34] Fix prow config Signed-off-by: Terence --- .prow/config.yaml | 38 +++++++++++++++---- .../scripts/test-end-to-end-redis-cluster.sh | 3 +- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/.prow/config.yaml b/.prow/config.yaml index 381fcccea8..39c275603d 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -152,6 +152,18 @@ presubmits: requests: cpu: "6" memory: "6144Mi" + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /etc/gcloud/service-account.json + volumeMounts: + - mountPath: /etc/gcloud/service-account.json + name: service-account + readOnly: true + subPath: service-account.json + volumes: + - name: service-account + secret: + secretName: feast-service-account skip_branches: - ^v0\.(3|4)-branch$ @@ -186,14 +198,26 @@ presubmits: always_run: true spec: containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-end-to-end-redis-cluster.sh"] - resources: - requests: - cpu: "6" - memory: "6144Mi" + - image: maven:3.6-jdk-11 + command: ["infra/scripts/test-end-to-end-redis-cluster.sh"] + resources: + requests: + cpu: "6" + memory: "6144Mi" + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /etc/gcloud/service-account.json + volumeMounts: + - mountPath: /etc/gcloud/service-account.json + name: service-account + readOnly: true + subPath: service-account.json + volumes: + - name: service-account + secret: + secretName: feast-service-account skip_branches: - - ^v0\.(3|4)-branch$ + - ^v0\.(3|4)-branch$ - name: test-end-to-end-java-8 decorate: true diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index 12e7dc8b45..0e5aa5879a 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -3,7 +3,7 @@ set -e set -o pipefail -test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/service-account/service-account.json" +test -z ${GOOGLE_APPLICATION_CREDENTIALS} && GOOGLE_APPLICATION_CREDENTIALS="/etc/gcloud/service-account.json" test -z ${SKIP_BUILD_JARS} && SKIP_BUILD_JARS="false" test -z ${GOOGLE_CLOUD_PROJECT} && 
GOOGLE_CLOUD_PROJECT="kf-feast" test -z ${TEMP_BUCKET} && TEMP_BUCKET="feast-templocation-kf-feast" @@ -30,6 +30,7 @@ This script will run end-to-end tests for Feast Core and Online Serving. source ${SCRIPTS_DIR}/setup-common-functions.sh install_test_tools +install_gcloud_sdk install_and_start_local_redis_cluster install_and_start_local_postgres install_and_start_local_zookeeper_and_kafka
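A minimal usage sketch of the DataSource.from_proto dispatch introduced in the datasource cleanup above, assuming the Feast Python SDK built from this patch series is installed. The file URL and the empty field mapping are illustrative values only, not taken from the patches; the feature table tests in this series use the same "parquet" format and "datetime"/"datetime_col" columns.

    from feast.data_source import DataSource, FileSource

    # Build a batch FileSource, mirroring the options exercised in the tests.
    batch_source = FileSource(
        field_mapping={},
        file_format="parquet",
        file_url="file:///tmp/driver_features/*",  # illustrative path
        timestamp_column="datetime",
        date_partition_column="datetime_col",
    )

    # DataSource.from_proto inspects which *_options fields are populated on the
    # proto and returns the matching concrete source class, so a
    # to_proto()/from_proto() round trip should hand back a FileSource with the
    # same timestamp and partition columns.
    restored = DataSource.from_proto(batch_source.to_proto())
    assert isinstance(restored, FileSource)
    assert restored.timestamp_column == "datetime"
    assert restored.date_partition_column == "datetime_col"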