Skip to content

Commit

Permalink
fix: Fix incorrect on demand feature view diffing and improve Java te…
Browse files Browse the repository at this point in the history
…sts (#3074)

* fix: Fix ODFV bug

Signed-off-by: Danny Chiao <[email protected]>
  • Loading branch information
adchia committed Aug 15, 2022
1 parent 8e6a6b1 commit dd46d45
Show file tree
Hide file tree
Showing 13 changed files with 362 additions and 32 deletions.
88 changes: 85 additions & 3 deletions .github/workflows/java_master_only.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,52 @@ jobs:
java-version: '11'
java-package: jdk
architecture: x64
- name: Setup Python (to call feast apply)
uses: actions/setup-python@v2
id: setup-python
with:
python-version: 3.8
architecture: x64
- name: Setup Go
id: setup-go
uses: actions/setup-go@v2
with:
go-version: 1.18.0
- name: Upgrade pip version
run: |
pip install --upgrade "pip>=21.3.1,<22.1"
- name: Get pip cache dir
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: pip cache
uses: actions/cache@v2
with:
path: |
${{ steps.pip-cache.outputs.dir }}
/opt/hostedtoolcache/Python
/Users/runner/hostedtoolcache/Python
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
restore-keys: |
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
- name: Install pip-tools
run: pip install pip-tools
- name: Install apache-arrow on ubuntu
run: |
sudo apt update
sudo apt install -y -V ca-certificates lsb-release wget
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt update
sudo apt install -y -V libarrow-dev
- name: Install Python dependencies
run: make install-python-ci-dependencies
- uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-it-maven-
- uses: actions/cache@v2
with:
path: ~/.m2/repository
Expand All @@ -91,10 +137,46 @@ jobs:
java-version: '11'
java-package: jdk
architecture: x64
- uses: actions/setup-python@v2
- name: Setup Python (to call feast apply)
uses: actions/setup-python@v2
id: setup-python
with:
python-version: 3.8
architecture: x64
- name: Setup Go
id: setup-go
uses: actions/setup-go@v2
with:
python-version: '3.8'
architecture: 'x64'
go-version: 1.18.0
- name: Upgrade pip version
run: |
pip install --upgrade "pip>=21.3.1,<22.1"
- name: Get pip cache dir
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: pip cache
uses: actions/cache@v2
with:
path: |
${{ steps.pip-cache.outputs.dir }}
/opt/hostedtoolcache/Python
/Users/runner/hostedtoolcache/Python
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
restore-keys: |
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
- name: Install pip-tools
run: pip install pip-tools
- name: Install apache-arrow on ubuntu
run: |
sudo apt update
sudo apt install -y -V ca-certificates lsb-release wget
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt update
sudo apt install -y -V libarrow-dev
- name: Install Python dependencies
run: make install-python-ci-dependencies
- uses: actions/cache@v2
with:
path: ~/.m2/repository
Expand Down
86 changes: 86 additions & 0 deletions .github/workflows/java_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,52 @@ jobs:
java-version: '11'
java-package: jdk
architecture: x64
- name: Setup Python (to call feast apply)
uses: actions/setup-python@v2
id: setup-python
with:
python-version: 3.8
architecture: x64
- name: Setup Go
id: setup-go
uses: actions/setup-go@v2
with:
go-version: 1.18.0
- name: Upgrade pip version
run: |
pip install --upgrade "pip>=21.3.1,<22.1"
- name: Get pip cache dir
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: pip cache
uses: actions/cache@v2
with:
path: |
${{ steps.pip-cache.outputs.dir }}
/opt/hostedtoolcache/Python
/Users/runner/hostedtoolcache/Python
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
restore-keys: |
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
- name: Install pip-tools
run: pip install pip-tools
- name: Install apache-arrow on ubuntu
run: |
sudo apt update
sudo apt install -y -V ca-certificates lsb-release wget
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt update
sudo apt install -y -V libarrow-dev
- name: Install Python dependencies
run: make install-python-ci-dependencies
- uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-it-maven-
- uses: actions/cache@v2
with:
path: ~/.m2/repository
Expand Down Expand Up @@ -98,6 +144,46 @@ jobs:
aws-region: us-west-2
- name: Use AWS CLI
run: aws sts get-caller-identity
- name: Setup Python (to call feast apply)
uses: actions/setup-python@v2
id: setup-python
with:
python-version: 3.8
architecture: x64
- name: Setup Go
id: setup-go
uses: actions/setup-go@v2
with:
go-version: 1.18.0
- name: Upgrade pip version
run: |
pip install --upgrade "pip>=21.3.1,<22.1"
- name: Get pip cache dir
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: pip cache
uses: actions/cache@v2
with:
path: |
${{ steps.pip-cache.outputs.dir }}
/opt/hostedtoolcache/Python
/Users/runner/hostedtoolcache/Python
key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
restore-keys: |
${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
- name: Install pip-tools
run: pip install pip-tools
- name: Install apache-arrow on ubuntu
run: |
sudo apt update
sudo apt install -y -V ca-certificates lsb-release wget
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt update
sudo apt install -y -V libarrow-dev
- name: Install Python dependencies
run: make install-python-ci-dependencies
- name: Run integration tests
run: make test-java-integration
- name: Save report
Expand Down
1 change: 1 addition & 0 deletions java/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ mvn spotless:apply
#### Project Makefile
The Project Makefile provides useful shorthands for common development tasks:

> Note: These commands rely on a local version of `feast` (Python) to be installed
Run all Unit tests:
```
Expand Down
2 changes: 2 additions & 0 deletions java/serving/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,6 @@ Unit &amp; Integration Tests can be used to verify functionality:
mvn test -pl serving --also-make
# run integration tests
mvn verify -pl serving --also-make
# run integration tests with debugger
mvn -Dmaven.failsafe.debug verify -pl serving --also-make
```
22 changes: 22 additions & 0 deletions java/serving/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,28 @@
</configuration>
</plugin>

<!-- Call feast apply before running integration tests -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<executions>
<execution>
<configuration>
<executable>python</executable>
<workingDirectory>src/test/resources/docker-compose/feast10/</workingDirectory>
<arguments>
<argument>setup_it.py</argument>
</arguments>
</configuration>
<id>feast_test_apply</id>
<phase>process-test-resources</phase>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,17 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:

entity = Entity(name="entity", value_type=ValueType.STRING,)

benchmark_feature_views = [
FeatureView(
benchmark_feature_views = []
for i in range(25):
fv = FeatureView(
name=f"feature_view_{i}",
entities=[entity],
ttl=Duration(seconds=86400),
schema=[Field(name=f"feature_{10 * i + j}", dtype=Int64) for j in range(10)],
online=True,
source=generated_data_source,
)
for i in range(25)
]
benchmark_feature_views.append(fv)

benchmark_feature_service = FeatureService(
name=f"benchmark_feature_service", features=benchmark_feature_views,
Expand Down
Binary file not shown.
86 changes: 86 additions & 0 deletions java/serving/src/test/resources/docker-compose/feast10/setup_it.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from pathlib import Path
from feast.repo_config import load_repo_config
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

from definitions import (
benchmark_feature_service,
benchmark_feature_views,
driver,
driver_hourly_stats_view,
entity,
transformed_conv_rate,
)

from feast import FeatureStore


def setup_data():
start = datetime.now() - timedelta(days=10)

df = pd.DataFrame()
df["driver_id"] = np.arange(1000, 1010)
df["created"] = datetime.now()
df["conv_rate"] = np.arange(0, 1, 0.1)
df["acc_rate"] = np.arange(0.5, 1, 0.05)
df["avg_daily_trips"] = np.arange(0, 1000, 100)

# some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status
df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(
lambda days: timedelta(days=days)
)

# Store data in parquet files. Parquet is convenient for local development mode. For
# production, you can use your favorite DWH, such as BigQuery. See Feast documentation
# for more info.
df.to_parquet("driver_stats.parquet")

# For Benchmarks
# Please read more in Feast RFC-031
# (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit)
# about this benchmark setup
def generate_data(
num_rows: int, num_features: int, destination: str
) -> pd.DataFrame:
features = [f"feature_{i}" for i in range(num_features)]
columns = ["entity", "event_timestamp"] + features
df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns)
df["event_timestamp"] = datetime.utcnow()
for column in features:
df[column] = np.random.randint(1, num_rows, num_rows)

df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype(
pd.StringDtype()
)

df.to_parquet(destination)

generate_data(10**3, 250, "benchmark_data.parquet")


def main():
print("Running setup_it.py")

setup_data()
existing_repo_config = load_repo_config(Path("."))

# Update to default online store since otherwise, relies on Dockerized Redis service
fs = FeatureStore(config=existing_repo_config.copy(update={"online_store": {}}))
fs.apply(
[
driver_hourly_stats_view,
transformed_conv_rate,
driver,
entity,
benchmark_feature_service,
*benchmark_feature_views,
]
)

print("setup_it finished")


if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions sdk/python/feast/diff/registry_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ def diff_registry_objects(
continue
elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name):
if _field.name == "user_defined_function":
current_spec = cast(OnDemandFeatureViewSpec, current_proto)
new_spec = cast(OnDemandFeatureViewSpec, new_proto)
current_spec = cast(OnDemandFeatureViewSpec, current_spec)
new_spec = cast(OnDemandFeatureViewSpec, new_spec)
current_udf = current_spec.user_defined_function
new_udf = new_spec.user_defined_function
for _udf_field in current_udf.DESCRIPTOR.fields:
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/feast/feature_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ class LoggingSource:

@abc.abstractmethod
def get_schema(self, registry: "BaseRegistry") -> pa.Schema:
""" Generate schema for logs destination. """
"""Generate schema for logs destination."""
raise NotImplementedError

@abc.abstractmethod
def get_log_timestamp_column(self) -> str:
""" Return timestamp column that must exist in generated schema. """
"""Return timestamp column that must exist in generated schema."""
raise NotImplementedError


Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2349,10 +2349,10 @@ def get_validation_reference(
self, name: str, allow_cache: bool = False
) -> ValidationReference:
"""
Retrieves a validation reference.
Retrieves a validation reference.
Raises:
ValidationReferenceNotFoundException: The validation reference could not be found.
Raises:
ValidationReferenceNotFoundException: The validation reference could not be found.
"""
ref = self._registry.get_validation_reference(
name, project=self.project, allow_cache=allow_cache
Expand Down
Loading

0 comments on commit dd46d45

Please sign in to comment.