From 90c9786e0598f3ed037be2003aa1e24b3c15d043 Mon Sep 17 00:00:00 2001
From: Willem Pienaar <6728866+woop@users.noreply.github.com>
Date: Sun, 2 Feb 2020 15:39:48 +0800
Subject: [PATCH] Deduplicate example notebooks (#456)
* Deduplicate example notebooks
* Merge docker-compose.yml for both batch and online serving.
---
examples/basic/basic.ipynb | 256 ++++++--
infra/docker-compose/.env.sample | 24 +-
infra/docker-compose/docker-compose.batch.yml | 25 -
infra/docker-compose/docker-compose.yml | 38 +-
.../jupyter/features/cust_trans_fs.yaml | 11 -
.../features/cust_trans_fs_updated.yaml | 13 -
.../notebooks/feast-batch-serving.ipynb | 504 ----------------
.../jupyter/notebooks/feast-quickstart.ipynb | 569 ------------------
infra/docker/jupyter/Dockerfile | 3 -
infra/docker/jupyter/Dockerfile.dev | 8 -
sdk/python/setup.py | 9 +-
11 files changed, 242 insertions(+), 1218 deletions(-)
delete mode 100644 infra/docker-compose/docker-compose.batch.yml
delete mode 100644 infra/docker-compose/jupyter/features/cust_trans_fs.yaml
delete mode 100644 infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml
delete mode 100644 infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb
delete mode 100644 infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb
delete mode 100644 infra/docker/jupyter/Dockerfile
delete mode 100644 infra/docker/jupyter/Dockerfile.dev
diff --git a/examples/basic/basic.ipynb b/examples/basic/basic.ipynb
index 49658b4235..94fc82f2ce 100644
--- a/examples/basic/basic.ipynb
+++ b/examples/basic/basic.ipynb
@@ -15,15 +15,15 @@
"1. Create a synthetic customer feature dataset\n",
"2. Register a feature set to represent these features in Feast\n",
"3. Ingest these features into Feast\n",
- "4. Create a feature query and retrieve historical feature data\n",
- "5. Create a feature query and retrieve online feature data"
+ "4. Create a feature query and retrieve online feature data\n",
+ "5. Create a feature query and retrieve historical feature data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 1. Clone Feast and install all dependencies"
+ "### 0. Configuration"
]
},
{
@@ -32,9 +32,79 @@
"metadata": {},
"outputs": [],
"source": [
- "!git clone https://github.com/gojek/feast.git \\\n",
- "&& cd feast/sdk/python/ && pip install --upgrade --quiet -e . \\\n",
- "&& pip install --quiet --upgrade pandas numpy protobuf"
+ "import os\n",
+ "\n",
+ "# Feast Core acts as the central feature registry\n",
+ "FEAST_CORE_URL = os.getenv('FEAST_CORE_URL', 'core:6565')\n",
+ "\n",
+ "# Feast Online Serving allows for the retrieval of real-time feature data\n",
+ "FEAST_ONLINE_SERVING_URL = os.getenv('FEAST_ONLINE_SERVING_URL', 'online-serving:6566')\n",
+ "\n",
+ "# Feast Batch Serving allows for the retrieval of historical feature data\n",
+ "FEAST_BATCH_SERVING_URL = os.getenv('FEAST_BATCH_SERVING_URL', 'batch-serving:6567')\n",
+ "\n",
+ "# PYTHON_REPOSITORY_PATH is the path to the root of the Feast Git repository (the Python SDK is located under sdk/python)\n",
+ "PYTHON_REPOSITORY_PATH = os.getenv('PYTHON_REPOSITORY_PATH', '../../')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 1. Install Feast SDK"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Install from PyPI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install --ignore-installed --upgrade feast"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "(Alternative) Install from local repository"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "os.environ['PYTHON_SDK_PATH'] = os.path.join(PYTHON_REPOSITORY_PATH, 'sdk/python')\n",
+ "sys.path.append(os.environ['PYTHON_SDK_PATH'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!echo $PYTHON_SDK_PATH"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!python -m pip install --ignore-installed --upgrade -e ${PYTHON_SDK_PATH}"
]
},
{
@@ -66,7 +136,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 3. Configure Feast services and connect the Feast client"
+ "### 3. Configure Feast services and connect the Feast client\n",
+ "\n",
+ "Connect to Feast Core and Feast Online Serving"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client = Client(core_url=FEAST_CORE_URL, serving_url=FEAST_ONLINE_SERVING_URL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a project workspace"
]
},
{
@@ -75,9 +163,14 @@
"metadata": {},
"outputs": [],
"source": [
- "CORE_URL = 'localhost:6565'\n",
- "ONLINE_SERVING_URL = 'localhost:6566'\n",
- "BATCH_SERVING_URL = 'localhost:6567'"
+ "client.create_project('customer_project')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Set the active project"
]
},
{
@@ -86,8 +179,6 @@
"metadata": {},
"outputs": [],
"source": [
- "client = Client(core_url=CORE_URL, serving_url=BATCH_SERVING_URL) # Connect to Feast Core\n",
- "client.create_project('customer_project')\n",
"client.set_project('customer_project')"
]
},
@@ -95,7 +186,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 4. Create synthetic customer features"
+ "### 4. Create customer features"
]
},
{
@@ -132,7 +223,7 @@
" }\n",
")\n",
"\n",
- "print(customer_features.head(10))"
+ "print(customer_features.head(500))"
]
},
{
@@ -147,9 +238,7 @@
"metadata": {},
"source": [
"Now we will create a feature set for these features. Feature sets are essentially a schema that represent\n",
- "feature values. Feature sets allow Feast to both identify feature values and their structure. \n",
- "\n",
- "In this case we need to define any entity columns as well as the maximum age. The entity column in this case is \"customer_id\". Max age is set to 1 day (defined in seconds). This means that for each feature query during retrieval, the serving API will only retrieve features up to a maximum of 1 day per provided timestamp and entity combination. "
+ "feature values. Feature sets allow Feast to both identify feature values and their structure. The following feature set contains no features yet."
]
},
{
@@ -160,8 +249,8 @@
"source": [
"customer_fs = FeatureSet(\n",
" \"customer_transactions\",\n",
- " max_age=Duration(seconds=86400),\n",
- " entities=[Entity(name='customer_id', dtype=ValueType.INT64)]\n",
+ " entities=[Entity(name='customer_id', dtype=ValueType.INT64)],\n",
+ " max_age=Duration(seconds=432000) \n",
")"
]
},
@@ -169,7 +258,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Here we are automatically inferring the schema from the provided dataset"
+ "Here we are automatically inferring the schema from the provided dataset. The two features from the dataset will be added to the feature set."
]
},
{
@@ -241,16 +330,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 8. Create a batch retrieval query"
+ "### 8. Retrieve online features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "In order to retrieve historical feature data, the user must provide an entity_rows dataframe. This dataframe contains a combination of timestamps and entities. In this case, the user must provide both customer_ids and timestamps. \n",
- "\n",
- "We will randomly generate timestamps over the last 30 days, and assign customer_ids to them. When these entity rows are sent to the Feast Serving API to retrieve feature values, along with a list of feature ids, Feast is then able to attach the correct feature values to each entity row. The one exception is if the feature values fall outside of the maximum age window."
+ "The process of retrieving features from the online API is very similar to that of the batch API. The only major difference is that users do not have to provide timestamps (only the latest features are returned, as long as they are within the maximum age window)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The example below retrieves online features for a single customer: \"1001\". It is possible to retrieve any features from Feast, even outside of the current project."
]
},
{
@@ -259,23 +353,51 @@
"metadata": {},
"outputs": [],
"source": [
- "event_timestamps = [datetime.utcnow().replace(tzinfo=utc) - timedelta(days=randrange(15), hours=randrange(24), minutes=randrange(60)) for day in range(30)]\n",
- "\n",
- "entity_rows = pd.DataFrame(\n",
- " {\n",
- " \"datetime\": event_timestamps,\n",
- " \"customer_id\": [customers[idx % len(customers)] for idx in range(len(event_timestamps))],\n",
- " }\n",
+ "online_features = client.get_online_features(\n",
+ " feature_refs=[\n",
+ " f\"daily_transactions\",\n",
+ " f\"total_transactions\",\n",
+ " ],\n",
+ " entity_rows=[\n",
+ " GetOnlineFeaturesRequest.EntityRow(\n",
+ " fields={\n",
+ " \"customer_id\": Value(\n",
+ " int64_val=1001)\n",
+ " }\n",
+ " )\n",
+ " ],\n",
")\n",
- "\n",
- "print(entity_rows.head(10))"
+ "print(online_features)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 9. Retrieve historical/batch features"
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### The following section requires Google Cloud Platform (Google Cloud Storage and BigQuery)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 9. Create a batch retrieval query"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In order to retrieve historical feature data, the user must provide an entity_rows dataframe. This dataframe contains a combination of timestamps and entities. In this case, the user must provide both customer_ids and timestamps. \n",
+ "\n",
+ "We will randomly generate 30 timestamps within the last 15 days, and assign customer_ids to them. When these entity rows are sent to the Feast Serving API to retrieve feature values, along with a list of feature ids, Feast is then able to attach the correct feature values to each entity row. "
]
},
{
@@ -284,29 +406,30 @@
"metadata": {},
"outputs": [],
"source": [
- "job = client.get_batch_features(\n",
- " feature_refs=[\n",
- " f\"daily_transactions\", \n",
- " f\"total_transactions\", \n",
- " ],\n",
- " entity_rows=entity_rows\n",
- " )\n",
- "df = job.to_dataframe()\n",
- "print(df.head(10))"
+ "event_timestamps = [datetime.utcnow().replace(tzinfo=utc) - timedelta(days=randrange(15), hours=randrange(24), minutes=randrange(60)) for day in range(30)]\n",
+ "\n",
+ "entity_rows = pd.DataFrame(\n",
+ " {\n",
+ " \"datetime\": event_timestamps,\n",
+ " \"customer_id\": [customers[idx % len(customers)] for idx in range(len(event_timestamps))],\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "print(entity_rows.head(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 10. Retrieve online features"
+ "### 10. Retrieve historical/batch features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "The process of retrieving features from the online API is very similar to that of the batch API. The only major difference is that users do not have to provide timestamps (only the latest features are returned, as long as they are within the maximum age window)"
+ "Next we will create a new client object, but this time we will configure it to connect to the Batch Serving Service. This service will allow us to retrieve historical feature data."
]
},
{
@@ -315,37 +438,39 @@
"metadata": {},
"outputs": [],
"source": [
- "online_client = Client(core_url=CORE_URL, serving_url=ONLINE_SERVING_URL)\n",
- "online_client.set_project(\"customer_project\")"
+ "batch_client = Client(core_url=FEAST_CORE_URL, serving_url=FEAST_BATCH_SERVING_URL)\n",
+ "batch_client.set_project(\"customer_project\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "The example below retrieves online features for a single customer: \"1001\""
+ "By calling the `get_batch_features` method we are able to retrieve a `job` object for the exporting of feature data. For every entity and timestamp combination in `entity_rows` we will be receiving a row with feature values joined to it."
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [],
"source": [
- "online_features = online_client.get_online_features(\n",
- " feature_refs=[\n",
- " f\"daily_transactions\",\n",
- " f\"total_transactions\",\n",
- " ],\n",
- " entity_rows=[\n",
- " GetOnlineFeaturesRequest.EntityRow(\n",
- " fields={\n",
- " \"customer_id\": Value(\n",
- " int64_val=1001)\n",
- " }\n",
- " )\n",
- " ],\n",
- ")"
+ "job = batch_client.get_batch_features(\n",
+ " feature_refs=[\n",
+ " f\"customer_project/daily_transactions\", \n",
+ " f\"customer_project/total_transactions\", \n",
+ " ],\n",
+ " entity_rows=entity_rows\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once the job is complete, it is possible to retrieve the exported data (from Google Cloud Storage) and load it into memory as a pandas DataFrame."
]
},
{
@@ -354,7 +479,8 @@
"metadata": {},
"outputs": [],
"source": [
- "print(online_features)"
+ "df = job.to_dataframe()\n",
+ "print(df.head(10))"
]
}
],
@@ -374,7 +500,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.7.4"
},
"pycharm": {
"stem_cell": {
diff --git a/infra/docker-compose/.env.sample b/infra/docker-compose/.env.sample
index e14bde2772..c8652e8fe0 100644
--- a/infra/docker-compose/.env.sample
+++ b/infra/docker-compose/.env.sample
@@ -1,19 +1,21 @@
+# General
COMPOSE_PROJECT_NAME=feast
-
FEAST_VERSION=latest
+# Feast Core
FEAST_CORE_IMAGE=gcr.io/kf-feast/feast-core
-FEAST_CORE_CONFIG=direct-runner
-FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY=placeholder
+FEAST_CORE_CONFIG=direct-runner.yml
+FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY=placeholder.json
+# Feast Serving
FEAST_SERVING_IMAGE=gcr.io/kf-feast/feast-serving
-FEAST_ONLINE_SERVING_CONFIG=online-serving
-FEAST_ONLINE_STORE_CONFIG=redis-store
-FEAST_BATCH_SERVING_CONFIG=batch-serving
-FEAST_BATCH_STORE_CONFIG=bq-store
-FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY=placeholder
-FEAST_JOB_STAGING_LOCATION=gs://your-gcp-project/bucket
+FEAST_ONLINE_SERVING_CONFIG=online-serving.yml
+FEAST_ONLINE_STORE_CONFIG=redis-store.yml
+FEAST_BATCH_SERVING_CONFIG=batch-serving.yml
+FEAST_BATCH_STORE_CONFIG=bq-store.yml
+FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY=placeholder.json
+FEAST_JOB_STAGING_LOCATION=gs://your-gcs-bucket/staging
-FEAST_JUPYTER_IMAGE=gcr.io/kf-feast/feast-jupyter
-FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY=placeholder
+# Jupyter
+FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY=placeholder.json
diff --git a/infra/docker-compose/docker-compose.batch.yml b/infra/docker-compose/docker-compose.batch.yml
deleted file mode 100644
index c00ac9475b..0000000000
--- a/infra/docker-compose/docker-compose.batch.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-version: "3.7"
-
-services:
- batch-serving:
- image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION}
- volumes:
- - ./serving/${FEAST_BATCH_SERVING_CONFIG}.yml:/etc/feast/application.yml
- - ./serving/${FEAST_BATCH_STORE_CONFIG}.yml:/etc/feast/store.yml
- - ./gcp-service-accounts/${FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json
- depends_on:
- - core
- - redis
- ports:
- - 6567:6567
- restart: on-failure
- environment:
- GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json
- FEAST_JOB_STAGING_LOCATION: ${FEAST_JOB_STAGING_LOCATION}
- command:
- - "java"
- - "-Xms1024m"
- - "-Xmx1024m"
- - "-jar"
- - "/opt/feast/feast-serving.jar"
- - "--spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml"
\ No newline at end of file
diff --git a/infra/docker-compose/docker-compose.yml b/infra/docker-compose/docker-compose.yml
index 44750650ce..27d82efc3c 100644
--- a/infra/docker-compose/docker-compose.yml
+++ b/infra/docker-compose/docker-compose.yml
@@ -4,8 +4,8 @@ services:
core:
image: ${FEAST_CORE_IMAGE}:${FEAST_VERSION}
volumes:
- - ./core/${FEAST_CORE_CONFIG}.yml:/etc/feast/application.yml
- - ./gcp-service-accounts/${FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json
+ - ./core/${FEAST_CORE_CONFIG}:/etc/feast/application.yml
+ - ./gcp-service-accounts/${FEAST_CORE_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json
environment:
DB_HOST: db
GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json
@@ -24,8 +24,8 @@ services:
online-serving:
image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION}
volumes:
- - ./serving/${FEAST_ONLINE_SERVING_CONFIG}.yml:/etc/feast/application.yml
- - ./serving/${FEAST_ONLINE_STORE_CONFIG}.yml:/etc/feast/store.yml
+ - ./serving/${FEAST_ONLINE_SERVING_CONFIG}:/etc/feast/application.yml
+ - ./serving/${FEAST_ONLINE_STORE_CONFIG}:/etc/feast/store.yml
depends_on:
- core
- redis
@@ -38,12 +38,34 @@ services:
- /opt/feast/feast-serving.jar
- --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml
+ batch-serving:
+ image: ${FEAST_SERVING_IMAGE}:${FEAST_VERSION}
+ volumes:
+ - ./serving/${FEAST_BATCH_SERVING_CONFIG}:/etc/feast/application.yml
+ - ./serving/${FEAST_BATCH_STORE_CONFIG}:/etc/feast/store.yml
+ - ./gcp-service-accounts/${FEAST_BATCH_SERVING_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json
+ depends_on:
+ - core
+ - redis
+ ports:
+ - 6567:6567
+ restart: on-failure
+ environment:
+ GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json
+ FEAST_JOB_STAGING_LOCATION: ${FEAST_JOB_STAGING_LOCATION}
+ command:
+ - "java"
+ - "-Xms1024m"
+ - "-Xmx1024m"
+ - "-jar"
+ - "/opt/feast/feast-serving.jar"
+ - "--spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml"
+
jupyter:
- image: ${FEAST_JUPYTER_IMAGE}:${FEAST_VERSION}
+ image: jupyter/datascience-notebook:latest
volumes:
- - ./jupyter/notebooks:/home/jovyan/feast-notebooks
- - ./jupyter/features:/home/jovyan/features
- - ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}.json:/etc/gcloud/service-accounts/key.json
+ - ../../:/home/jovyan/feast
+ - ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json
depends_on:
- core
- online-serving
diff --git a/infra/docker-compose/jupyter/features/cust_trans_fs.yaml b/infra/docker-compose/jupyter/features/cust_trans_fs.yaml
deleted file mode 100644
index eb21ce9b35..0000000000
--- a/infra/docker-compose/jupyter/features/cust_trans_fs.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: customer_transactions
-kind: feature_set
-entities:
-- name: customer_id
- valueType: INT64
-features:
-- name: daily_transactions
- valueType: FLOAT
-- name: total_transactions
- valueType: FLOAT
-maxAge: 3600s
\ No newline at end of file
diff --git a/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml b/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml
deleted file mode 100644
index 8293d04b88..0000000000
--- a/infra/docker-compose/jupyter/features/cust_trans_fs_updated.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: customer_transactions
-kind: feature_set
-entities:
-- name: customer_id
- valueType: INT64
-features:
-- name: daily_transactions
- valueType: FLOAT
-- name: total_transactions
- valueType: FLOAT
-- name: discounts
- valueType: FLOAT
-maxAge: 3600s
\ No newline at end of file
diff --git a/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb b/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb
deleted file mode 100644
index c288093f07..0000000000
--- a/infra/docker-compose/jupyter/notebooks/feast-batch-serving.ipynb
+++ /dev/null
@@ -1,504 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Feast Batch Serving\n",
- "This is an extension to `feast-quickstart` notebook to demonstrate the batch serving capability of Feast.\n",
- "\n",
- "## Prerequisite\n",
- "- A running Feast Serving service with store configuration that supports batch retrieval. (eg. BigQuery store)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Preparation\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import feast\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "from datetime import datetime, timedelta\n",
- "from feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest\n",
- "from feast.types.Value_pb2 import Value as Value\n",
- "from feast.client import Client\n",
- "from feast.feature_set import FeatureSet"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "client = feast.Client(core_url=\"core:6565\", serving_url=\"batch-serving:6567\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "cust_trans_fs = FeatureSet.from_yaml(\"../features/cust_trans_fs.yaml\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Feature set updated/created: \"customer_transactions:1\".\n"
- ]
- }
- ],
- "source": [
- "client.apply(cust_trans_fs)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " datetime | \n",
- " customer_id | \n",
- " daily_transactions | \n",
- " total_transactions | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2019-12-06 02:17:46.899904 | \n",
- " 10000 | \n",
- " 2.797627 | \n",
- " 175.978266 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2019-12-06 02:17:46.899915 | \n",
- " 10001 | \n",
- " 4.931632 | \n",
- " 153.871975 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2019-12-06 02:17:46.899922 | \n",
- " 10002 | \n",
- " 0.206628 | \n",
- " 108.558844 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2019-12-06 02:17:46.899929 | \n",
- " 10003 | \n",
- " 2.354937 | \n",
- " 119.549455 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2019-12-06 02:17:46.899937 | \n",
- " 10004 | \n",
- " 7.171423 | \n",
- " 115.345183 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " datetime customer_id daily_transactions \\\n",
- "0 2019-12-06 02:17:46.899904 10000 2.797627 \n",
- "1 2019-12-06 02:17:46.899915 10001 4.931632 \n",
- "2 2019-12-06 02:17:46.899922 10002 0.206628 \n",
- "3 2019-12-06 02:17:46.899929 10003 2.354937 \n",
- "4 2019-12-06 02:17:46.899937 10004 7.171423 \n",
- "\n",
- " total_transactions \n",
- "0 175.978266 \n",
- "1 153.871975 \n",
- "2 108.558844 \n",
- "3 119.549455 \n",
- "4 115.345183 "
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "offset = 10000\n",
- "nr_of_customers = 5\n",
- "customer_df = pd.DataFrame(\n",
- " {\n",
- " \"datetime\": [datetime.utcnow() for _ in range(nr_of_customers)],\n",
- " \"customer_id\": [offset + inc for inc in range(nr_of_customers)],\n",
- " \"daily_transactions\": [np.random.uniform(0, 10) for _ in range(nr_of_customers)],\n",
- " \"total_transactions\": [np.random.uniform(100, 200) for _ in range(nr_of_customers)],\n",
- " }\n",
- ")\n",
- "customer_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 5/5 [00:00<00:00, 7.24rows/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Ingested 5 rows into customer_transactions:1\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "client.ingest(cust_trans_fs, dataframe=customer_df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "pycharm": {
- "name": "#%% md\n"
- }
- },
- "source": [
- "## Batch Retrieval\n",
- "Batch retrieval takes a dataframe containing the entities column and event timestamp as an input. The result would be the outer join of the input and the features. The input dataframe needs to have a column named `datetime` as event timestamp. No results will be returned if the difference between the feature ingestion timestamp and the `event_timestamp` is greater than the `maxAge` parameter specified in the feature set."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "pycharm": {
- "name": "#%%\n"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_transactions_v1_feature_timestamp | \n",
- " customer_id | \n",
- " event_timestamp | \n",
- " customer_transactions_v1_daily_transactions | \n",
- " customer_transactions_v1_total_transactions | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2019-12-06 02:17:46+00:00 | \n",
- " 10001 | \n",
- " 2019-12-06 02:17:55.612449+00:00 | \n",
- " 4.931632 | \n",
- " 153.871980 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2019-12-06 02:17:46+00:00 | \n",
- " 10004 | \n",
- " 2019-12-06 02:17:55.612449+00:00 | \n",
- " 7.171423 | \n",
- " 115.345184 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2019-12-06 02:17:46+00:00 | \n",
- " 10000 | \n",
- " 2019-12-06 02:17:55.612449+00:00 | \n",
- " 2.797627 | \n",
- " 175.978270 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2019-12-06 02:17:46+00:00 | \n",
- " 10002 | \n",
- " 2019-12-06 02:17:55.612449+00:00 | \n",
- " 0.206628 | \n",
- " 108.558846 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2019-12-06 02:17:46+00:00 | \n",
- " 10003 | \n",
- " 2019-12-06 02:17:55.612449+00:00 | \n",
- " 2.354937 | \n",
- " 119.549450 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_transactions_v1_feature_timestamp customer_id \\\n",
- "0 2019-12-06 02:17:46+00:00 10001 \n",
- "1 2019-12-06 02:17:46+00:00 10004 \n",
- "2 2019-12-06 02:17:46+00:00 10000 \n",
- "3 2019-12-06 02:17:46+00:00 10002 \n",
- "4 2019-12-06 02:17:46+00:00 10003 \n",
- "\n",
- " event_timestamp \\\n",
- "0 2019-12-06 02:17:55.612449+00:00 \n",
- "1 2019-12-06 02:17:55.612449+00:00 \n",
- "2 2019-12-06 02:17:55.612449+00:00 \n",
- "3 2019-12-06 02:17:55.612449+00:00 \n",
- "4 2019-12-06 02:17:55.612449+00:00 \n",
- "\n",
- " customer_transactions_v1_daily_transactions \\\n",
- "0 4.931632 \n",
- "1 7.171423 \n",
- "2 2.797627 \n",
- "3 0.206628 \n",
- "4 2.354937 \n",
- "\n",
- " customer_transactions_v1_total_transactions \n",
- "0 153.871980 \n",
- "1 115.345184 \n",
- "2 175.978270 \n",
- "3 108.558846 \n",
- "4 119.549450 "
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "entity_df = customer_df[[\"customer_id\"]].assign(datetime=datetime.utcnow())\n",
- "feature_ids=[\n",
- " \"customer_transactions:1:daily_transactions\",\n",
- " \"customer_transactions:1:total_transactions\",\n",
- "]\n",
- "batch_job = client.get_batch_features(feature_ids, entity_df)\n",
- "batch_job.to_dataframe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_transactions_v1_feature_timestamp | \n",
- " customer_id | \n",
- " event_timestamp | \n",
- " customer_transactions_v1_daily_transactions | \n",
- " customer_transactions_v1_total_transactions | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " None | \n",
- " 10000 | \n",
- " 2020-01-05 02:18:43.900732+00:00 | \n",
- " None | \n",
- " None | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " None | \n",
- " 10001 | \n",
- " 2020-01-05 02:18:43.900732+00:00 | \n",
- " None | \n",
- " None | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " None | \n",
- " 10002 | \n",
- " 2020-01-05 02:18:43.900732+00:00 | \n",
- " None | \n",
- " None | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " None | \n",
- " 10003 | \n",
- " 2020-01-05 02:18:43.900732+00:00 | \n",
- " None | \n",
- " None | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " None | \n",
- " 10004 | \n",
- " 2020-01-05 02:18:43.900732+00:00 | \n",
- " None | \n",
- " None | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " customer_transactions_v1_feature_timestamp customer_id \\\n",
- "0 None 10000 \n",
- "1 None 10001 \n",
- "2 None 10002 \n",
- "3 None 10003 \n",
- "4 None 10004 \n",
- "\n",
- " event_timestamp \\\n",
- "0 2020-01-05 02:18:43.900732+00:00 \n",
- "1 2020-01-05 02:18:43.900732+00:00 \n",
- "2 2020-01-05 02:18:43.900732+00:00 \n",
- "3 2020-01-05 02:18:43.900732+00:00 \n",
- "4 2020-01-05 02:18:43.900732+00:00 \n",
- "\n",
- " customer_transactions_v1_daily_transactions \\\n",
- "0 None \n",
- "1 None \n",
- "2 None \n",
- "3 None \n",
- "4 None \n",
- "\n",
- " customer_transactions_v1_total_transactions \n",
- "0 None \n",
- "1 None \n",
- "2 None \n",
- "3 None \n",
- "4 None "
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "stale_entity_df = customer_df[[\"customer_id\"]].assign(datetime=datetime.utcnow() + timedelta(days=30))\n",
- "feature_ids=[\n",
- " \"customer_transactions:1:daily_transactions\",\n",
- " \"customer_transactions:1:total_transactions\",\n",
- "]\n",
- "batch_job = client.get_batch_features(feature_ids, stale_entity_df)\n",
- "batch_job.to_dataframe()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.3"
- },
- "pycharm": {
- "stem_cell": {
- "cell_type": "raw",
- "metadata": {
- "collapsed": false
- },
- "source": []
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb b/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb
deleted file mode 100644
index b89e59b1e4..0000000000
--- a/infra/docker-compose/jupyter/notebooks/feast-quickstart.ipynb
+++ /dev/null
@@ -1,569 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Feast Quick Start\n",
- "This is a quick example to demonstrate:\n",
- "- Register a feature set on Feast\n",
- "- Ingest features into Feast\n",
- "- Retrieve the ingested features from Feast\n",
- "- Update a feature"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import feast\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "from datetime import datetime\n",
- "from feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest\n",
- "from feast.types.Value_pb2 import Value as Value\n",
- "from feast.client import Client\n",
- "from feast.feature_set import FeatureSet"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "First, instantiate the client.\n",
- "Feast endpoints can be set via the following environmental variables: `FEAST_CORE_URL`, `FEAST_SERVING_URL`.\n",
- "Alternatively, they can also be passed in explicitly as follows:\n",
- " \n",
- "`client = feast.Client(core_url=core:6565, serving_url=online-serving:6566)`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "client = feast.Client()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Register a feature set\n",
- "\n",
- "Let's create and register our first feature set. Below is an example of a basic customer transactions feature set that has been exported to YAML:\n",
- "```\n",
- "name: customer_transactions\n",
- "kind: feature_set\n",
- "entities:\n",
- "- name: customer_id\n",
- " valueType: INT64\n",
- "features:\n",
- "- name: daily_transactions\n",
- " valueType: FLOAT\n",
- "- name: total_transactions\n",
- " valueType: FLOAT\n",
- "maxAge: 3600s \n",
- "```"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "cust_trans_fs = FeatureSet.from_yaml(\"../features/cust_trans_fs.yaml\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Feature set updated/created: \"customer_transactions:1\".\n"
- ]
- }
- ],
- "source": [
- "client.apply(cust_trans_fs)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Ingest features into Feast\n",
- "The dataframe below contains the features and entities of the above feature set."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " datetime | \n",
- " customer_id | \n",
- " daily_transactions | \n",
- " total_transactions | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2019-11-26 12:03:47.320634 | \n",
- " 10000 | \n",
- " 5.178112 | \n",
- " 110.670651 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2019-11-26 12:03:47.320644 | \n",
- " 10001 | \n",
- " 0.268114 | \n",
- " 195.393913 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2019-11-26 12:03:47.320651 | \n",
- " 10002 | \n",
- " 1.486614 | \n",
- " 136.929052 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2019-11-26 12:03:47.320658 | \n",
- " 10003 | \n",
- " 9.676433 | \n",
- " 166.022999 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2019-11-26 12:03:47.320665 | \n",
- " 10004 | \n",
- " 5.928573 | \n",
- " 165.687951 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " datetime customer_id daily_transactions \\\n",
- "0 2019-11-26 12:03:47.320634 10000 5.178112 \n",
- "1 2019-11-26 12:03:47.320644 10001 0.268114 \n",
- "2 2019-11-26 12:03:47.320651 10002 1.486614 \n",
- "3 2019-11-26 12:03:47.320658 10003 9.676433 \n",
- "4 2019-11-26 12:03:47.320665 10004 5.928573 \n",
- "\n",
- " total_transactions \n",
- "0 110.670651 \n",
- "1 195.393913 \n",
- "2 136.929052 \n",
- "3 166.022999 \n",
- "4 165.687951 "
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "offset = 10000\n",
- "nr_of_customers = 5\n",
- "customer_df = pd.DataFrame(\n",
- " {\n",
- " \"datetime\": [datetime.utcnow() for _ in range(nr_of_customers)],\n",
- " \"customer_id\": [offset + inc for inc in range(nr_of_customers)],\n",
- " \"daily_transactions\": [np.random.uniform(0, 10) for _ in range(nr_of_customers)],\n",
- " \"total_transactions\": [np.random.uniform(100, 200) for _ in range(nr_of_customers)],\n",
- " }\n",
- ")\n",
- "customer_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " 0%| | 0/5 [00:00, ?rows/s]8rows/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Ingested 5 rows into customer_transactions:1\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "client.ingest(cust_trans_fs, dataframe=customer_df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Retrieve the ingested features from Feast\n",
- "Once the features are ingested, they can be retrieved from the Feast store for model training or prediction purposes."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "field_values {\n",
- " fields {\n",
- " key: \"customer_id\"\n",
- " value {\n",
- " int64_val: 10001\n",
- " }\n",
- " }\n",
- " fields {\n",
- " key: \"customer_transactions:1:daily_transactions\"\n",
- " value {\n",
- " float_val: 0.2681143283843994\n",
- " }\n",
- " }\n",
- " fields {\n",
- " key: \"customer_transactions:1:total_transactions\"\n",
- " value {\n",
- " float_val: 195.39390563964844\n",
- " }\n",
- " }\n",
- "}"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "client.get_online_features(\n",
- " entity_rows=[\n",
- " GetOnlineFeaturesRequest.EntityRow(\n",
- " fields={\n",
- " \"customer_id\": Value(\n",
- " int64_val=10001\n",
- " )\n",
- " }\n",
- " )\n",
- " ],\n",
- " feature_ids=[\n",
- " \"customer_transactions:1:daily_transactions\",\n",
- " \"customer_transactions:1:total_transactions\",\n",
- " ],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Update a feature\n",
- "We can also update feature set which has been ingested."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "cust_trans_fs_updated = FeatureSet.from_yaml(\"../features/cust_trans_fs_updated.yaml\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Feature set updated/created: \"customer_transactions:2\".\n"
- ]
- }
- ],
- "source": [
- "client.apply(cust_trans_fs_updated)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " datetime | \n",
- " customer_id | \n",
- " daily_transactions | \n",
- " total_transactions | \n",
- " discounts | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2019-11-26 12:03:47.320634 | \n",
- " 10000 | \n",
- " 5.178112 | \n",
- " 110.670651 | \n",
- " 8.389938 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2019-11-26 12:03:47.320644 | \n",
- " 10001 | \n",
- " 0.268114 | \n",
- " 195.393913 | \n",
- " 0.430047 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2019-11-26 12:03:47.320651 | \n",
- " 10002 | \n",
- " 1.486614 | \n",
- " 136.929052 | \n",
- " 7.408917 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2019-11-26 12:03:47.320658 | \n",
- " 10003 | \n",
- " 9.676433 | \n",
- " 166.022999 | \n",
- " 1.192721 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2019-11-26 12:03:47.320665 | \n",
- " 10004 | \n",
- " 5.928573 | \n",
- " 165.687951 | \n",
- " 2.051037 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " datetime customer_id daily_transactions \\\n",
- "0 2019-11-26 12:03:47.320634 10000 5.178112 \n",
- "1 2019-11-26 12:03:47.320644 10001 0.268114 \n",
- "2 2019-11-26 12:03:47.320651 10002 1.486614 \n",
- "3 2019-11-26 12:03:47.320658 10003 9.676433 \n",
- "4 2019-11-26 12:03:47.320665 10004 5.928573 \n",
- "\n",
- " total_transactions discounts \n",
- "0 110.670651 8.389938 \n",
- "1 195.393913 0.430047 \n",
- "2 136.929052 7.408917 \n",
- "3 166.022999 1.192721 \n",
- "4 165.687951 2.051037 "
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "discounts = [np.random.uniform(0, 10) for _ in range(nr_of_customers)]\n",
- "customer_df_updated = customer_df.assign(discounts=discounts)\n",
- "customer_df_updated"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " 0%| | 0/5 [00:00, ?rows/s]6rows/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Ingested 5 rows into customer_transactions:2\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "client.ingest(cust_trans_fs_updated, dataframe=customer_df_updated)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "field_values {\n",
- " fields {\n",
- " key: \"customer_id\"\n",
- " value {\n",
- " int64_val: 10001\n",
- " }\n",
- " }\n",
- " fields {\n",
- " key: \"customer_transactions:2:daily_transactions\"\n",
- " value {\n",
- " float_val: 0.2681143283843994\n",
- " }\n",
- " }\n",
- " fields {\n",
- " key: \"customer_transactions:2:discounts\"\n",
- " value {\n",
- " float_val: 0.4300469756126404\n",
- " }\n",
- " }\n",
- " fields {\n",
- " key: \"customer_transactions:2:total_transactions\"\n",
- " value {\n",
- " float_val: 195.39390563964844\n",
- " }\n",
- " }\n",
- "}"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "client.get_online_features(\n",
- " entity_rows=[\n",
- " GetOnlineFeaturesRequest.EntityRow(\n",
- " fields={\n",
- " \"customer_id\": Value(\n",
- " int64_val=10001\n",
- " )\n",
- " }\n",
- " )\n",
- " ],\n",
- " feature_ids=[\n",
- " \"customer_transactions:2:daily_transactions\",\n",
- " \"customer_transactions:2:total_transactions\",\n",
- " \"customer_transactions:2:discounts\",\n",
- " ],\n",
- ")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.3"
- },
- "pycharm": {
- "stem_cell": {
- "cell_type": "raw",
- "source": [],
- "metadata": {
- "collapsed": false
- }
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
\ No newline at end of file
diff --git a/infra/docker/jupyter/Dockerfile b/infra/docker/jupyter/Dockerfile
deleted file mode 100644
index d0302761cc..0000000000
--- a/infra/docker/jupyter/Dockerfile
+++ /dev/null
@@ -1,3 +0,0 @@
-FROM jupyter/datascience-notebook:latest
-ARG VERSION=0.3.2
-RUN pip install feast==${VERSION}
\ No newline at end of file
diff --git a/infra/docker/jupyter/Dockerfile.dev b/infra/docker/jupyter/Dockerfile.dev
deleted file mode 100644
index 41fb431fbe..0000000000
--- a/infra/docker/jupyter/Dockerfile.dev
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM python:3.6-alpine as builder
-WORKDIR /build/feast-sdk
-ADD . ./
-RUN python setup.py sdist bdist_wheel
-
-FROM jupyter/datascience-notebook:latest
-COPY --from=builder /build/feast-sdk/dist /usr/local/lib/feast
-RUN pip install -f /usr/local/lib/feast feast
\ No newline at end of file
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index 1617f83852..d0b37ad941 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -13,6 +13,7 @@
# limitations under the License.
import os
+import subprocess
from setuptools import find_packages, setup
@@ -48,7 +49,13 @@
]
# README file from Feast repo root directory
-README_FILE = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
+try:  # Prefer git's top-level dir; stderr is silenced so a non-git tree stays quiet.
+    repo_root = subprocess.check_output(
+        ["git", "rev-parse", "--show-toplevel"], stderr=subprocess.DEVNULL
+    ).decode("utf-8").rstrip()
+except (OSError, subprocess.CalledProcessError):  # git missing, or not a checkout
+    repo_root = os.path.join(os.path.dirname(__file__), "..", "..")
+README_FILE = os.path.join(repo_root, "README.md")
with open(os.path.join(README_FILE), "r") as f:
LONG_DESCRIPTION = f.read()