Created databricks-emulator (feast-dev#13)
Closes KE-610
Closes KE-643

- Added 4 projects to the Maven structure under spark/:

  - spark/databricks-emulator: Implemented a Databricks REST API emulator running in Docker
  - spark/databricks-types: Databricks REST API types - used by databricks-emulator, and also to be used by DatabricksJobManager (in Feast Core)
  - spark/spark-historical-retriever-job: Placeholder for implementing Historical Retriever Spark job
  - spark/spark-ingestion-job: Placeholder for implementing Ingestion Spark job

The Databricks emulator Dockerfile exists, but it is not yet built by the CI pipeline or deployed in the end-to-end tests (to be done later).
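For a quick local smoke test once the emulator container is up (docker-compose.databricks.yml below maps host port 9080 to the emulator's 8080), a request along these lines could exercise it. This is only a sketch: it assumes the emulator implements the Databricks Jobs API runs/submit endpoint, and the jar path and main class are illustrative placeholders.

# Hypothetical smoke test; /mnt/storage is the volume mounted by docker-compose.databricks.yml
curl -X POST http://localhost:9080/api/2.0/jobs/runs/submit \
  -H 'Content-Type: application/json' \
  -d '{
        "run_name": "emulator-smoke-test",
        "spark_jar_task": { "main_class_name": "example.SmokeTest" },
        "libraries": [ { "jar": "/mnt/storage/example.jar" } ]
      }'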
algattik authored Jun 5, 2020
1 parent 1ae84df commit 88e43a8
Showing 32 changed files with 1,376 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker_build_test_push.yml
@@ -52,7 +52,7 @@ jobs:
FEAST_SERVING_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-serving

- name: test docker compose
- run: ./infra/scripts/test-docker-compose.sh
+ run: ./infra/scripts/test-docker-compose-databricks.sh
env:
COMPOSE_PROJECT_NAME: feast
FEAST_VERSION: v${{ github.sha }}
16 changes: 16 additions & 0 deletions infra/docker-compose/core/databricks.yml
@@ -0,0 +1,16 @@
feast:
jobs:
polling_interval_milliseconds: 30000
job_update_timeout_seconds: 240
# TODO replace with DatabricksRunner
active_runner: direct
runners:
# TODO replace with DatabricksRunner
- name: direct
type: DirectRunner
options: {}
stream:
type: kafka
options:
topic: feast-features
bootstrapServers: "kafka:9092,localhost:9094"
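Feast Core picks this file up when FEAST_CORE_CONFIG points at it, which is what the new test script does below. To reproduce that manually, a sequence like the following should work, under the same assumptions as the script (run from infra/docker-compose/ with an .env copied from .env.sample):

cd infra/docker-compose/
cp .env.sample .env
export FEAST_CORE_CONFIG=databricks.yml
docker-compose -f docker-compose.yml -f docker-compose.online.yml -f docker-compose.databricks.yml up -d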
13 changes: 13 additions & 0 deletions infra/docker-compose/docker-compose.databricks.yml
@@ -0,0 +1,13 @@
version: "3.7"

services:
databricks-emulator:
build:
context: ../..
dockerfile: infra/docker/databricks-emulator/Dockerfile
image: ${FEAST_DATABRICKS_EMULATOR_IMAGE}:${FEAST_VERSION}
volumes:
- ./temp/databricks-emulator-storage:/mnt/storage
restart: on-failure
ports:
- 9080:8080
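To build and start just the emulator service from this file — a sketch assuming FEAST_VERSION is set (normally via .env) and using the same default image name the test script exports:

export FEAST_VERSION=dev
export FEAST_DATABRICKS_EMULATOR_IMAGE=gcr.io/kf-feast/feast-databricks-emulator
docker-compose -f docker-compose.databricks.yml build
docker-compose -f docker-compose.databricks.yml up -d databricks-emulator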
1 change: 1 addition & 0 deletions infra/docker-compose/docker-compose.yml
@@ -28,6 +28,7 @@ services:
image: jupyter/minimal-notebook:619e9cc2fc07
volumes:
- ./gcp-service-accounts/${FEAST_JUPYTER_GCP_SERVICE_ACCOUNT_KEY}:/etc/gcloud/service-accounts/key.json
+ - ../../:/home/jovyan/feast
- ./jupyter/startup.sh:/etc/startup.sh
depends_on:
- core
3 changes: 0 additions & 3 deletions infra/docker-compose/jupyter/startup.sh
@@ -2,9 +2,6 @@

set -ex

- # Clone Feast repository into Jupyter container
- git clone -b ${FEAST_REPOSITORY_VERSION} --single-branch https://github.com/feast-dev/feast.git || true

# Install Python dependencies
make -C feast/ compile-protos-python

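The clone step is no longer needed because docker-compose.yml now mounts the repository into the Jupyter container (the volume added above), so the checkout appears at /home/jovyan/feast and startup.sh can run make against it directly. For example, to rerun the proto compilation by hand once the stack is up:

docker exec feast_jupyter_1 bash -c 'make -C feast/ compile-protos-python'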
1 change: 1 addition & 0 deletions infra/docker/core/Dockerfile
@@ -30,6 +30,7 @@ COPY core core
COPY ingestion ingestion
COPY protos protos
COPY serving serving
+ COPY spark spark
COPY pom.xml pom.xml

# Trick to copy .m2 directory only if it exists.
68 changes: 68 additions & 0 deletions infra/docker/databricks-emulator/Dockerfile
@@ -0,0 +1,68 @@
# ============================================================
# Build stage 1: Builder
# ============================================================

FROM maven:3.6-jdk-11 as builder

ARG SPARK_VERSION=2.4.5
ARG HADOOP_VERSION=2.7

# Install Spark runtime
WORKDIR /

RUN wget -q https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& tar -zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark

RUN cd spark/jars && for jar in \
org/apache/spark/spark-sql-kafka-0-10_2.11/2.4.5/spark-sql-kafka-0-10_2.11-2.4.5.jar \
org/apache/kafka/kafka-clients/2.3.0/kafka-clients-2.3.0.jar \
; do \
wget https://repo1.maven.org/maven2/$jar; \
done

RUN mv spark/conf/log4j.properties.template spark/conf/log4j.properties

ARG REVISION=dev

WORKDIR /build

COPY datatypes datatypes
COPY storage storage
COPY sdk/java sdk/java
COPY core core
COPY ingestion ingestion
COPY protos protos
COPY serving serving
COPY spark spark
COPY pom.xml pom.xml

# Trick to copy the .m2 directory only if it exists.
# LICENSE is just a file guaranteed to exist, so the COPY command doesn't fail when .m2 is absent.
COPY LICENSE .m[2] .m2/

#
# Setting the Maven repository .m2 directory relative to the /build folder
# lets the user optionally reuse a cached repository when building the image,
# by copying an existing .m2 directory to $FEAST_REPO_ROOT/.m2
#
ENV MAVEN_OPTS="-Dmaven.repo.local=/build/.m2/repository -DdependencyLocationsEnabled=false"
RUN mvn --also-make --projects spark/databricks-emulator -Drevision=$REVISION \
--batch-mode clean package

# ============================================================
# Build stage 2: Production
# ============================================================

FROM openjdk:8u252-jre as production
ARG REVISION=dev

ENV SPARK_HOME /spark

COPY --from=builder /spark /spark
COPY --from=builder /build/spark/databricks-emulator/target/databricks-emulator-$REVISION.jar /opt/databricks-emulator.jar
CMD ["java",\
"-Xms2048m",\
"-Xmx2048m",\
"-jar",\
"/opt/databricks-emulator.jar"]
1 change: 1 addition & 0 deletions infra/docker/serving/Dockerfile
@@ -24,6 +24,7 @@ COPY core core
COPY ingestion ingestion
COPY protos protos
COPY serving serving
+ COPY spark spark
COPY pom.xml pom.xml

# Trick to copy .m2 directory only if it exists.
53 changes: 53 additions & 0 deletions infra/scripts/test-docker-compose-databricks.sh
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

set -e

echo "
============================================================
Running Docker Compose tests with pytest at 'tests/e2e'
============================================================
"

COMPOSE_ARGS="-f docker-compose.yml -f docker-compose.online.yml -f docker-compose.databricks.yml"

clean_up () {
ARG=$?

# Shut down docker-compose images
docker-compose $COMPOSE_ARGS down

# Remove configuration file
rm .env

exit $ARG
}

trap clean_up EXIT

export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel)
export COMPOSE_INTERACTIVE_NO_CLI=1

# Create Docker Compose configuration file
cd ${PROJECT_ROOT_DIR}/infra/docker-compose/
cp .env.sample .env

export FEAST_CORE_CONFIG=${FEAST_CORE_CONFIG:-databricks.yml}
export FEAST_DATABRICKS_EMULATOR_IMAGE=${FEAST_DATABRICKS_EMULATOR_IMAGE:-gcr.io/kf-feast/feast-databricks-emulator}

# Build Databricks emulator image
docker-compose -f docker-compose.databricks.yml build

# Start Docker Compose containers
docker-compose $COMPOSE_ARGS up -d

# Get Jupyter container IP address
export JUPYTER_DOCKER_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' feast_jupyter_1)

# Print Jupyter container information
docker logs feast_jupyter_1

# Wait for Jupyter Notebook Container to come online
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${JUPYTER_DOCKER_CONTAINER_IP_ADDRESS}:8888 --timeout=300

# Run Redis serving e2e tests with the Databricks flag
docker exec feast_jupyter_1 bash -c 'cd feast/tests/e2e/ && pytest -s basic-ingest-redis-serving.py --core_url core:6565 --serving_url=online-serving:6566 --databricks'
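The script can be run locally from any checkout; the FEAST_CORE_CONFIG and FEAST_DATABRICKS_EMULATOR_IMAGE defaults above can be overridden through the environment. For example (FEAST_VERSION here mirrors what CI sets from github.sha):

export FEAST_VERSION=v$(git rev-parse HEAD)
./infra/scripts/test-docker-compose-databricks.sh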
7 changes: 6 additions & 1 deletion pom.xml
@@ -35,7 +35,10 @@
<module>core</module>
<module>serving</module>
<module>sdk/java</module>
- <module>docs/coverage/java</module>
+ <module>spark/databricks-emulator</module>
+ <module>spark/databricks-types</module>
+ <module>spark/spark-historical-retriever-job</module>
+ <module>spark/spark-ingestion-job</module>
</modules>

<properties>
@@ -61,6 +64,8 @@
<opencensus.version>0.21.0</opencensus.version>
<!-- Force log4j2 to 2.11+ to support objectMessageAsJsonObject -->
<log4jVersion>2.12.1</log4jVersion>
+ <spark.version>2.4.5</spark.version>
+ <scala.compat.version>2.11</scala.compat.version>
</properties>

<organization>
113 changes: 113 additions & 0 deletions spark/databricks-emulator/pom.xml
@@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<name>Databricks emulator</name>
<description>REST API emulating Databricks API and running local Spark jobs, for integration testing</description>
<artifactId>databricks-emulator</artifactId>

<parent>
<groupId>dev.feast</groupId>
<artifactId>feast-parent</artifactId>
<version>${revision}</version>
<relativePath>../..</relativePath>
</parent>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<!-- Spark 2 only runs on Java 8 -->
<release>8</release>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>feast.databricks.emulator.DatabricksEmulator</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

<dependencies>
<dependency>
<groupId>dev.feast</groupId>
<artifactId>databricks-types</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.sparkjava</groupId>
<artifactId>spark-core</artifactId>
<version>2.9.1</version>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-launcher_${scala.compat.version}</artifactId>
<version>${spark.version}</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>

<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-library</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path-assert</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
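Outside Docker, the same shaded jar can be built and run directly. A sketch assuming a local Spark 2.4.5 distribution, since the emulator drives jobs through spark-launcher and the production image sets SPARK_HOME:

mvn --also-make --projects spark/databricks-emulator -Drevision=dev --batch-mode clean package
export SPARK_HOME=/path/to/spark-2.4.5-bin-hadoop2.7   # assumed local Spark install
java -Xms2048m -Xmx2048m -jar spark/databricks-emulator/target/databricks-emulator-dev.jar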