From 80a0ad66a3d21ad744460804bc91b26b884c1ae5 Mon Sep 17 00:00:00 2001 From: Xun Liu Date: Sun, 26 Nov 2023 21:26:04 +0800 Subject: [PATCH] Issue 791 (#825) ### What changes were proposed in this pull request? (Please outline the changes and how this PR fixes the issue.) ### Why are the changes needed? (Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug.) Fix: # (issue) ### Does this PR introduce _any_ user-facing change? (Please list the user-facing changes introduced by your change, including 1. Change in user-facing APIs. 2. Addition or removal of property keys.) ### How was this patch tested? (Please test your changes, and provide instructions on how to test it: 1. If you add a feature or fix a bug, add a test to cover your changes. 2. If you fix a flaky test, repeat it for many times to prove it works.) --- .github/workflows/docker-image.yml | 4 + .gitignore | 3 +- dev/docker/build-docker.sh | 4 +- dev/docker/gravitino/Dockerfile | 14 ++++ dev/docker/gravitino/gravitino-dependency.sh | 23 ++++++ dev/docker/hive/Dockerfile | 4 + dev/docker/playground/README.md | 31 ++++++++ dev/docker/playground/docker-compose.yaml | 74 +++++++++++++++++++ .../healthcheck/gravitino-healthcheck.sh | 28 +++++++ .../healthcheck/trino-healthcheck.sh | 19 +++++ dev/docker/playground/launch-playground.sh | 33 +++++++++ 11 files changed, 235 insertions(+), 2 deletions(-) create mode 100644 dev/docker/gravitino/Dockerfile create mode 100755 dev/docker/gravitino/gravitino-dependency.sh create mode 100644 dev/docker/playground/README.md create mode 100644 dev/docker/playground/docker-compose.yaml create mode 100755 dev/docker/playground/healthcheck/gravitino-healthcheck.sh create mode 100755 dev/docker/playground/healthcheck/trino-healthcheck.sh create mode 100755 dev/docker/playground/launch-playground.sh diff --git a/.github/workflows/docker-image.yml 
b/.github/workflows/docker-image.yml index 2d461508ddc..63013e0dce6 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -9,6 +9,7 @@ on: required: true default: 'gravitino-ci-hive' options: + - 'gravitino' - 'gravitino-ci-hive' - 'gravitino-ci-trino' tag: @@ -36,6 +37,9 @@ jobs: elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-trino" ]; then echo "image_type=trino" >> $GITHUB_ENV echo "image_name=datastrato/gravitino-ci-trino" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino" ]; then + echo "image_type=gravitino" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino" >> $GITHUB_ENV fi - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index a12d193b326..443774afd4f 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,8 @@ out/** distribution server/src/main/resources/project.properties -dev/docker/hive/packages +dev/docker/*/packages +dev/docker/gravitino/package docs/build dev/docker/tools/docker-connector diff --git a/dev/docker/build-docker.sh b/dev/docker/build-docker.sh index 543f53d2413..13a2a0c6def 100755 --- a/dev/docker/build-docker.sh +++ b/dev/docker/build-docker.sh @@ -8,7 +8,7 @@ script_dir="$(dirname "${BASH_SOURCE-$0}")" script_dir="$(cd "${script_dir}">/dev/null; pwd)" # Build docker image for multi-arch -USAGE="-e Usage: ./build-docker.sh --platform [all|linux/amd64|linux/arm64] --type [hive|trino] --image {image_name} --tag {tag_name} --latest" +USAGE="-e Usage: ./build-docker.sh --platform [all|linux/amd64|linux/arm64] --type [hive|trino|gravitino] --image {image_name} --tag {tag_name} --latest" # Get platform type if [[ "$1" == "--platform" ]]; then @@ -67,6 +67,8 @@ if [[ "${component_type}" == "hive" ]]; then build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME}" elif [ "${component_type}" == "trino" ]; then true # Placeholder, do nothing +elif [ "${component_type}" == "gravitino" ]; then + . 
"${script_dir}/gravitino/gravitino-dependency.sh" else echo "ERROR : ${component_type} is not a valid component type" echo ${USAGE} diff --git a/dev/docker/gravitino/Dockerfile b/dev/docker/gravitino/Dockerfile new file mode 100644 index 00000000000..70eccb8e56a --- /dev/null +++ b/dev/docker/gravitino/Dockerfile @@ -0,0 +1,14 @@ +# +# Copyright 2023 Datastrato. +# This software is licensed under the Apache License version 2. +# +FROM openjdk:8-jdk-buster +LABEL maintainer="support@datastrato.com" + +WORKDIR /root/gravitino + +COPY /packages/gravitino /root/gravitino + +EXPOSE 8090 + +ENTRYPOINT ["/bin/bash", "/root/gravitino/bin/gravitino.sh", "start"] \ No newline at end of file diff --git a/dev/docker/gravitino/gravitino-dependency.sh b/dev/docker/gravitino/gravitino-dependency.sh new file mode 100755 index 00000000000..43a2b6cc62e --- /dev/null +++ b/dev/docker/gravitino/gravitino-dependency.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# Copyright 2023 Datastrato. +# This software is licensed under the Apache License version 2. +# +set -ex +gravitino_dir="$(dirname "${BASH_SOURCE-$0}")" +gravitino_dir="$(cd "${gravitino_dir}">/dev/null; pwd)" +gravitino_home="$(cd "${gravitino_dir}/../../..">/dev/null; pwd)" + +# Build the Gravitino distribution package if it does not exist yet +if [[ ! -d "${gravitino_home}/distribution/package/" ]]; then + 
"${gravitino_home}/gradlew" -p "${gravitino_home}" compileDistribution -x test +fi +rm -rf "${gravitino_dir}/packages/gravitino" +mkdir -p "${gravitino_dir}/packages" +cp -r "${gravitino_home}/distribution/package" "${gravitino_dir}/packages/gravitino" + +# Append a blocking command so gravitino.sh never exits and the container stays up +cat <<EOF >> "${gravitino_dir}/packages/gravitino/bin/gravitino.sh" + +# persist the container +tail -f /dev/null +EOF diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 4b0929f959b..05b226ec6b6 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -153,6 +153,10 @@ RUN chown -R datastrato:hadoop /home/datastrato # removed install packages RUN rm -rf /tmp/packages +################################################################################ +HEALTHCHECK --interval=30s --timeout=30s --retries=10 \ + CMD /tmp/check-status.sh || exit 1 + ################################################################################ # expose port EXPOSE 22 3306 8088 9000 9083 10000 10002 50070 50075 50010 diff --git a/dev/docker/playground/README.md b/dev/docker/playground/README.md new file mode 100644 index 00000000000..54d47837ac1 --- /dev/null +++ b/dev/docker/playground/README.md @@ -0,0 +1,31 @@ + +# Playground +This is a complete Gravitino runtime environment with `Hive`, `HDFS`, `Trino`, and `Gravitino` Server. Just execute the `./launch-playground.sh` script. +It will automatically start the `gravitino-ci-hive`, `gravitino-ci-trino`, and `Gravitino` Docker containers on the local host. +Depending on your network, the startup may take 3-5 minutes. +Once the playground environment has started, you can open http://localhost:8090 to access the Gravitino Web UI. +You can also use a Trino client (such as DataGrip) to test Gravitino by connecting to the Trino Docker container via `jdbc:trino://127.0.0.1:8080`. 
+You can test in Trino using the following SQL: + +```sql +CREATE SCHEMA "playground_metalake.playground_hive".db1 +WITH (location = 'hdfs://hive:9000/user/hive/warehouse/db1.db'); + +show create schema "playground_metalake.playground_hive".db1; + +create table "playground_metalake.playground_hive".db1.table_001 +( + name varchar, + salary varchar +) +WITH ( + format = 'TEXTFILE' +); + +insert into "playground_metalake.playground_hive".db1.table_001 (name, salary) values ('sam', '11'); + +select * from "playground_metalake.playground_hive".db1.table_001; +``` \ No newline at end of file diff --git a/dev/docker/playground/docker-compose.yaml b/dev/docker/playground/docker-compose.yaml new file mode 100644 index 00000000000..ef5a10a98c7 --- /dev/null +++ b/dev/docker/playground/docker-compose.yaml @@ -0,0 +1,74 @@ +version: '3.3' +services: + hive: + image: datastrato/gravitino-ci-hive:0.1.6 + ports: + - "3306:3306" + - "8088:8088" + - "9000:9000" + - "9083:9083" + - "10000:10000" + - "10002:10002" + - "50070:50070" + - "50010:50010" + container_name: playground-hive + environment: + - HADOOP_USER_NAME=root + healthcheck: + test: ["CMD", "/tmp/check-status.sh"] + interval: 10s + timeout: 30s + retries: 10 + deploy: + resources: + limits: + cpus: "1" + memory: 1G + + gravitino: + image: datastrato/gravitino:0.3.0-SNAPSHORT + ports: + - "8090:8090" + container_name: playground-gravition + depends_on: + hive : + condition: service_healthy + volumes: + - ./healthcheck:/tmp/healthcheck + deploy: + resources: + limits: + cpus: "0.5" + memory: 500M + healthcheck: + test: ["CMD", "/tmp/healthcheck/gravitino-healthcheck.sh"] + interval: 10s + timeout: 30s + retries: 10 + + trino: + image: datastrato/gravitino-ci-trino:0.1.0 + ports: + - "8080:8080" + container_name: playground-trino + environment: + - HADOOP_USER_NAME=root + volumes: + - ./packages/trino/conf:/etc/trino:ro + - ./packages/gravitino-trino-connector:/usr/lib/trino/plugin/gravitino:ro + - 
./healthcheck:/tmp/healthcheck + depends_on: + hive : + condition: service_healthy + gravitino : + condition: service_healthy + deploy: + resources: + limits: + cpus: "1" + memory: 1G +# healthcheck: +# test: ["CMD", "/tmp/healthcheck/trino-healthcheck.sh"] +# interval: 30s +# timeout: 30s +# retries: 5 diff --git a/dev/docker/playground/healthcheck/gravitino-healthcheck.sh b/dev/docker/playground/healthcheck/gravitino-healthcheck.sh new file mode 100755 index 00000000000..0bdffaaf570 --- /dev/null +++ b/dev/docker/playground/healthcheck/gravitino-healthcheck.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Copyright 2023 Datastrato. +# This software is licensed under the Apache License version 2. +# +set -ex + +curl -X POST -H "Content-Type: application/json" -d '{"name":"playground_metalake","comment":"comment","properties":{}}' http://127.0.0.1:8090/api/metalakes + +curl -X POST -H "Content-Type: application/json" -d '{"name":"playground_hive","type":"RELATIONAL", "provider":"hive", "comment":"comment","properties":{"metastore.uris":"thrift://hive:9083"}}' http://127.0.0.1:8090/api/metalakes/playground_metalake/catalogs + +response=$(curl -X GET -H "Content-Type: application/json" http://127.0.0.1:8090/api/metalakes) +if echo "$response" | grep -q "playground_metalake"; then + echo "Metalake playground_metalake successfully created" +else + echo "Metalake playground_metalake create failed" + exit 1 +fi + +response=$(curl -X GET -H "Content-Type: application/json" http://127.0.0.1:8090/api/metalakes/playground_metalake/catalogs) +if echo "$response" | grep -q "playground_hive"; then + echo "Catalog playground_hive successfully created" +else + echo "Catalog playground_hive create failed" + exit 1 +fi + +exit 0 \ No newline at end of file diff --git a/dev/docker/playground/healthcheck/trino-healthcheck.sh b/dev/docker/playground/healthcheck/trino-healthcheck.sh new file mode 100755 index 00000000000..bbd270be80b --- /dev/null +++ 
b/dev/docker/playground/healthcheck/trino-healthcheck.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 Datastrato. +# This software is licensed under the Apache License version 2. +# +set -ex + +# Query used below: SHOW CATALOGS LIKE 'playground_metalake.playground_hive' + + +response=$(trino --execute "SHOW CATALOGS LIKE 'playground_metalake.playground_hive'") +if echo "$response" | grep -q "playground_metalake.playground_hive"; then + echo "Catalog playground_hive successfully created" +else + echo "Catalog playground_hive create failed" + exit 1 +fi + +exit 0 \ No newline at end of file diff --git a/dev/docker/playground/launch-playground.sh b/dev/docker/playground/launch-playground.sh new file mode 100755 index 00000000000..e02a26f24dd --- /dev/null +++ b/dev/docker/playground/launch-playground.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Copyright 2023 Datastrato. +# This software is licensed under the Apache License version 2. +# +set -ex + +playground_dir="$(dirname "${BASH_SOURCE-$0}")" +playground_dir="$(cd "${playground_dir}">/dev/null; pwd)" +gravitino_home="$(cd "${playground_dir}/../../..">/dev/null; pwd)" + +if [[ ! -d "${playground_dir}/packages/trino" ]]; then + mkdir -p "${playground_dir}/packages/trino" +fi + +if [[ ! -d "${gravitino_home}/distribution/gravitino-trino-connector" ]]; then + 
"${gravitino_home}/gradlew" -p "${gravitino_home}" assembleTrinoConnector -x test +fi +rm -rf "${playground_dir}/packages/gravitino-trino-connector" +cp -r "${gravitino_home}/distribution/gravitino-trino-connector" "${playground_dir}/packages/gravitino-trino-connector" + +rm -rf "${playground_dir}/packages/trino/conf" +cp -r -p "${gravitino_home}/dev/docker/trino/conf" "${playground_dir}/packages/trino/conf" + +# gravitino.uri = http://GRAVITINO_HOST_IP:GRAVITINO_HOST_PORT +# gravitino.metalake = GRAVITINO_METALAKE_NAME +sed -e 's/GRAVITINO_HOST_IP:GRAVITINO_HOST_PORT/gravitino:8090/g' \ + -e 's/GRAVITINO_METALAKE_NAME/playground_metalake/g' "${playground_dir}/packages/trino/conf/catalog/gravitino.properties.template" > "${playground_dir}/packages/trino/conf/catalog/gravitino.properties" + +# hive.metastore.uri = thrift://HIVE_HOST_IP:9083 +sed 's/HIVE_HOST_IP/hive/g' "${playground_dir}/packages/trino/conf/catalog/hive.properties.template" > "${playground_dir}/packages/trino/conf/catalog/hive.properties" + +docker-compose -f "${playground_dir}/docker-compose.yaml" up \ No newline at end of file