diff --git a/.github/workflows/cron-integration-test.yml b/.github/workflows/cron-integration-test.yml index 1e996aeb24d..db805fe6da9 100644 --- a/.github/workflows/cron-integration-test.yml +++ b/.github/workflows/cron-integration-test.yml @@ -11,7 +11,7 @@ concurrency: jobs: changes: - if: github.repository == 'datastrato/gravitino' + if: github.repository == 'apache/gravitino' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index f064ea789af..005cee8e2fe 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -10,17 +10,22 @@ on: default: 'gravitino' options: - 'gravitino' - - 'gravitino-ci-hive' - - 'gravitino-ci-kerberos-hive' - - 'gravitino-ci-trino' - - 'gravitino-ci-doris' - - 'gravitino-ci-ranger' + - 'gravitino-ci:hive' + - 'gravitino-ci:kerberos-hive' + - 'gravitino-ci:trino' + - 'gravitino-ci:doris' + - 'gravitino-ci:ranger' + - 'gravitino-playground:trino' + - 'gravitino-playground:hive' + - 'gravitino-playground:ranger' - 'gravitino-iceberg-rest-server' - - 'trino' - - 'hive' - - 'ranger' - tag: - description: 'Docker tag to apply to this image' + version: + description: 'Docker version to apply to this image' + required: true + type: string + + username: + description: 'Docker username' required: true type: string token: @@ -38,36 +43,45 @@ jobs: steps: - name: Set environment variables run: | - if [ "${{ github.event.inputs.image }}" == "gravitino-ci-hive" ]; then + if [ "${{ github.event.inputs.image }}" == "gravitino-ci:hive" ]; then echo "image_type=hive" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino-ci-hive" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-kerberos-hive" ]; then + echo "image_name=apache/gravitino-ci" >> $GITHUB_ENV + echo "tag_name=hive" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci:kerberos-hive" ]; then echo 
"image_type=kerberos-hive" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino-ci-kerberos-hive" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-trino" ]; then + echo "image_name=apache/gravitino-ci" >> $GITHUB_ENV + echo "tag_name=kerberos-hive" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci:trino" ]; then echo "image_type=trino" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino-ci-trino" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-doris" ]; then + echo "image_name=apache/gravitino-ci" >> $GITHUB_ENV + echo "tag_name=trino" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci:doris" ]; then echo "image_type=doris" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino-ci-doris" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-ranger" ]; then + echo "image_name=apache/gravitino-ci" >> $GITHUB_ENV + echo "tag_name=doris" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci:ranger" ]; then echo "image_type=ranger" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino-ci-ranger" >> $GITHUB_ENV + echo "image_name=apache/gravitino-ci" >> $GITHUB_ENV + echo "tag_name=ranger" >> $GITHUB_ENV elif [ "${{ github.event.inputs.image }}" == "gravitino" ]; then echo "image_type=gravitino" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "trino" ]; then + echo "image_name=apache/gravitino" >> $GITHUB_ENV + # `apache/gravitino` is the default image name, didn't need to tag alias name + elif [ "${{ github.event.inputs.image }}" == "gravitino-playground:trino" ]; then echo "image_type=trino" >> $GITHUB_ENV - echo "image_name=datastrato/trino" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "hive" ]; then + echo "image_name=apache/gravitino-playground" >> $GITHUB_ENV + echo "tag_name=trino" >> $GITHUB_ENV + elif [ "${{ 
github.event.inputs.image }}" == "gravitino-playground:hive" ]; then echo "image_type=hive" >> $GITHUB_ENV - echo "image_name=datastrato/hive" >> $GITHUB_ENV - elif [ "${{ github.event.inputs.image }}" == "ranger" ]; then + echo "image_name=apache/gravitino-playground" >> $GITHUB_ENV + echo "tag_name=hive" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-playground:ranger" ]; then echo "image_type=ranger" >> $GITHUB_ENV - echo "image_name=datastrato/ranger" >> $GITHUB_ENV + echo "image_name=apache/gravitino-playground" >> $GITHUB_ENV + echo "tag_name=ranger" >> $GITHUB_ENV elif [ "${{ github.event.inputs.image }}" == "gravitino-iceberg-rest-server" ]; then echo "image_type=iceberg-rest-server" >> $GITHUB_ENV - echo "image_name=datastrato/gravitino-iceberg-rest-server" >> $GITHUB_ENV + echo "image_name=apache/gravitino-iceberg-rest" >> $GITHUB_ENV fi - name: Check publish Docker token @@ -83,7 +97,7 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v2 with: - username: datastrato + username: ${{ github.event.inputs.username }} password: ${{ secrets.DOCKER_REPOSITORY_PASSWORD }} - name: Set up Docker Buildx @@ -100,4 +114,9 @@ jobs: run: | sudo rm -rf /usr/local/lib/android sudo rm -rf /opt/hostedtoolcache/CodeQL - ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }} --latest + + if [[ "${image_type}" == "gravitino" || "${image_type}" == "iceberg-rest-server" ]]; then + ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.version }} --latest + else + ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag "${tag_name}-${{ github.event.inputs.version }}" + fi \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 89a63d79db4..77b620cc54f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -61,7 +61,7 @@ cd gravitino ### Development 
Setup -Once you have cloned the [GitHub repository](https://github.com/apache/gravitino), see [how to build](/docs/how-to-build.md) for instructions on how to build, or you can use the provided docker images at [Datastrato's DockerHub repository](https://hub.docker.com/u/datastrato). +Once you have cloned the [GitHub repository](https://github.com/apache/gravitino), see [how to build](/docs/how-to-build.md) for instructions on how to build, or you can use the provided docker images at [Apache DockerHub repository](https://hub.docker.com/u/apache). To stop and start a local Gravitino server via `bin/gravitino.sh start` and `bin/gravitino.sh stop` in a Gravitino distribution, see [how to build](/docs/how-to-build.md) for more instructions. diff --git a/build.gradle.kts b/build.gradle.kts index 6943c5f96e0..97b7dc679f2 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -156,7 +156,7 @@ allprojects { } } - val setIntegrationTestEnvironment: (Test) -> Unit = { param -> + val setTestEnvironment: (Test) -> Unit = { param -> param.doFirst { param.jvmArgs(project.property("extraJvmArgs") as List<*>) @@ -167,6 +167,14 @@ allprojects { param.environment("HADOOP_HOME", "/tmp") param.environment("PROJECT_VERSION", project.version) + // Gravitino CI Docker image + param.environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "apache/gravitino-ci:hive-0.1.13") + param.environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "apache/gravitino-ci:kerberos-hive-0.1.5") + param.environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "apache/gravitino-ci:doris-0.1.5") + param.environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE", "apache/gravitino-ci:trino-0.1.6") + param.environment("GRAVITINO_CI_RANGER_DOCKER_IMAGE", "apache/gravitino-ci:ranger-0.1.1") + param.environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0") + val dockerRunning = project.rootProject.extra["dockerRunning"] as? Boolean ?: false val macDockerConnector = project.rootProject.extra["macDockerConnector"] as? 
Boolean ?: false if (OperatingSystem.current().isMacOsX() && @@ -205,7 +213,7 @@ allprojects { } } - extra["initIntegrationTest"] = setIntegrationTestEnvironment + extra["initTestParam"] = setTestEnvironment } nexusPublishing { @@ -409,6 +417,11 @@ subprojects { } tasks.configureEach { + if (project.name != "server-common") { + val initTest = project.extra.get("initTestParam") as (Test) -> Unit + initTest(this) + } + testLogging { exceptionFormat = TestExceptionFormat.FULL showExceptions = true diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts index 03d381e190c..8c962fe9b4e 100644 --- a/catalogs/catalog-hadoop/build.gradle.kts +++ b/catalogs/catalog-hadoop/build.gradle.kts @@ -125,14 +125,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.5") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts index 93efe11bf65..720428e0adc 100644 --- a/catalogs/catalog-hive/build.gradle.kts +++ b/catalogs/catalog-hive/build.gradle.kts @@ -179,14 +179,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.5") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-jdbc-doris/build.gradle.kts b/catalogs/catalog-jdbc-doris/build.gradle.kts index c10496067ae..19f232bc662 100644 --- a/catalogs/catalog-jdbc-doris/build.gradle.kts +++ b/catalogs/catalog-jdbc-doris/build.gradle.kts @@ -85,24 +85,16 
@@ tasks { tasks.test { val skipUTs = project.hasProperty("skipTests") - doFirst { - environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "datastrato/gravitino-ci-doris:0.1.5") - } - if (skipUTs) { // Only run integration tests include("**/integration/**") } - val skipITs = project.hasProperty("skipITs") if (skipITs) { // Exclude integration tests exclude("**/integration/**") } else { dependsOn(tasks.jar) - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-jdbc-mysql/build.gradle.kts b/catalogs/catalog-jdbc-mysql/build.gradle.kts index d96b6209c6b..e6ba106cdb4 100644 --- a/catalogs/catalog-jdbc-mysql/build.gradle.kts +++ b/catalogs/catalog-jdbc-mysql/build.gradle.kts @@ -98,9 +98,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-jdbc-postgresql/build.gradle.kts b/catalogs/catalog-jdbc-postgresql/build.gradle.kts index f35585509c6..8a03e6c18b1 100644 --- a/catalogs/catalog-jdbc-postgresql/build.gradle.kts +++ b/catalogs/catalog-jdbc-postgresql/build.gradle.kts @@ -103,9 +103,6 @@ tasks.test { // PG will use project jdbc-mysql/build/libs directory, so we add the task dependency here. 
dependsOn(":catalogs:catalog-jdbc-mysql:jar") dependsOn(tasks.jar) - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-kafka/build.gradle.kts b/catalogs/catalog-kafka/build.gradle.kts index d423604949d..5fd27a0eb46 100644 --- a/catalogs/catalog-kafka/build.gradle.kts +++ b/catalogs/catalog-kafka/build.gradle.kts @@ -107,12 +107,5 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts index 8936c75c155..5ee05e39724 100644 --- a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts +++ b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts @@ -150,14 +150,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.5") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 1faf5e426d2..a6adf999d31 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -138,14 +138,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.3") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git 
a/clients/client-python/build.gradle.kts b/clients/client-python/build.gradle.kts index 6716f345ba1..2770df01ce9 100644 --- a/clients/client-python/build.gradle.kts +++ b/clients/client-python/build.gradle.kts @@ -222,7 +222,7 @@ tasks { "GRAVITINO_HOME" to project.rootDir.path + "/distribution/package", "START_EXTERNAL_GRAVITINO" to "true", "DOCKER_TEST" to dockerTest.toString(), - "GRAVITINO_CI_HIVE_DOCKER_IMAGE" to "datastrato/gravitino-ci-hive:0.1.13", + "GRAVITINO_CI_HIVE_DOCKER_IMAGE" to "apache/gravitino-ci:hive-0.1.13", // Set the PYTHONPATH to the client-python directory, make sure the tests can import the // modules from the client-python directory. "PYTHONPATH" to "${project.rootDir.path}/clients/client-python" diff --git a/dev/docker/build-docker.sh b/dev/docker/build-docker.sh index 71c47c0ea9b..a2cef6c8f1f 100755 --- a/dev/docker/build-docker.sh +++ b/dev/docker/build-docker.sh @@ -121,7 +121,7 @@ else exit 1 fi -build_args="${build_args} --build-arg IMAGE_NAME=${image_name}" +build_args="${build_args} --build-arg IMAGE_NAME=${image_name} --build-arg TAG_NAME=${tag_name}" # Create multi-arch builder BUILDER_NAME="gravitino-builder" diff --git a/dev/docker/trino/Dockerfile b/dev/docker/trino/Dockerfile index 980528bfd7d..2050ab7987e 100644 --- a/dev/docker/trino/Dockerfile +++ b/dev/docker/trino/Dockerfile @@ -67,7 +67,8 @@ RUN mkdir /tmp/gravitino COPY --chown=trino:trino packages/gravitino-trino-connector /tmp/gravitino ARG IMAGE_NAME -RUN if [ "$IMAGE_NAME" = "datastrato/trino" ] ; then \ +ARG TAG_NAME +RUN if [ "$IMAGE_NAME" = "apache/gravitino-playground" ] ; then \ mv /tmp/gravitino /usr/lib/trino/plugin/; \ else echo "Copying files for other images"; \ fi diff --git a/docs/assets/publish-docker-image.jpg b/docs/assets/publish-docker-image.jpg new file mode 100644 index 00000000000..ca22da1db96 Binary files /dev/null and b/docs/assets/publish-docker-image.jpg differ diff --git a/docs/assets/publish-docker-image.png b/docs/assets/publish-docker-image.png deleted file 
mode 100644 index 4f4c63eed72..00000000000 Binary files a/docs/assets/publish-docker-image.png and /dev/null differ diff --git a/docs/docker-image-details.md b/docs/docker-image-details.md index 16c7a965932..81a573f9be4 100644 --- a/docs/docker-image-details.md +++ b/docs/docker-image-details.md @@ -14,28 +14,27 @@ You can deploy the service with the Gravitino Docker image. Container startup commands ```shell -docker run --rm -d -p 8090:8090 -p 9001:9001 datastrato/gravitino +docker run --rm -d -p 8090:8090 -p 9001:9001 apache/gravitino:0.6.0-incubating ``` Changelog -- gravitino:0.5.1 - - Based on Gravitino 0.5.1, you can know more information from 0.5.1 release notes. +- apache/gravitino:0.6.0-incubating (Switch to Apache official DockerHub repository) + - Use the latest Gravitino version 0.6.0 source code to build the image. +- datastrato/gravitino:0.5.1 + - Based on Gravitino 0.5.1, you can know more information from 0.5.1 release notes. -- gravitino:0.5.0 +- datastrato/gravitino:0.5.0 - Based on Gravitino 0.5.0, you can know more information from 0.5.0 release notes. - -- gravitino:0.4.0 +- datastrato/gravitino:0.4.0 - Based on Gravitino 0.4.0, you can know more information from 0.4.0 release notes. - -- gravitino:0.3.1 +- datastrato/gravitino:0.3.1 - Fix some issues - -- gravitino:0.3.0 +- datastrato/gravitino:0.3.0 - Docker image `datastrato/gravitino:0.3.0` - Gravitino Server - Expose ports: @@ -49,13 +48,12 @@ You can deploy the standalone Gravitino Iceberg REST server with the Docker imag Container startup commands ```shell -docker run --rm -d -p 9001:9001 datastrato/gravitino-iceberg-rest-server +docker run --rm -d -p 9001:9001 apache/gravitino-iceberg-rest:0.6.0-incubating ``` Changelog -- gravitino-iceberg-rest-server:0.6.0 - - Docker image `datastrato/gravitino-iceberg-rest-server:0.6.0` +- apache/gravitino-iceberg-rest:0.6.0-incubating. - Gravitino Iceberg REST Server with memory catalog backend. 
- Expose ports: - `9001` Iceberg REST service @@ -72,7 +70,10 @@ The Docker images of the playground have suitable configurations for users to ex Changelog -- hive:2.7.3-no-yarn +- apache/gravitino-playground:hive-2.7.3 (Switch to Apache official DockerHub repository) + - Use `datastrato/hive:2.7.3-no-yarn` Dockerfile to rebuild the image. + +- datastrato/hive:2.7.3-no-yarn - Docker image `datastrato/hive:2.7.3-no-yarn` - `hadoop-2.7.3` - `hive-2.3.9` @@ -82,23 +83,22 @@ Changelog Changelog -- trino:435-gravitino-0.5.1 - - Based on Gravitino 0.5.1, you can know more information from 0.5.1 release notes. +- apache/gravitino-playground:trino-435-gravitino-0.6.0-incubating (Switch to Apache official DockerHub repository) + - Use Gravitino release 0.6.0 Dockerfile to build the image. +- datastrato/trino:435-gravitino-0.5.1 + - Based on Gravitino 0.5.1, you can know more information from 0.5.1 release notes. -- trino:426-gravitino-0.5.0 +- datastrato/trino:426-gravitino-0.5.0 - Based on Gravitino 0.5.0, you can know more information from 0.5.0 release notes. - -- trino:426-gravitino-0.4.0 +- datastrato/trino:426-gravitino-0.4.0 - Based on Gravitino 0.4.0, you can know more information from 0.4.0 release notes. - -- trino:426-gravitino-0.3.1 +- datastrato/trino:426-gravitino-0.3.1 - Fix some issues - -- trino:426-gravitino-0.3.0 +- datastrato/trino:426-gravitino-0.3.0 - Docker image `datastrato/trino:426-gravitino-0.3.0` - Base on `trino:462` - Added Gravitino trino-connector-0.3.0 libraries into the `/usr/lib/trino/plugin/gravitino` @@ -112,28 +112,32 @@ You can use these kinds of Docker images to facilitate integration testing of al You can use this kind of image to test the catalog of Apache Hive with kerberos enable Changelog -- gravitino-ci-kerberos-hive:0.1.5 + +- apache/gravitino-ci:kerberos-hive-0.1.5 (Switch to Apache official DockerHub repository) + - Use Gravitino release 0.6.0 Dockerfile to build the image. 
+ +- datastrato/gravitino-ci-kerberos-hive:0.1.5 - Start another HMS for the Hive cluster in the container with port 19083. This is to test whether Kerberos authentication works for a Kerberos-enabled Hive cluster with multiple HMS. - Refresh ssh keys in the startup script. - Add test logic to log in localhost via ssh without password. -- gravitino-ci-kerberos-hive:0.1.4 +- datastrato/gravitino-ci-kerberos-hive:0.1.4 - Increase the total check time for the status of DataNode to 150s. - Output the log of the DataNode fails to start -- gravitino-ci-kerberos-hive:0.1.3 +- datastrato/gravitino-ci-kerberos-hive:0.1.3 - Add more proxy users in the core-site.xml file. - fix bugs in the `start.sh` script. -- gravitino-ci-kerberos-hive:0.1.2 +- datastrato/gravitino-ci-kerberos-hive:0.1.2 - Add `${HOSTNAME} >> /root/.ssh/known_hosts` to the startup script. - Add check for the status of DataNode, if the DataNode is not running or ready within 100s, the container will exit. -- gravitino-ci-kerberos-hive:0.1.1 +- datastrato/gravitino-ci-kerberos-hive:0.1.1 - Add a principal for Gravitino web server named 'HTTP/localhost@HADOOPKRB'. - Fix bugs about the configuration of proxy users. -- gravitino-ci-kerberos-hive:0.1.0 +- datastrato/gravitino-ci-kerberos-hive:0.1.0 - Set up a Hive cluster with kerberos enabled. - Install a KDC server and create a principal for Hive. For more please see [kerberos-hive](../dev/docker/kerberos-hive) @@ -143,7 +147,10 @@ You can use this kind of image to test the catalog of Apache Hive. Changelog -- gravitino-ci-hive:0.1.13 +- apache/gravitino-ci:hive-0.1.13 (Switch to Apache official DockerHub repository) + - Use Gravitino release 0.6.0 Dockerfile to build the image. 
+ +- datastrato/gravitino-ci-hive:0.1.13 - Support Hive 2.3.9 and HDFS 2.7.3 - Docker environment variables: - `HIVE_RUNTIME_VERSION`: `hive2` (default) @@ -157,57 +164,57 @@ Changelog - If you want to enable HDFS Ranger plugin, you need both set the `RANGER_SERVER_URL` and `RANGER_HDFS_REPOSITORY_NAME` environment variables. HDFS Ranger audit logs are stored in the `/usr/local/hadoop/logs/ranger-hdfs-audit.log` - Example: docker run -e HIVE_RUNTIME_VERSION='hive3' -e RANGER_SERVER_URL='http://ranger-server:6080' -e RANGER_HIVE_REPOSITORY_NAME='hiveDev' -e RANGER_HDFS_REPOSITORY_NAME='hdfsDev' ... datastrato/gravitino-ci-hive:0.1.13 -- gravitino-ci-hive:0.1.12 +- datastrato/gravitino-ci-hive:0.1.12 - Shrink hive Docker image size by 420MB -- gravitino-ci-hive:0.1.11 +- datastrato/gravitino-ci-hive:0.1.11 - Remove `yarn` from the startup script; Remove `yarn-site.xml` and `yarn-env.sh` files; - Change the value of `mapreduce.framework.name` from `yarn` to `local` in the `mapred-site.xml` file. -- gravitino-ci-hive:0.1.10 +- datastrato/gravitino-ci-hive:0.1.10 - Remove SSH service from the startup script. - Use `hadoop-daemon.sh` to start HDFS services. -- gravitino-ci-hive:0.1.9 +- datastrato/gravitino-ci-hive:0.1.9 - Remove cache after installing packages. 
-- gravitino-ci-hive:0.1.8 +- datastrato/gravitino-ci-hive:0.1.8 - Change the value of `hive.server2.enable.doAs` to `true` -- gravitino-ci-hive:0.1.7 +- datastrato/gravitino-ci-hive:0.1.7 - Download MySQL JDBC driver before building the Docker image - Set `hdfs` as HDFS superuser group -- gravitino-ci-hive:0.1.6 +- datastrato/gravitino-ci-hive:0.1.6 - No starting YARN when container startup - Removed expose ports: - `22` SSH - `8088` YARN Service -- gravitino-ci-hive:0.1.5 +- datastrato/gravitino-ci-hive:0.1.5 - Rollback `Map container hostname to 127.0.0.1 before starting Hadoop` of `datastrato/gravitino-ci-hive:0.1.4` -- gravitino-ci-hive:0.1.4 +- datastrato/gravitino-ci-hive:0.1.4 - Configure HDFS DataNode data transfer address to be `0.0.0.0:50010` - Map the container hostname to `127.0.0.1` before starting Hadoop - Expose `50010` port for the HDFS DataNode -- gravitino-ci-hive:0.1.3 +- datastrato/gravitino-ci-hive:0.1.3 - Change MySQL bind-address from `127.0.0.1` to `0.0.0.0` - Add `iceberg` to MySQL users with password `iceberg` - Export `3306` port for MySQL -- gravitino-ci-hive:0.1.2 +- datastrato/gravitino-ci-hive:0.1.2 - Based on `datastrato/gravitino-ci-hive:0.1.1` - Modify `fs.defaultFS` from `local` to `0.0.0.0` in the `core-site.xml` file. - Expose `9000` port in the `Dockerfile` file. -- gravitino-ci-hive:0.1.1 +- datastrato/gravitino-ci-hive:0.1.1 - Based on `datastrato/gravitino-ci-hive:0.1.0` - Modify HDFS/YARN/HIVE `MaxPermSize` from `8GB` to `128MB` - Modify `HADOOP_HEAPSIZE` from `8192` to `128` -- gravitino-ci-hive:0.1.0 +- datastrato/gravitino-ci-hive:0.1.0 - Docker image `datastrato/gravitino-ci-hive:0.1.0` - `hadoop-2.7.3` - `hive-2.3.9` @@ -228,22 +235,25 @@ You can use this image to test Trino. Changelog -- gravitino-ci-trino:0.1.6 +- apache/gravitino-ci:trino-0.1.6 (Switch to Apache official DockerHub repository) + - Use Gravitino release 0.6.0 Dockerfile to build the image. 
+ +- datastrato/gravitino-ci-trino:0.1.6 - Upgrade trino:426 to trino:435 -- gravitino-ci-trino:0.1.5 +- datastrato/gravitino-ci-trino:0.1.5 - Add check for the version of gravitino-trino-connector -- gravitino-ci-trino:0.1.4 +- datastrato/gravitino-ci-trino:0.1.4 - Change `-Xmx1G` to `-Xmx2G` in the config file `/etc/trino/jvm.config` -- gravitino-ci-trino:0.1.3 +- datastrato/gravitino-ci-trino:0.1.3 - Remove copy content in folder `gravitino-trino-connector` to plugin folder `/usr/lib/trino/plugin/gravitino` -- gravitino-ci-trino:0.1.2 +- datastrato/gravitino-ci-trino:0.1.2 - Copy JDBC driver 'mysql-connector-java' and 'postgres' to `/usr/lib/trino/iceberg/` folder -- gravitino-ci-trino:0.1.0 +- datastrato/gravitino-ci-trino:0.1.0 - Docker image `datastrato/gravitino-ci-trino:0.1.0` - Based on `trinodb/trino:426` and removed some unused plugins from it. - Expose ports: @@ -254,23 +264,27 @@ Changelog You can use this image to test Apache Doris. Changelog -- gravitino-ci-doris:0.1.5 + +- apache/gravitino-ci:doris-0.1.5 (Switch to Apache official DockerHub repository) + - Use Gravitino release 0.6.0 Dockerfile to build the image. + +- datastrato/gravitino-ci-doris:0.1.5 - Remove the chmod command in the Dockerfile to decrease the size of the Docker image. -- gravitino-ci-doris:0.1.4 +- datastrato/gravitino-ci-doris:0.1.4 - remove chmod in start.sh to accelerate the startup speed -- gravitino-ci-doris:0.1.3 +- datastrato/gravitino-ci-doris:0.1.3 - To adapt to the CI framework, don't exit container when start failed, logs are no longer printed to stdout. - Add `report_disk_state_interval_seconds` config to decrease report interval. -- gravitino-ci-doris:0.1.2 +- datastrato/gravitino-ci-doris:0.1.2 - Add a check for the status of Doris BE, add retry for adding BE nodes. 
-- gravitino-ci-doris:0.1.1 +- datastrato/gravitino-ci-doris:0.1.1 - Optimize `start.sh`, add disk space check before starting Doris, exit when FE or BE start failed, add log to stdout -- gravitino-ci-doris:0.1.0 +- datastrato/gravitino-ci-doris:0.1.0 - Docker image `datastrato/gravitino-ci-doris:0.1.0` - Start Doris BE & FE in one container - Please set table properties `"replication_num" = "1"` when creating a table in Doris, because the default replication number is 3, but the Doris container only has one BE. @@ -285,7 +299,10 @@ You can use this image to control Trino's permissions. Changelog -- gravitino-ci-ranger:0.1.1 +- apache/gravitino-ci:ranger-0.1.1 (Switch to Apache official DockerHub repository) + - Use Gravitino release 0.6.0 Dockerfile to build the image. + +- datastrato/gravitino-ci-ranger:0.1.1 - Docker image datastrato/gravitino-ci-ranger:0.1.1 - Use `ranger-admin` release from `datastrato/apache-ranger:2.4.0` to build docker image. - Remove unnecessary hack in `start-ranger-service.sh`. @@ -297,7 +314,7 @@ Changelog - Clone the `Apache Ranger` project from GiHub and checkout the `2.4.0` release. - Create a remote debug configuration (`Use model classpath` = `EmbeddedServer`) in your IDE and connect to the Ranger admin container. -- gravitino-ci-ranger:0.1.0 +- datastrato/gravitino-ci-ranger:0.1.0 - Docker image `datastrato/gravitino-ci-ranger:0.1.0` - Support Apache Ranger 2.4.0 - Use environment variable `RANGER_PASSWORD` to set up Apache Ranger admin password, Please notice Apache Ranger Password should be minimum 8 characters with min one alphabet and one numeric. diff --git a/docs/getting-started.md b/docs/getting-started.md index 420d583d78a..0155cfa4f89 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -195,7 +195,7 @@ Installing and configuring Hive can be a little complex. If you don't already ha Follow these instructions for setting up [Docker on Ubuntu](https://docs.docker.com/engine/install/ubuntu/). 
```shell -sudo docker run --name gravitino-container -d -p 9000:9000 -p 8088:8088 -p 50010:50010 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 datastrato/hive:2.7.3-no-yarn +sudo docker run --name gravitino-container -d -p 9000:9000 -p 8088:8088 -p 50010:50010 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 apache/gravitino-playground:hive-2.7.3 ``` Once Docker is installed, you can start the container with the command: diff --git a/docs/how-to-install.md b/docs/how-to-install.md index 66d655e8f94..7e38a94905c 100644 --- a/docs/how-to-install.md +++ b/docs/how-to-install.md @@ -145,11 +145,11 @@ For more detailed information about the Gravitino Iceberg REST server, please re ### Get the Apache Gravitino Docker image -Gravitino publishes the Docker image to [Docker Hub](https://hub.docker.com/r/datastrato/gravitino/tags). +Gravitino publishes the Docker image to [Docker Hub](https://hub.docker.com/r/apache/gravitino/tags). Run the Gravitino Docker image by running: ```shell -docker run -d -i -p 8090:8090 datastrato/gravitino: +docker run -d -i -p 8090:8090 apache/gravitino: ``` Access the Gravitino Web UI by typing `http://localhost:8090` in your browser, or you diff --git a/docs/how-to-test.md b/docs/how-to-test.md index 60cf0244979..17ce097a1f9 100644 --- a/docs/how-to-test.md +++ b/docs/how-to-test.md @@ -87,7 +87,7 @@ Some integration test cases depend on the Gravitino CI Docker image. If an integration test relies on the specific Gravitino CI Docker image, set the `@tag(gravitino-docker-test)` annotation in the test class. For example, the `integration-test/src/test/.../CatalogHiveIT.java` test needs to connect to -the `datastrato/gravitino-ci-hive` Docker container for testing the Hive data source. +the `apache/gravitino-ci:hive-{hive-version}` Docker container for testing the Hive data source. 
Therefore, it should have the following `@tag` annotation:`@tag(gravitino-docker-test)`. This annotation helps identify the specific Docker container required for the integration test. diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index 1753cc9496e..5389f934f49 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -303,13 +303,13 @@ SELECT * FROM dml.test; You could run Gravitino Iceberg REST server though docker container: ```shell -docker run -d -p 9001:9001 datastrato/iceberg-rest-server:0.6 +docker run -d -p 9001:9001 apache/gravitino-iceberg-rest:0.6.0 ``` Or build it manually to add custom logics: ```shell -sh ./dev/docker/build-docker.sh --platform linux/arm64 --type iceberg-rest-server --image datastrato/iceberg-rest-server --tag 0.6 +sh ./dev/docker/build-docker.sh --platform linux/arm64 --type iceberg-rest-server --image apache/gravitino-iceberg-rest --tag 0.6.0 ``` You could try Spark with Gravitino REST catalog service in our [playground](./how-to-use-the-playground.md#using-iceberg-rest-service). diff --git a/docs/index.md b/docs/index.md index 0d680236fcb..2d1d48bd68d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,7 +24,7 @@ your system `PATH`, or the `JAVA_HOME` environment variable pointing to a Java i See [How to install Gravitino](./how-to-install.md) to learn how to install the Gravitino server. -Gravitino provides Docker images on [Docker Hub](https://hub.docker.com/u/datastrato). +Gravitino provides Docker images on [Docker Hub](https://hub.docker.com/u/apache). Pull the image and run it. For details of the Gravitino Docker image, see [Docker image details](./docker-image-details.md). diff --git a/docs/publish-docker-images.md b/docs/publish-docker-images.md index da37aeee4ae..953d3120662 100644 --- a/docs/publish-docker-images.md +++ b/docs/publish-docker-images.md @@ -9,7 +9,7 @@ license: "This software is licensed under the Apache License version 2." 
## Introduction The Apache Gravitino project provides a set of Docker images to facilitate the publishing, development, and testing of the Gravitino project. -[Datastrato Docker Hub](https://hub.docker.com/u/datastrato) repository publishes the official Gravitino Docker images. +[Apache Docker Hub](https://hub.docker.com/u/apache) repository publishes the official Gravitino Docker images. ## Publish Docker images to Docker Hub @@ -21,14 +21,18 @@ You can use GitHub actions to publish Docker images to the Docker Hub repository + Selecting the main branch results in publishing the Docker image with the specified tag and the latest tag. + Selecting another branch, results are publishing the Docker image with the specified tag. 4. Choose the image you want to build - + `datastrato/gravitino-ci-hive`. - + `datastrato/gravitino-ci-trino`. + + `apache/gravitino-ci:hive`. + + `apache/gravitino-ci:trino`. + Future plans include support for other data sources. -5. Input the `tag name`, for example: `0.1.0`, Then build and push the Docker image name as `datastrato/{image-name}:0.1.0`. -6. You must enter the correct `publish docker token` before you can execute run `Publish Docker Image` workflow. -7. Wait for the workflow to complete. You can see a new Docker image shown in the [Datastrato Docker Hub](https://hub.docker.com/u/datastrato) repository. - -![Publish Docker image](assets/publish-docker-image.png) +5. Input the `version`, for example: `0.1.0`. The workflow then builds and pushes the Docker image. Currently, the Docker image name is in the format: + 1. `apache/gravitino-ci:{image-type}-0.1.0` if this is a CI image; image-type is one of `trino`, `hive`, `kerberos-hive`, `doris`, `ranger`. + 2. `apache/gravitino-playground:{image-type}-0.1.0` if this is a playground image; image-type is one of `trino`, `hive`, `ranger`. + 3. `apache/gravitino:0.1.0` if this is a gravitino server image. + 4. 
`apache/gravitino-iceberg-rest:0.1.0` if this is an iceberg-rest server image. +6. 
You must enter the correct `docker user name` and `publish docker token` before you can run the `Publish Docker Image` workflow. +7. Wait for the workflow to complete. You can see a new Docker image shown in the [Apache Docker Hub](https://hub.docker.com/u/apache) repository. + +![Publish Docker image](assets/publish-docker-image.jpg) ## More details of Apache Gravitino Docker images diff --git a/flink-connector/build.gradle.kts b/flink-connector/build.gradle.kts index 169b5f80988..c2b13f40843 100644 --- a/flink-connector/build.gradle.kts +++ b/flink-connector/build.gradle.kts @@ -157,13 +157,6 @@ tasks.test { } else { dependsOn(tasks.jar) dependsOn(":catalogs:catalog-hive:jar") - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/iceberg/iceberg-rest-server/build.gradle.kts b/iceberg/iceberg-rest-server/build.gradle.kts index 2c0fdb0091f..0fa77b8509d 100644 --- a/iceberg/iceberg-rest-server/build.gradle.kts +++ b/iceberg/iceberg-rest-server/build.gradle.kts @@ -158,14 +158,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12") - environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.3") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/integration-test/build.gradle.kts b/integration-test/build.gradle.kts index 4506bd49734..cdd33be4b77 100644 --- a/integration-test/build.gradle.kts +++ b/integration-test/build.gradle.kts @@ -162,13 +162,6 @@ tasks.test { dependsOn(":web:build") doFirst { - // Gravitino CI Docker image - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE", "datastrato/gravitino-ci-trino:0.1.5") 
- environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0") - environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "datastrato/gravitino-ci-doris:0.1.5") - environment("GRAVITINO_CI_RANGER_DOCKER_IMAGE", "datastrato/gravitino-ci-ranger:0.1.1") - copy { from("${project.rootDir}/dev/docker/trino/conf") into("build/trino-conf") @@ -196,9 +189,6 @@ tasks.test { } } } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/integration-test/trino-it/docker-compose.yaml b/integration-test/trino-it/docker-compose.yaml index c00c1a45d83..b9ba4199f5d 100644 --- a/integration-test/trino-it/docker-compose.yaml +++ b/integration-test/trino-it/docker-compose.yaml @@ -19,7 +19,7 @@ services: hive: - image: datastrato/gravitino-ci-hive:0.1.13 + image: apache/gravitino-ci:hive-0.1.13 networks: - trino-net container_name: trino-ci-hive diff --git a/spark-connector/spark-common/build.gradle.kts b/spark-connector/spark-common/build.gradle.kts index f4dacc569eb..daa34c811a1 100644 --- a/spark-connector/spark-common/build.gradle.kts +++ b/spark-connector/spark-common/build.gradle.kts @@ -141,13 +141,6 @@ tasks.test { exclude("**/integration/**") } else { dependsOn(tasks.jar) - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index fb3743c2a0a..4f60c46555e 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -156,13 +156,6 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - } - - val init = project.extra.get("initIntegrationTest") as 
(Test) -> Unit - init(this) } } diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index 74dc6d4fedd..4cdaaa628dd 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -156,13 +156,6 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } } diff --git a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 907a847cd5c..a70d56b9e07 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -158,13 +158,6 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") - - doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") - } - - val init = project.extra.get("initIntegrationTest") as (Test) -> Unit - init(this) } }