diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml new file mode 100644 index 00000000000..0fc23e5345d --- /dev/null +++ b/.github/workflows/integration-test.yml @@ -0,0 +1,89 @@ +name: Integration Test + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +env: + HIVE2_IMAGE_NAME: datastrato/hive2 + HIVE2_IMAGE_TAG_NAME: 0.1.0 + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + # Integration test for AMD64 architecture + test-amd64-arch: + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + matrix: + architecture: [linux/amd64] + env: + DOCKER_RUN_NAME: hive2-amd64 + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v3 + with: + java-version: '8' + distribution: 'temurin' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Build the hive2 Docker image for AMD64 + if: ${{ contains(github.event.pull_request.labels.*.name, 'build docker image') }} + run: ./dev/docker/hive2/build-docker.sh --platform ${PLATFORM} --image ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_TAG_NAME} + + - name: Run AMD64 container + run: | + docker run --rm --name ${DOCKER_RUN_NAME} --platform ${PLATFORM} -d -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_TAG_NAME} + docker ps -a + + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + gradle-version: '8.1.1' + + - name: Show gradle version + run: gradle --version + + - name: Package Graviton + run: | + gradle build + gradle compileDistribution + + - name: Setup Debug Action + if: ${{ contains(github.event.pull_request.labels.*.name, 'debug action') }} + uses: csexton/debugger-action@master + + - name: Integration Test + run: | + gradle integrationTest + + - name: Print logs when Graviton integration tests fail + if: ${{ failure() }} + run: | + if [ -f "distribution/package/logs/graviton-server.out" ]; then + cat distribution/package/logs/graviton-server.out + fi + if [ -f "distribution/package/logs/graviton-server.log" ]; then + cat distribution/package/logs/graviton-server.log + fi + + - name: Stop and remove container + run: | + docker stop ${DOCKER_RUN_NAME} + sleep 3 + docker ps -a + docker rmi ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_TAG_NAME} \ No newline at end of file diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml deleted file mode 100644 index 9d181ec59e8..00000000000 --- a/.github/workflows/integration.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: integration - -# Controls when the workflow will run -on: - # Triggers the workflow on push or pull request events but only for the "main" branch - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -concurrency: - group: ${{ github.worklfow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_requests' }} - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # This workflow contains a single job called "build" - integration_test: - # The type of runner that the job will run
on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 - - - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'temurin' - - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - gradle-version: '8.1.1' - - - name: Show gradle version - run: gradle --version - - - name: Package Graviton - run: | - gradle build - gradle compileDistribution - - - name: Graviton Integration Tests - run: | - gradle integrationTest \ No newline at end of file diff --git a/README.md b/README.md index 1c286f8db3e..45821480805 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,13 @@ Copyright 2023 Datastrato. This software is licensed under the Apache License version 2. --> -# Graviton \ No newline at end of file +# Graviton +## Introduction + +Graviton is a high-performance, geo-distributed and federated metadata lake. + +## Development Guide + +1. [How to build Graviton](docs/how-to-build.md) +2. [How to Run Integration Test](docs/integration-test.md) +3. [How to publish Docker images](docs/publish-docker-images.md) diff --git a/bin/graviton.sh b/bin/graviton.sh index 22e7a23b118..84f579d7f12 100755 --- a/bin/graviton.sh +++ b/bin/graviton.sh @@ -124,14 +124,14 @@ function stop() { } HOSTNAME=$(hostname) -GRAVITON_OUTFILE="${GRAVITON_LOG_DIR}/graviton-${HOSTNAME}.out" +GRAVITON_OUTFILE="${GRAVITON_LOG_DIR}/graviton-server.out" GRAVITON_SERVER_NAME=com.datastrato.graviton.server.GravitonServer JAVA_OPTS+=" -Dfile.encoding=UTF-8" JAVA_OPTS+=" -Dlog4j2.configurationFile=file://${GRAVITON_CONF_DIR}/log4j2.properties" JAVA_OPTS+=" -Dgraviton.log.path=${GRAVITON_LOG_DIR} ${GRAVITON_MEM}" -addJarInDir "${GRAVITON_HOME}/lib" +addJarInDir "${GRAVITON_HOME}/libs" case "${1}" in start) diff --git a/build.gradle.kts b/build.gradle.kts index a5f1c1c8130..788dee30750 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -149,7 +149,7 @@ tasks { val outputDir = projectDir.dir("distribution") val compileDistribution by registering { - dependsOn("copyRuntimeClass", "copyCatalogRuntimeClass", "copySubmoduleClass") + dependsOn("copyRuntimeClass", "copyCatalogRuntimeClass", "copySubmoduleClass", "copyCatalogModuleClass") group = "graviton distribution" outputs.dir(projectDir.dir("distribution/package")) @@ -172,8 +172,7 @@ tasks { group = "graviton distribution" finalizedBy("checksumDistribution") from(compileDistribution.map { it.outputs.files.single() }) - archiveBaseName.set("datastrato") - archiveAppendix.set(rootProject.name.lowercase()) + archiveBaseName.set(rootProject.name.lowercase()) archiveVersion.set("${version}") archiveClassifier.set("bin") destinationDirectory.set(outputDir) @@ -204,10 +203,10 @@ tasks { val copyRuntimeClass by registering(Copy::class) { subprojects.forEach() { - if (it.name != "catalog-hive" && it.name != "client-java") { - // println("copyRuntimeClass: ${it.name}") + if (it.name != "catalog-hive" && it.name != "client-java" && it.name != "integration-test") { + println("copyRuntimeClass: ${it.name}") from(it.configurations.runtimeClasspath) - into("distribution/package/lib") + into("distribution/package/libs") } } } @@ -217,7 +216,7 @@ tasks { if (it.name == "catalog-hive") { // println("copyCatalogRuntimeClass: ${it.name}") from(it.configurations.runtimeClasspath) - into("distribution/package/catalogs/catalog-hive/lib") + into("distribution/package/catalogs/hive/libs") } } } @@ -225,16 
+224,24 @@ tasks { val copySubmoduleClass by registering(Copy::class) { dependsOn("copyRuntimeClass", "copyCatalogRuntimeClass") subprojects.forEach() { - // println("copySubmoduleClass: ${it.name}") - if (it.name != "client-java") { + if (it.name != "client-java" && it.name != "integration-test" && it.name != "catalog-hive") { from("${it.name}/build/libs") - into("distribution/package/lib") + into("distribution/package/libs") include("*.jar") setDuplicatesStrategy(DuplicatesStrategy.INCLUDE) } } } + val copyCatalogModuleClass by registering(Copy::class) { + subprojects.forEach() { + if (it.name == "catalog-hive") { + from("${it.name}/build/libs") + into("distribution/package/catalogs/hive/libs") + } + } + } + task("integrationTest") { dependsOn(":integration-test:integrationTest") } diff --git a/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveClientPool.java b/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveClientPool.java index a9c3b7fc627..6ab417cc3bf 100644 --- a/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveClientPool.java +++ b/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveClientPool.java @@ -30,12 +30,15 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; // hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java /** Represents a client pool for managing connections to the Hive Metastore service. */ public class HiveClientPool extends ClientPoolImpl { + private static final Logger LOG = LoggerFactory.getLogger(HiveClientPool.class); private static final DynMethods.StaticMethod GET_CLIENT = DynMethods.builder("getProxy") .impl( @@ -96,6 +99,7 @@ protected IMetaStoreClient newClient() { @Override protected IMetaStoreClient reconnect(IMetaStoreClient client) { + LOG.warn("Reconnecting to Hive Metastore"); try { client.close(); client.reconnect(); @@ -116,6 +120,7 @@ protected boolean isConnectionException(Exception e) { @Override protected void close(IMetaStoreClient client) { + LOG.info("Closing Hive Metastore client"); client.close(); } diff --git a/core/src/main/java/com/datastrato/graviton/catalog/CatalogManager.java b/core/src/main/java/com/datastrato/graviton/catalog/CatalogManager.java index 450ee9a3d6d..70d8887ca86 100644 --- a/core/src/main/java/com/datastrato/graviton/catalog/CatalogManager.java +++ b/core/src/main/java/com/datastrato/graviton/catalog/CatalogManager.java @@ -71,19 +71,23 @@ public CatalogWrapper(BaseCatalog catalog, IsolatedClassLoader classLoader) { } public R doWithSchemaOps(ThrowableFunction fn) throws Exception { - if (asSchemas() == null) { - throw new UnsupportedOperationException("Catalog does not support schema operations"); - } - - return classLoader.withClassLoader(cl -> fn.apply(asSchemas())); + return classLoader.withClassLoader( + cl -> { + if (asSchemas() == null) { + throw new UnsupportedOperationException("Catalog does not support schema operations"); + } + return fn.apply(asSchemas()); + }); } public R doWithTableOps(ThrowableFunction fn) throws Exception { - if (asTables() == null) { - throw new UnsupportedOperationException("Catalog does not support table operations"); - } - - return classLoader.withClassLoader(cl -> fn.apply(asTables())); + return classLoader.withClassLoader( + cl -> { + if (asTables() == null) { + throw new UnsupportedOperationException("Catalog does not support 
table operations"); + } + return fn.apply(asTables()); + }); } public void close() { @@ -447,7 +451,14 @@ private String buildPkgPath(Map conf, String provider) { if (pkg != null) { pkgPath = pkg; } else if (!testEnv) { - pkgPath = gravitonHome + File.separator + "catalogs" + File.separator + provider; + pkgPath = + gravitonHome + + File.separator + + "catalogs" + + File.separator + + provider + + File.separator + + "libs"; } else { pkgPath = new StringBuilder() diff --git a/core/src/main/java/com/datastrato/graviton/catalog/CatalogOperationDispatcher.java b/core/src/main/java/com/datastrato/graviton/catalog/CatalogOperationDispatcher.java index 88daea35def..0f7b629fbae 100644 --- a/core/src/main/java/com/datastrato/graviton/catalog/CatalogOperationDispatcher.java +++ b/core/src/main/java/com/datastrato/graviton/catalog/CatalogOperationDispatcher.java @@ -240,7 +240,8 @@ private R doWithCatalog( NameIdentifier ident, ThrowableFunction fn, Class ex) throws E { try { - CatalogManager.CatalogWrapper c = catalogManager.loadCatalogAndWrap(ident); + NameIdentifier catalogIdent = getCatalogIdentifier(ident); + CatalogManager.CatalogWrapper c = catalogManager.loadCatalogAndWrap(catalogIdent); return fn.apply(c); } catch (Throwable throwable) { if (ex.isInstance(throwable)) { diff --git a/core/src/main/java/com/datastrato/graviton/utils/IsolatedClassLoader.java b/core/src/main/java/com/datastrato/graviton/utils/IsolatedClassLoader.java index bdc6391826f..f913ef6524a 100644 --- a/core/src/main/java/com/datastrato/graviton/utils/IsolatedClassLoader.java +++ b/core/src/main/java/com/datastrato/graviton/utils/IsolatedClassLoader.java @@ -4,8 +4,6 @@ */ package com.datastrato.graviton.utils; -import com.datastrato.graviton.meta.AuditInfo; -import com.datastrato.graviton.meta.rel.BaseSchema; import java.io.Closeable; import java.io.InputStream; import java.net.URL; @@ -151,9 +149,7 @@ private boolean isSharedClass(String name) { */ private boolean isBarrierClass(String name) { // We need to add more later on when we have more catalog implementations. 
- return name.startsWith(BaseSchema.class.getName()) - || name.startsWith(AuditInfo.class.getName()) - || barrierClasses.stream().anyMatch(name::startsWith); + return barrierClasses.stream().anyMatch(name::startsWith); } private ClassLoader getRootClassLoader() throws Exception { diff --git a/dev/docker/hive2/README.md b/dev/docker/hive2/README.md index 711c6e01e2d..61a5193622c 100644 --- a/dev/docker/hive2/README.md +++ b/dev/docker/hive2/README.md @@ -11,7 +11,7 @@ Build Image Run container ============= -docker run --rm -m -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 datastrato/hive2:0.1.0 +docker run --rm -d -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 datastrato/hive2:0.1.0 Login to the server ============= diff --git a/dev/docker/hive2/build-docker.sh b/dev/docker/hive2/build-docker.sh index 9f1b2ea3356..3b3d959e779 100755 --- a/dev/docker/hive2/build-docker.sh +++ b/dev/docker/hive2/build-docker.sh @@ -18,6 +18,43 @@ HADOOP_DOWNLOAD_URL="http://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_ HIVE_PACKAGE_NAME="apache-hive-${HIVE_VERSION}-bin.tar.gz" HIVE_DOWNLOAD_URL="https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/${HIVE_PACKAGE_NAME}" +# Build docker image for multi-arch +USAGE="-e Usage: ./build-docker.sh --platform [all|linux/amd64|linux/arm64] --image {image_name} --tag {tag_name}" + +# Get platform type +if [[ "$1" == "--platform" ]]; then + shift + platform_type="$1" + if [[ "${platform_type}" == "linux/amd64" || "${platform_type}" == "linux/arm64" || "${platform_type}" == "all" ]]; then + echo "INFO : platform type is ${platform_type}" + else + echo "ERROR : ${platform_type} is not a valid platform type" + echo ${USAGE} + exit 1 + fi + shift +else + platform_type="all" +fi + +# Get docker image name +if [[ "$1" == "--image" ]]; then + shift + image_name="$1" + shift +else + echo "ERROR : must specify image name" + echo ${USAGE} + exit 1 +fi + +# Get docker image tag +if [[ "$1" == "--tag" ]]; then + shift + tag_name="$1" + shift +fi + # Prepare download packages if [[ ! -d "${bin}/packages" ]]; then mkdir -p "${bin}/packages" @@ -41,5 +78,13 @@ else docker buildx create --platform linux/amd64,linux/arm64 --use --name hive2 fi -# Option params --no-cache --push -docker buildx build --platform=linux/amd64,linux/arm64 --build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --output type=docker --progress plain -t ${IMAGE_NAME} . +cd ${bin} +if [[ "${platform_type}" == "all" ]]; then + if [[ "${tag_name}" == "" ]]; then + docker buildx build --platform=linux/amd64,linux/arm64 --build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --push --progress plain -f Dockerfile -t ${image_name} . + else + docker buildx build --platform=linux/amd64,linux/arm64 --build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --push --tag ${tag_name} --progress plain -f Dockerfile -t ${image_name} . + fi +else + docker buildx build --platform=${platform_type} --build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --output type=docker --progress plain -f Dockerfile -t ${image_name} . 
+fi \ No newline at end of file diff --git a/docs/how-to-build.md b/docs/how-to-build.md new file mode 100644 index 00000000000..f39859a2dcd --- /dev/null +++ b/docs/how-to-build.md @@ -0,0 +1,71 @@ + +# How to Build Graviton + +## Prerequisites ++ JDK 1.8 ++ Git + +## Quick Start +1. Clone the Graviton project. + + ```shell + git clone git@github.com:datastrato/graviton.git + ``` + +2. Build the Graviton project. + + ```shell + cd graviton + ./gradlew build + ``` + > Note: The first time you build the project, it may take a while to download the dependencies. + +3. Deploy the Graviton project in your local environment. + + ```shell + ./gradlew compileDistribution + ``` + + The `compileDistribution` command will create a `distribution` directory in the Graviton root directory. + + The directory structure of the `distribution` directory is as follows: + ``` + ├── ... + └── distribution/package + ├── bin/graviton.sh # Graviton Server Launching scripts + ├── catalogs + │ └── hive/libs/ # Hive catalog dependencies + ├── conf/ # All configuration for Graviton + | ├── graviton.conf # Graviton Server configuration + | ├── graviton-env.sh # Environment variables, etc., JAVA_HOME, GRAVITON_HOME, and more. + | └── log4j2.properties # log4j configuration for Graviton Server. + ├── libs/ # Graviton Server dependencies lib + └── logs/ # Graviton Server logs + ``` + > Note: The `./gradlew clean` command will delete the `distribution` directory. + +4. Run Graviton Server. + + ```shell + distribution/package/bin/graviton.sh start + ``` + > Note: If you need to debug the Graviton Server, you can enable the `GRAVITON_DEBUG_OPTS` environment variable in the `conf/graviton-env.sh` file. + Then you can create a `Remote JVM Debug` configuration in `IntelliJ IDEA` and debug `graviton.server.main`. + +5. Stop Graviton Server. + + ```shell + distribution/package/bin/graviton.sh stop + ``` + +6. Assemble Graviton distribution package. + + ```shell + ./gradlew assembleDistribution + ``` + The `assembleDistribution` command will create `graviton-{version}-bin.tar` and `graviton-{version}-bin.tar.sha256` files in the `distribution/package` directory. + You can deploy the `graviton-{version}-bin.tar` file to your production environment. + > Note: The `graviton-{version}-bin.tar` file is the Graviton Server distribution package, and the `graviton-{version}-bin.tar.sha256` file is the sha256 checksum file for the Graviton Server distribution package. \ No newline at end of file diff --git a/docs/integration-test.md b/docs/integration-test.md new file mode 100644 index 00000000000..0a5b43df770 --- /dev/null +++ b/docs/integration-test.md @@ -0,0 +1,35 @@ + +# How to Run Integration Tests + +## Introduction +The `integration-test` module contains test cases that serve as integration tests to ensure the correctness of the Graviton Server, API, and Client. +You can run these tests locally or on GitHub Actions. + +## Running on GitHub Actions +When you submit a pull request to the `main` branch, GitHub Actions will automatically run the integration tests. +You can view the test results in the `Actions` tab of the pull request page. +The integration tests are executed in the following steps: + +1. If you set the `build docker image` label in the pull request, GitHub Actions will trigger the build of all Docker images under the `./dev/docker/` directory. This step usually takes around 10 minutes. If you have made changes to the Dockerfile, you need to set the `build docker image` label in the pull request. +2. 
If you do not set the `build docker image` label in the pull request, GitHub Actions will pull the Docker image `datastrato/hive2:latest` from the Docker Hub repository. This step usually takes around 15 seconds. +3. The Docker image is then run in the GitHub Actions environment. +4. If you set the `debug action` label in the pull request, GitHub Actions will run an SSH server with `csexton/debugger-action@master`, allowing you to remotely log in to the Actions environment for debugging purposes. +5. The Graviton project is compiled and packaged in the `distribution` directory using the `./gradlew compileDistribution` command. +6. The integration test cases in the `integration-test` module are executed using the `./gradlew integrationTest` command. +7. The Docker image is stopped. +8. The test environment is cleaned up. + +## Running Locally +Before running the tests, make sure Docker is installed. +Then, execute the following steps: +1. Execute the `./gradlew clean build` command to build the Graviton project. +2. Compile and package the Graviton project into the `distribution` directory using the `./gradlew compileDistribution` command. +3. Run a hive2 Docker container locally using the `docker run --rm -d -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 datastrato/hive2:0.1.0` command. +4. Run the integration test cases in the `integration-test` module using the `./gradlew integrationTest` command. + +Additionally, the Graviton Server and third-party data source Docker runtime environments will use certain ports. Ensure that these ports are not already in use: +- Graviton Server: Port `8088` +- Hive Docker runtime environment: Ports `50070`, `50075`, `10002`, `10000`, `8888`, `9083`, `7180`, and `22` \ No newline at end of file diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 9743e2ad214..ddcf0ced28e 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -63,6 +63,7 @@ jersey-test-framework-provider-jetty = { group = "org.glassfish.jersey.test-fram mockito-core = { group = "org.mockito", name = "mockito-core", version.ref = "mockito" } hive2-metastore = { group = "org.apache.hive", name = "hive-metastore", version.ref = "hive2"} hive2-exec = { group = "org.apache.hive", name = "hive-exec", version.ref = "hive2"} +hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref = "hive2"} hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop2"} hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop2"} airlift-units = { group = "io.airlift", name = "units", version.ref = "airlift-units"} diff --git a/integration-test/build.gradle.kts b/integration-test/build.gradle.kts index f6131a24c40..3a84b2a885c 100644 --- a/integration-test/build.gradle.kts +++ b/integration-test/build.gradle.kts @@ -11,26 +11,80 @@ plugins { } dependencies { - implementation(project(":api")) - implementation(project(":common")) - implementation(project(":core")) - implementation(project(":client-java")) - implementation(project(":server")) + testImplementation(project(":api")) + testImplementation(project(":core")) + testImplementation(project(":common")) + testImplementation(project(":client-java")) + testImplementation(project(":catalog-hive")) + testImplementation(project(":server")) + + testImplementation(libs.hive2.metastore) { + exclude("org.apache.hbase") + 
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager") + exclude("co.cask.tephra") + exclude("org.apache.avro") + exclude("org.apache.zookeeper") + exclude("org.apache.logging.log4j") + exclude("com.google.code.findbugs", "sr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.apache.curator") + exclude("com.github.joshelser") + exclude("io.dropwizard.metricss") + exclude("org.slf4j") + } + + testImplementation(libs.hive2.exec) { + artifact { + classifier = "core" + } + exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager") + exclude("org.apache.avro") + exclude("org.apache.zookeeper") + exclude("com.google.protobuf") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.apache.logging.log4j") + exclude("org.apache.curator") + exclude("org.pentaho") + exclude("org.slf4j") + } + + testImplementation(libs.hadoop2.mapreduce.client.core) { + exclude("*") + } + testImplementation(libs.hadoop2.common) { + exclude("*") + } + + testImplementation(libs.substrait.java.core) { + exclude("org.slf4j") + exclude("com.fasterxml.jackson.core") + exclude("com.fasterxml.jackson.datatype") + } - testCompileOnly(libs.commons.io) testCompileOnly(libs.lombok) testAnnotationProcessor(libs.lombok) testImplementation(libs.guava) + testImplementation(libs.hive2.common) testImplementation(libs.commons.lang3) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.params) testImplementation(libs.httpclient5) testRuntimeOnly(libs.junit.jupiter.engine) + testImplementation(libs.mockito.core) } tasks { val integrationTest by creating(Test::class) { environment("GRAVITON_HOME", rootDir.path + "/distribution/package") + environment("HADOOP_USER_NAME", "hive") useJUnitPlatform() } } \ No newline at end of file diff --git a/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/CatalogHiveIT.java b/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/CatalogHiveIT.java new file mode 100644 index 00000000000..31cc306bbf4 --- /dev/null +++ b/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/CatalogHiveIT.java @@ -0,0 +1,250 @@ +/* + * Copyright 2023 Datastrato. + * This software is licensed under the Apache License version 2. 
+ */ +package com.datastrato.graviton.integration.e2e; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.datastrato.graviton.Catalog; +import com.datastrato.graviton.NameIdentifier; +import com.datastrato.graviton.catalog.hive.HiveClientPool; +import com.datastrato.graviton.client.GravitonMetaLake; +import com.datastrato.graviton.dto.rel.ColumnDTO; +import com.datastrato.graviton.integration.util.AbstractIT; +import com.datastrato.graviton.integration.util.GravitonITUtils; +import com.datastrato.graviton.rel.SchemaChange; +import com.datastrato.graviton.rel.TableChange; +import com.google.common.collect.Maps; +import io.substrait.type.TypeCreator; +import java.util.Collections; +import java.util.Map; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.thrift.TException; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class CatalogHiveIT extends AbstractIT { + public static String metalakeName = GravitonITUtils.genRandomName("CatalogHiveIT_metalake"); + public static String catalogName = GravitonITUtils.genRandomName("CatalogHiveIT_catalog"); + public static String schemaName = GravitonITUtils.genRandomName("CatalogHiveIT_schema"); + public static String tableName = GravitonITUtils.genRandomName("CatalogHiveIT_table"); + public static String alertTableName = "alert_table_name"; + public static String table_comment = "table_comment"; + public static String HIVE_COL_NAME1 = "hive_col_name1"; + public static String HIVE_COL_NAME2 = "hive_col_name2"; + public static String HIVE_COL_NAME3 = "hive_col_name3"; + + static String HIVE_METASTORE_URIS = "thrift://localhost:9083"; + static String HADOOP_USER_NAME = "hive"; + + private static HiveClientPool hiveClientPool; + + @BeforeAll + public static void startup() throws Exception { + HiveConf hiveConf = new HiveConf(); + hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, HIVE_METASTORE_URIS); + + GravitonMetaLake[] gravitonMetaLakes = client.listMetalakes(); + + hiveClientPool = new HiveClientPool(1, hiveConf); + client.createMetalake(NameIdentifier.of(metalakeName), "comment", Collections.emptyMap()); + createHiveTable(); + } + + @AfterAll + public static void stop() { + client.dropMetalake(NameIdentifier.of(metalakeName)); + if (hiveClientPool != null) { + hiveClientPool.close(); + } + } + + public static void createHiveTable() throws TException, InterruptedException { + GravitonMetaLake metalake = client.loadMetalake(NameIdentifier.of(metalakeName)); + + // Create catalog from Graviton API + Map properties = Maps.newHashMap(); + properties.put("provider", "hive"); + properties.put(HiveConf.ConfVars.METASTOREURIS.varname, HIVE_METASTORE_URIS); + + Catalog catalog = + metalake.createCatalog( + NameIdentifier.of(metalakeName, catalogName), + Catalog.Type.RELATIONAL, + "comment", + properties); + + NameIdentifier ident = NameIdentifier.of(metalakeName, catalogName, schemaName); + Map properties1 = Maps.newHashMap(); + properties1.put("key1", "val1"); + properties1.put("key2", "val2"); + String comment = "comment"; + + catalog.asSchemas().createSchema(ident, comment, properties1); + + // 
Directly get database from hive metastore to verify the schema creation + Database database = hiveClientPool.run(client -> client.getDatabase(schemaName)); + Assertions.assertEquals(schemaName.toLowerCase(), database.getName()); + Assertions.assertEquals(comment, database.getDescription()); + Assertions.assertEquals("val1", database.getParameters().get("key1")); + Assertions.assertEquals("val2", database.getParameters().get("key2")); + + // Create table from Graviton API + ColumnDTO col1 = + new ColumnDTO.Builder() + .withName(HIVE_COL_NAME1) + .withDataType(TypeCreator.NULLABLE.I8) + .withComment("col_1_comment") + .build(); + ColumnDTO col2 = + new ColumnDTO.Builder() + .withName(HIVE_COL_NAME2) + .withDataType(TypeCreator.NULLABLE.DATE) + .withComment("col_2_comment") + .build(); + ColumnDTO col3 = + new ColumnDTO.Builder() + .withName(HIVE_COL_NAME3) + .withDataType(TypeCreator.NULLABLE.STRING) + .withComment("col_3_comment") + .build(); + ColumnDTO[] columns = new ColumnDTO[] {col1, col2, col3}; + + NameIdentifier nameIdentifier = + NameIdentifier.of(metalakeName, catalogName, schemaName, tableName); + Map properties2 = Maps.newHashMap(); + properties2.put("key2-1", "val1"); + properties2.put("key2-2", "val2"); + catalog.asTableCatalog().createTable(nameIdentifier, columns, table_comment, properties2); + + // Directly get table from hive metastore to check if the table is created successfully. + org.apache.hadoop.hive.metastore.api.Table hiveTab = + hiveClientPool.run(client -> client.getTable(schemaName, tableName)); + Assertions.assertEquals(schemaName.toLowerCase(), hiveTab.getDbName()); + Assertions.assertEquals(tableName.toLowerCase(), hiveTab.getTableName()); + Assertions.assertEquals("MANAGED_TABLE", hiveTab.getTableType()); + Assertions.assertEquals(table_comment, hiveTab.getParameters().get("comment")); + + Assertions.assertEquals(HIVE_COL_NAME1, hiveTab.getSd().getCols().get(0).getName()); + Assertions.assertEquals("tinyint", hiveTab.getSd().getCols().get(0).getType()); + Assertions.assertEquals("col_1_comment", hiveTab.getSd().getCols().get(0).getComment()); + + Assertions.assertEquals(HIVE_COL_NAME2, hiveTab.getSd().getCols().get(1).getName()); + Assertions.assertEquals("date", hiveTab.getSd().getCols().get(1).getType()); + Assertions.assertEquals("col_2_comment", hiveTab.getSd().getCols().get(1).getComment()); + + Assertions.assertEquals(HIVE_COL_NAME3, hiveTab.getSd().getCols().get(2).getName()); + Assertions.assertEquals("string", hiveTab.getSd().getCols().get(2).getType()); + Assertions.assertEquals("col_3_comment", hiveTab.getSd().getCols().get(2).getComment()); + } + + @Order(1) + @Test + public void testAlterHiveTable() throws TException, InterruptedException { + GravitonMetaLake metalake = client.loadMetalake(NameIdentifier.of(metalakeName)); + Catalog catalog = metalake.loadCatalog(NameIdentifier.of(metalakeName, catalogName)); + catalog + .asTableCatalog() + .alterTable( + NameIdentifier.of(metalakeName, catalogName, schemaName, tableName), + TableChange.rename(alertTableName), + TableChange.updateComment(table_comment + "_new"), + TableChange.removeProperty("key1"), + TableChange.setProperty("key2", "val2_new"), + TableChange.addColumn(new String[] {"col_4"}, TypeCreator.NULLABLE.STRING), + TableChange.renameColumn(new String[] {HIVE_COL_NAME2}, "col_2_new"), + TableChange.updateColumnComment(new String[] {HIVE_COL_NAME1}, "comment_new"), + TableChange.updateColumnType(new String[] {HIVE_COL_NAME1}, TypeCreator.NULLABLE.I32)); + + // Directly get table from hive
metastore to check if the table is altered successfully. + org.apache.hadoop.hive.metastore.api.Table hiveTab = + hiveClientPool.run(client -> client.getTable(schemaName, alertTableName)); + Assertions.assertEquals(schemaName.toLowerCase(), hiveTab.getDbName()); + Assertions.assertEquals(alertTableName, hiveTab.getTableName()); + Assertions.assertEquals("val2_new", hiveTab.getParameters().get("key2")); + + Assertions.assertEquals(HIVE_COL_NAME1, hiveTab.getSd().getCols().get(0).getName()); + Assertions.assertEquals("int", hiveTab.getSd().getCols().get(0).getType()); + Assertions.assertEquals("comment_new", hiveTab.getSd().getCols().get(0).getComment()); + + Assertions.assertEquals("col_2_new", hiveTab.getSd().getCols().get(1).getName()); + Assertions.assertEquals("date", hiveTab.getSd().getCols().get(1).getType()); + Assertions.assertEquals("col_2_comment", hiveTab.getSd().getCols().get(1).getComment()); + + Assertions.assertEquals(HIVE_COL_NAME3, hiveTab.getSd().getCols().get(2).getName()); + Assertions.assertEquals("string", hiveTab.getSd().getCols().get(2).getType()); + Assertions.assertEquals("col_3_comment", hiveTab.getSd().getCols().get(2).getComment()); + + Assertions.assertEquals("col_4", hiveTab.getSd().getCols().get(3).getName()); + Assertions.assertEquals("string", hiveTab.getSd().getCols().get(3).getType()); + Assertions.assertEquals(null, hiveTab.getSd().getCols().get(3).getComment()); + } + + @Order(2) + @Test + public void testDropHiveTable() { + GravitonMetaLake metalake = client.loadMetalake(NameIdentifier.of(metalakeName)); + Catalog catalog = metalake.loadCatalog(NameIdentifier.of(metalakeName, catalogName)); + catalog + .asTableCatalog() + .dropTable(NameIdentifier.of(metalakeName, catalogName, schemaName, alertTableName)); + + // Directly get table from hive metastore to check if the table is dropped successfully. 
+ assertThrows( + NoSuchObjectException.class, + () -> hiveClientPool.run(client -> client.getTable(schemaName, alertTableName))); + } + + // TODO (xun) enable this test waiting for fixed [#316] [Bug report] alterSchema throw + // NoSuchSchemaException + // @Order(3) + // @Test + public void testAlterSchema() throws TException, InterruptedException { + NameIdentifier ident = NameIdentifier.of(metalakeName, catalogName, schemaName); + Map properties = Maps.newHashMap(); + properties.put("key1", "val1"); + properties.put("key2", "val2"); + String comment = "comment"; + + GravitonMetaLake metalake = client.loadMetalake(NameIdentifier.of(metalakeName)); + Catalog catalog = metalake.loadCatalog(NameIdentifier.of(metalakeName, catalogName)); + catalog + .asSchemas() + .alterSchema( + ident, + SchemaChange.removeProperty("key1"), + SchemaChange.setProperty("key2", "val2-alter")); + + NameIdentifier[] nameIdentifiers = catalog.asSchemas().listSchemas(ident.namespace()); + + Map properties2 = catalog.asSchemas().loadSchema(ident).properties(); + Assertions.assertFalse(properties2.containsKey("key1")); + Assertions.assertEquals("val2-alter", properties2.get("key2")); + + Database database = hiveClientPool.run(client -> client.getDatabase(schemaName)); + Map properties3 = database.getParameters(); + Assertions.assertFalse(properties3.containsKey("key1")); + Assertions.assertEquals("val2-alter", properties3.get("key2")); + } + + @Order(4) + @Test + public void testDropHiveDB() { + GravitonMetaLake metalake = client.loadMetalake(NameIdentifier.of(metalakeName)); + Catalog catalog = metalake.loadCatalog(NameIdentifier.of(metalakeName, catalogName)); + catalog.asSchemas().dropSchema(NameIdentifier.of(metalakeName, catalogName, schemaName), true); + + assertThrows( + NoSuchObjectException.class, + () -> hiveClientPool.run(client -> client.getDatabase(schemaName))); + } +} diff --git a/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/MetalakeIT.java b/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/MetalakeIT.java index fa3a7de68f3..43fa4eb0ac0 100644 --- a/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/MetalakeIT.java +++ b/integration-test/src/test/java/com/datastrato/graviton/integration/e2e/MetalakeIT.java @@ -26,7 +26,7 @@ @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class MetalakeIT extends AbstractIT { - public static String metalakeName = GravitonITUtils.genRandomName(); + public static String metalakeName = GravitonITUtils.genRandomName("metalake"); @BeforeAll private static void start() { @@ -66,7 +66,7 @@ public void testLoadMetalake() { @Order(3) @Test public void testAlterMetalake() { - String alterMetalakeName = GravitonITUtils.genRandomName(); + String alterMetalakeName = GravitonITUtils.genRandomName("metalake"); // TODO: Add more test cases for alter metalake MetalakeChange[] changes1 = diff --git a/integration-test/src/test/java/com/datastrato/graviton/integration/util/GravitonITUtils.java b/integration-test/src/test/java/com/datastrato/graviton/integration/util/GravitonITUtils.java index eeac3121443..278f695acc9 100644 --- a/integration-test/src/test/java/com/datastrato/graviton/integration/util/GravitonITUtils.java +++ b/integration-test/src/test/java/com/datastrato/graviton/integration/util/GravitonITUtils.java @@ -44,7 +44,7 @@ public static void sleep(long millis, boolean logOutput) { } } - public static String genRandomName() { - return UUID.randomUUID().toString().replace("-", ""); + public static 
String genRandomName(String prefix) { + return prefix + "_" + UUID.randomUUID().toString().replace("-", ""); } }
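For reference, the final hunk changes the test helper so that every generated resource name carries a readable prefix followed by a dash-free UUID, which makes schemas and tables left behind in the shared Hive metastore traceable to the suite that created them. The standalone class below is an illustrative sketch only (it is not part of the patch); it simply mirrors the behavior of `GravitonITUtils.genRandomName(String prefix)` after this change.

```java
import java.util.UUID;

// Illustrative sketch (not part of the patch): mirrors the behavior of
// GravitonITUtils.genRandomName(String prefix) introduced above.
public class RandomNameExample {

  static String genRandomName(String prefix) {
    // Produces "<prefix>_<uuid-without-dashes>", e.g. "metalake_5f2a1c...".
    return prefix + "_" + UUID.randomUUID().toString().replace("-", "");
  }

  public static void main(String[] args) {
    // Each suite uses its own prefix, so a leaked resource in the shared
    // Hive metastore points back to the test that created it, while the
    // UUID suffix keeps concurrent CI runs from colliding.
    System.out.println(genRandomName("metalake"));
    System.out.println(genRandomName("CatalogHiveIT_table"));
  }
}
```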