[#198] test: Add Catalog-hive e2e integration test #308

Merged: 10 commits, Sep 5, 2023

Changes from 6 commits
90 changes: 90 additions & 0 deletions .github/workflows/integration-test.yml
@@ -0,0 +1,90 @@
name: Integration Test

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events, but only for the "main" and "branch-*" branches
push:
branches: [ "main", "branch-*" ]
pull_request:
branches: [ "main", "branch-*" ]

env:
HIVE2_IMAGE_NAME: datastrato/hive2
HIVE2_IMAGE_VERSION: 0.1.0
HIVE2_IMAGE_LATEST: latest
Contributor:

What's the usage of "HIVE2_IMAGE_VERSION" and "HIVE2_IMAGE_LATEST"?


Contributor:

My question is that "HIVE2_IMAGE_VERSION" doesn't seem to be used in this script. Also, "HIVE2_IMAGE_VERSION" and "HIVE2_IMAGE_LATEST" both point to version information; do we need to unify them?

Member:

Just a reminder that with ASF projects you can only publish releases to Docker and can't refer to "latest". If we go with publishing "latest", we'll need to change that when we enter the ASF incubator.

Member Author:

> Just a reminder that with ASF projects you can only publish releases to Docker and can't refer to "latest". If we go with publishing "latest", we'll need to change that when we enter the ASF incubator.

@justinmclean Thank you for the reminder. I've modified the code accordingly.
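A rough sketch of the reviewer's suggestion (illustrative only, not the workflow that was finally merged): reference a released image tag instead of a mutable "latest" reference. The tag value and step below are examples.

    # Hypothetical workflow fragment: pin the Hive image to a released tag.
    env:
      HIVE2_IMAGE_NAME: datastrato/hive2
      HIVE2_IMAGE_VERSION: 0.1.0   # example release tag

    jobs:
      test-amd64-arch:
        runs-on: ubuntu-latest
        steps:
          - name: Build the hive2 Docker image for AMD64
            run: ./dev/docker/hive2/build-docker.sh --platform linux/amd64 --image ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_VERSION}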


concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
# Integration test for AMD64 architecture
test-amd64-arch:
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
matrix:
architecture: [linux/amd64]
env:
DOCKER_RUN_NAME: hive2-amd64
PLATFORM: ${{ matrix.architecture }}
steps:
- uses: actions/checkout@v3

- uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'temurin'

- name: Set up QEMU
uses: docker/setup-qemu-action@v1

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build the hive2 Docker image for AMD64
Contributor:

AMD64?

Member Author:

Most of the Docker Official Images on Docker Hub provide a variety of architectures. For example, the busybox image supports amd64, arm32v5, arm32v6, arm32v7, arm64v8, i386, ppc64le, and s390x. When running this image on an x86_64 / amd64 machine, the amd64 variant is pulled and run.

https://docs.docker.com/build/building/multi-platform/#:~:text=Most%20of%20the%20Docker%20Official,variant%20is%20pulled%20and%20run.
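As a quick, hedged illustration of that behaviour (not part of this PR): you can list the per-architecture variants sitting behind a single multi-platform tag. busybox is used here only as a well-known example image.

    # Show the architecture entries in a multi-platform image's manifest list.
    docker manifest inspect busybox | grep architecture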

if: ${{ contains(github.event.pull_request.labels.*.name, 'build docker image') }}
run: ./dev/docker/hive2/build-docker.sh --platform ${PLATFORM} --image ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_LATEST}

- name: Run AMD64 container
run: |
docker run --rm --name ${DOCKER_RUN_NAME} --platform ${PLATFORM} -d -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_LATEST}
docker ps -a

- name: Setup Gradle
uses: gradle/gradle-build-action@v2
with:
gradle-version: '8.1.1'

- name: Show gradle version
run: gradle --version

- name: Package Graviton
run: |
gradle build
gradle compileDistribution

- name: Setup Debug Action
if: ${{ contains(github.event.pull_request.labels.*.name, 'debug action') }}
uses: csexton/debugger-action@master

- name: Integration Test
run: |
gradle integrationTest

- name: Print logs when Graviton integration tests fail
if: ${{ failure() }}
run: |
if [ -f "distribution/package/logs/graviton-server.out" ]; then
cat distribution/package/logs/graviton-server.out
fi
if [ -f "distribution/package/logs/graviton-server.log" ]; then
cat distribution/package/logs/graviton-server.log
fi

- name: Stop and remove container
run: |
docker stop ${DOCKER_RUN_NAME}
sleep 3
docker ps -a
docker rmi ${HIVE2_IMAGE_NAME}:${HIVE2_IMAGE_LATEST}
47 changes: 0 additions & 47 deletions .github/workflows/integration.yml

This file was deleted.

11 changes: 10 additions & 1 deletion README.md
@@ -2,4 +2,13 @@
Copyright 2023 Datastrato.
This software is licensed under the Apache License version 2.
-->
# Graviton
# Graviton
## Introduction

Graviton is a high-performance, geo-distributed and federated metadata lake.

## Development Guide

1. [How to build Graviton](docs/how-to-build.md)
2. [How to Run Integration Test](docs/integration-test.md)
3. [How to publish Docker images](docs/publish-docker-images.md)
4 changes: 2 additions & 2 deletions bin/graviton.sh
@@ -124,14 +124,14 @@ function stop() {
}

HOSTNAME=$(hostname)
GRAVITON_OUTFILE="${GRAVITON_LOG_DIR}/graviton-${HOSTNAME}.out"
GRAVITON_OUTFILE="${GRAVITON_LOG_DIR}/graviton-server.out"
GRAVITON_SERVER_NAME=com.datastrato.graviton.server.GravitonServer

JAVA_OPTS+=" -Dfile.encoding=UTF-8"
JAVA_OPTS+=" -Dlog4j2.configurationFile=file://${GRAVITON_CONF_DIR}/log4j2.properties"
JAVA_OPTS+=" -Dgraviton.log.path=${GRAVITON_LOG_DIR} ${GRAVITON_MEM}"

addJarInDir "${GRAVITON_HOME}/lib"
addJarInDir "${GRAVITON_HOME}/libs"

case "${1}" in
start)
27 changes: 17 additions & 10 deletions build.gradle.kts
@@ -149,7 +149,7 @@ tasks {
val outputDir = projectDir.dir("distribution")

val compileDistribution by registering {
dependsOn("copyRuntimeClass", "copyCatalogRuntimeClass", "copySubmoduleClass")
dependsOn("copyRuntimeClass", "copyCatalogRuntimeClass", "copySubmoduleClass", "copyCatalogModuleClass")

group = "graviton distribution"
outputs.dir(projectDir.dir("distribution/package"))
@@ -172,8 +172,7 @@
group = "graviton distribution"
finalizedBy("checksumDistribution")
from(compileDistribution.map { it.outputs.files.single() })
archiveBaseName.set("datastrato")
archiveAppendix.set(rootProject.name.lowercase())
archiveBaseName.set(rootProject.name.lowercase())
archiveVersion.set("${version}")
archiveClassifier.set("bin")
destinationDirectory.set(outputDir)
@@ -204,10 +203,10 @@

val copyRuntimeClass by registering(Copy::class) {
subprojects.forEach() {
if (it.name != "catalog-hive" && it.name != "client-java") {
// println("copyRuntimeClass: ${it.name}")
if (it.name != "catalog-hive" && it.name != "client-java" && it.name != "integration-test") {
println("copyRuntimeClass: ${it.name}")
from(it.configurations.runtimeClasspath)
into("distribution/package/lib")
into("distribution/package/libs")
}
}
}
@@ -217,24 +216,32 @@
if (it.name == "catalog-hive") {
// println("copyCatalogRuntimeClass: ${it.name}")
from(it.configurations.runtimeClasspath)
into("distribution/package/catalogs/catalog-hive/lib")
into("distribution/package/catalogs/hive/libs")
}
}
}

val copySubmoduleClass by registering(Copy::class) {
dependsOn("copyRuntimeClass", "copyCatalogRuntimeClass")
subprojects.forEach() {
// println("copySubmoduleClass: ${it.name}")
if (it.name != "client-java") {
if (it.name != "client-java" && it.name != "integration-test" && it.name != "catalog-hive") {
from("${it.name}/build/libs")
into("distribution/package/lib")
into("distribution/package/libs")
include("*.jar")
setDuplicatesStrategy(DuplicatesStrategy.INCLUDE)
}
}
}

val copyCatalogModuleClass by registering(Copy::class) {
subprojects.forEach() {
if (it.name == "catalog-hive") {
from("${it.name}/build/libs")
into("distribution/package/catalogs/hive/libs")
}
}
}

task("integrationTest") {
dependsOn(":integration-test:integrationTest")
}
@@ -0,0 +1,19 @@
/*
* Copyright 2023 Datastrato.
* This software is licensed under the Apache License version 2.
*/
package com.datastrato.graviton.catalog.hive;

import com.datastrato.graviton.Config;
import com.datastrato.graviton.config.ConfigBuilder;
import com.datastrato.graviton.config.ConfigEntry;

public class HiveCatalogConfig extends Config {
public static final ConfigEntry<String> HADOOP_USER_NAME =
new ConfigBuilder("graviton.hadoop.user.name")
.doc(
"The specify Hadoop user name that will be used when accessing Hadoop Distributed File System (HDFS).")
.version("0.1.0")
.stringConf()
.createWithDefault("hive");
}
@@ -88,6 +88,13 @@ public void initialize(Map<String, String> conf) throws RuntimeException {
conf.forEach(hadoopConf::set);
hiveConf = new HiveConf(hadoopConf, HiveCatalogOperations.class);

// TODO(xun): Wait until the Graviton User Account System is added to manage users and groups.
// The Hadoop user name to use when accessing the Hadoop
// Distributed File System (HDFS).
if (conf.containsKey(HiveCatalogConfig.HADOOP_USER_NAME.getKey())) {
System.setProperty("HADOOP_USER_NAME", conf.get(HiveCatalogConfig.HADOOP_USER_NAME.getKey()));
Contributor:

Why do we need this?

Member Author:

This is the Hadoop user name used when accessing the Hadoop Distributed File System (HDFS). I don't think relying on the HADOOP_USER_NAME environment variable is a good fit, because users may need HiveCatalogOperations.java to work against many different Hive clusters. We plan to add a Graviton user account system to manage users and groups in the future.

Contributor:

Is it possible to remove this configuration and bypass the issue in some other way? I don't think using a configuration entry here is a good idea.

Contributor:

Is it possible to set this property in the test?

Member Author:

Modified to set the HADOOP_USER_NAME environment variable in the integration-test module.
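A minimal, hypothetical sketch of what that test-side setup could look like (the class name is illustrative, not the module's actual code). Under Hadoop's simple authentication, UserGroupInformation falls back to this system property after the HADOOP_USER_NAME environment variable:

    import org.junit.jupiter.api.BeforeAll;

    // Hypothetical integration-test fixture, not the real module code.
    public class HiveCatalogHadoopUserIT {

      @BeforeAll
      static void setHadoopUser() {
        // Make HDFS operations in the test run as the "hive" user.
        System.setProperty("HADOOP_USER_NAME", "hive");
      }
    }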

}

// todo(xun): add hive client pool size in config
this.clientPool = new HiveClientPool(1, hiveConf);
}
@@ -30,12 +30,15 @@
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TTransportException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java

/** Represents a client pool for managing connections to the Hive Metastore service. */
public class HiveClientPool extends ClientPoolImpl<IMetaStoreClient, TException> {

private static final Logger LOG = LoggerFactory.getLogger(HiveClientPool.class);
private static final DynMethods.StaticMethod GET_CLIENT =
DynMethods.builder("getProxy")
.impl(
@@ -96,6 +99,7 @@ protected IMetaStoreClient newClient() {

@Override
protected IMetaStoreClient reconnect(IMetaStoreClient client) {
LOG.warn("Reconnecting to Hive Metastore");
try {
client.close();
client.reconnect();
@@ -116,6 +120,7 @@ protected boolean isConnectionException(Exception e) {

@Override
protected void close(IMetaStoreClient client) {
LOG.info("Closing Hive Metastore client");
client.close();
}

@@ -71,19 +71,23 @@ public CatalogWrapper(BaseCatalog catalog, IsolatedClassLoader classLoader) {
}

public <R> R doWithSchemaOps(ThrowableFunction<SupportsSchemas, R> fn) throws Exception {
if (asSchemas() == null) {
throw new UnsupportedOperationException("Catalog does not support schema operations");
}

return classLoader.withClassLoader(cl -> fn.apply(asSchemas()));
return classLoader.withClassLoader(
cl -> {
if (asSchemas() == null) {
throw new UnsupportedOperationException("Catalog does not support schema operations");
}
return fn.apply(asSchemas());
});
}

public <R> R doWithTableOps(ThrowableFunction<TableCatalog, R> fn) throws Exception {
if (asTables() == null) {
throw new UnsupportedOperationException("Catalog does not support table operations");
}

return classLoader.withClassLoader(cl -> fn.apply(asTables()));
return classLoader.withClassLoader(
cl -> {
if (asTables() == null) {
throw new UnsupportedOperationException("Catalog does not support table operations");
}
return fn.apply(asTables());
});
}

public void close() {
@@ -447,7 +451,14 @@ private String buildPkgPath(Map<String, String> conf, String provider) {
if (pkg != null) {
pkgPath = pkg;
} else if (!testEnv) {
pkgPath = gravitonHome + File.separator + "catalogs" + File.separator + provider;
pkgPath =
gravitonHome
+ File.separator
+ "catalogs"
+ File.separator
+ provider
+ File.separator
+ "libs";
} else {
pkgPath =
new StringBuilder()
@@ -240,7 +240,8 @@ private <R, E extends Throwable> R doWithCatalog(
NameIdentifier ident, ThrowableFunction<CatalogManager.CatalogWrapper, R> fn, Class<E> ex)
throws E {
try {
CatalogManager.CatalogWrapper c = catalogManager.loadCatalogAndWrap(ident);
NameIdentifier catalogIdent = getCatalogIdentifier(ident);
CatalogManager.CatalogWrapper c = catalogManager.loadCatalogAndWrap(catalogIdent);
return fn.apply(c);
} catch (Throwable throwable) {
if (ex.isInstance(throwable)) {
@@ -4,8 +4,6 @@
*/
package com.datastrato.graviton.utils;

import com.datastrato.graviton.meta.AuditInfo;
import com.datastrato.graviton.meta.rel.BaseSchema;
import java.io.Closeable;
import java.io.InputStream;
import java.net.URL;
@@ -151,9 +149,7 @@ private boolean isSharedClass(String name) {
*/
private boolean isBarrierClass(String name) {
// We need to add more later on when we have more catalog implementations.
return name.startsWith(BaseSchema.class.getName())
|| name.startsWith(AuditInfo.class.getName())
|| barrierClasses.stream().anyMatch(name::startsWith);
return barrierClasses.stream().anyMatch(name::startsWith);
}

private ClassLoader getRootClassLoader() throws Exception {
2 changes: 1 addition & 1 deletion dev/docker/hive2/README.md
@@ -11,7 +11,7 @@ Build Image

Run container
=============
docker run --rm -m -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 datastrato/hive2:0.1.0
docker run --rm -d -p 8088:8088 -p 50070:50070 -p 50075:50075 -p 10000:10000 -p 10002:10002 -p 8888:8888 -p 9083:9083 -p 8022:22 datastrato/hive2:0.1.0

Login to the server
=============