
Commit

Add ranger hive IT
xunliu committed Jul 3, 2024
1 parent 7e9bc69 commit 6e9d239
Showing 26 changed files with 569 additions and 52 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/docker-image.yml
@@ -22,6 +22,10 @@ on:
description: 'Docker tag to apply to this image'
required: true
type: string
+ environments:
+   description: 'Environment variables for the build image'
+   required: false
+   type: string
token:
description: 'Publish Docker token'
required: true
@@ -93,4 +97,7 @@ jobs:
distribution: 'temurin'

- name: Build and Push the Docker image
- run: sudo rm -rf /usr/local/lib/android && sudo rm -rf /opt/hostedtoolcache/CodeQL && ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }} --latest
+ run: |
+   sudo rm -rf /usr/local/lib/android
+   sudo rm -rf /opt/hostedtoolcache/CodeQL
+   ${{ github.event.inputs.environments }} ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }} --latest
4 changes: 2 additions & 2 deletions build.gradle.kts
@@ -694,9 +694,9 @@ fun printDockerCheckInfo()

val dockerTest = project.extra["dockerTest"] as? Boolean ?: false
if (dockerTest) {
println("Using Docker container to run all tests. [$testMode test]")
println("Using Docker container to run all tests ......................... [$testMode test]")
} else {
println("Run test cases without `gravitino-docker-test` tag ................ [$testMode test]")
println("Run test cases without `gravitino-docker-test` tag .............. [$testMode test]")
}
println("-----------------------------------------------------------------")

@@ -54,7 +54,7 @@ public class HadoopCatalogIT extends AbstractIT {

@BeforeAll
public static void setup() throws IOException {
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());

Configuration conf = new Configuration();
conf.set("fs.defaultFS", defaultBaseLocation());
@@ -151,7 +151,7 @@ private static String getInsertWithPartitionSql(

@BeforeAll
public static void startup() throws Exception {
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());

HIVE_METASTORE_URIS =
String.format(
@@ -83,7 +83,7 @@ public static void startIntegrationTest() throws Exception {
configs.put(Configs.AUTHENTICATOR.getKey(), AuthenticatorType.SIMPLE.name().toLowerCase());
registerCustomConfigs(configs);
AbstractIT.startIntegrationTest();
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());
HIVE_METASTORE_URIS =
String.format(
"thrift://%s:%d",
@@ -110,7 +110,7 @@ public abstract class CatalogIcebergBaseIT extends AbstractIT {
public void startup() throws Exception {
ignoreIcebergRestService = false;
AbstractIT.startIntegrationTest();
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());
initIcebergCatalogProperties();
createMetalake();
createCatalog();
@@ -11,6 +11,7 @@
import com.datastrato.gravitino.integration.test.container.ContainerSuite;
import com.datastrato.gravitino.integration.test.container.HiveContainer;
import com.datastrato.gravitino.integration.test.util.GravitinoITUtils;
+ import com.google.common.collect.ImmutableMap;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Tag;
@@ -31,7 +32,7 @@ public IcebergRESTHiveCatalogIT() {

@Override
void initEnv() {
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());
}

@Override
@@ -12,6 +12,7 @@
import com.datastrato.gravitino.integration.test.container.ContainerSuite;
import com.datastrato.gravitino.integration.test.container.HiveContainer;
import com.datastrato.gravitino.integration.test.util.GravitinoITUtils;
+ import com.google.common.collect.ImmutableMap;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Tag;
@@ -29,7 +30,7 @@ public IcebergRESTJdbcCatalogIT() {

@Override
void initEnv() {
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());
}

public Map<String, String> getCatalogConfig() {
@@ -32,6 +32,7 @@
import com.datastrato.gravitino.rel.expressions.transforms.Transforms;
import com.datastrato.gravitino.rel.types.Types;
import com.google.common.base.Preconditions;
+ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
@@ -88,7 +89,7 @@ public abstract class CatalogPaimonBaseIT extends AbstractIT {

@BeforeAll
public void startup() {
- containerSuite.startHiveContainer();
+ containerSuite.startHiveContainer(false, ImmutableMap.of());
catalogProperties = initPaimonCatalogProperties();
createMetalake();
createCatalog();
2 changes: 1 addition & 1 deletion dev/docker/hive/hive-dependency.sh
@@ -12,7 +12,7 @@ HADOOP_VERSION=${HADOOP_VERSION:-"2.7.3"}
HIVE_VERSION=${HIVE_VERSION:-"2.3.9"}
MYSQL_JDBC_DRIVER_VERSION=${MYSQL_VERSION:-"8.0.15"}
ZOOKEEPER_VERSION=${ZOOKEEPER_VERSION:-"3.4.13"}
- RANGER_VERSION=${RANGER_VERSION:-"2.4.0"} # NOTE: Currently only tested Ranger 2.4.0 in the Hadoop 2.7.3 and Hive 2.3.9
+ RANGER_VERSION=${RANGER_VERSION:-"2.4.0"} # Note: the Ranger plugin 2.4.0 has currently only been tested with Hadoop 3.1.0 and Hive 3.1.3

HADOOP_PACKAGE_NAME="hadoop-${HADOOP_VERSION}.tar.gz"
HADOOP_DOWNLOAD_URL="https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE_NAME}"
4 changes: 2 additions & 2 deletions dev/docker/hive/start.sh
@@ -20,7 +20,7 @@ if [[ -n "${RANGER_HIVE_REPOSITORY_NAME}" && -n "${RANGER_SERVER_URL}" ]]; then
${RANGER_HIVE_PLUGIN_HOME}/enable-hive-plugin.sh

# Reduce poll policy interval in the ranger plugin configuration
- sed -i '/<name>ranger.plugin.hive.policy.pollIntervalMs<\/name>/{n;s/<value>30000<\/value>/<value>1000<\/value>/}' ${HIVE_HOME}/conf/ranger-hive-security.xml
+ sed -i '/<name>ranger.plugin.hive.policy.pollIntervalMs<\/name>/{n;s/<value>30000<\/value>/<value>500<\/value>/}' ${HIVE_HOME}/conf/ranger-hive-security.xml

# Enable audit log in hive
cp ${HIVE_HOME}/conf/hive-log4j2.properties.template ${HIVE_HOME}/conf/hive-log4j2.properties
@@ -54,7 +54,7 @@ if [[ -n "${RANGER_HDFS_REPOSITORY_NAME}" && -n "${RANGER_SERVER_URL}" ]]; then
${RANGER_HDFS_PLUGIN_HOME}/enable-hdfs-plugin.sh

# Reduce poll policy interval in the ranger plugin configuration
- sed -i '/<name>ranger.plugin.hive.policy.pollIntervalMs<\/name>/{n;s/<value>30000<\/value>/<value>1000<\/value>/}' ${HADOOP_HOME}/etc/hadoop/ranger-hdfs-security.xml
+ sed -i '/<name>ranger.plugin.hdfs.policy.pollIntervalMs<\/name>/{n;s/<value>30000<\/value>/<value>500<\/value>/}' ${HADOOP_HOME}/etc/hadoop/ranger-hdfs-security.xml

# Enable Ranger audit log in hdfs
cat <<'EOF' >> ${HADOOP_HOME}/etc/hadoop/log4j.properties
11 changes: 11 additions & 0 deletions docs/docker-image-details.md
@@ -119,6 +119,17 @@ You can use this kind of image to test the catalog of Apache Hive.

Changelog

+ - gravitino-ci-hive:0.1.13
+   - Enable the Ranger plugin in Hive and HDFS
+   - Hive version 3.1.3
+   - HDFS version 3.1.0
+   - Ranger plugin version 2.4.0
+   - Supported Docker environment variables:
+     - `RANGER_SERVER_URL`: Ranger admin URL
+     - `RANGER_HIVE_REPOSITORY_NAME`: Hive repository name in Ranger
+     - `RANGER_HDFS_REPOSITORY_NAME`: HDFS repository name in Ranger
+   - Example: `docker run -e RANGER_SERVER_URL='http://ranger-server:6080' -e RANGER_HIVE_REPOSITORY_NAME='hiveDev' -e RANGER_HDFS_REPOSITORY_NAME='hdfsDev' ... datastrato/gravitino-ci-hive:0.1.13`

- gravitino-ci-hive:0.1.12
- Shrink hive Docker image size by 420MB

5 changes: 3 additions & 2 deletions docs/publish-docker-images.md
@@ -26,8 +26,9 @@ You can use GitHub actions to publish Docker images to the Docker Hub repository
+ `datastrato/gravitino-ci-trino`.
+ Future plans include support for other data sources.
5. Input the `tag name`, for example: `0.1.0`; the Docker image is then built and pushed as `datastrato/{image-name}:0.1.0`.
- 6. You must enter the correct `publish docker token` before you can execute run `Publish Docker Image` workflow.
- 7. Wait for the workflow to complete. You can see a new Docker image shown in the [Datastrato Docker Hub](https://hub.docker.com/u/datastrato) repository.
+ 6. Input the `environments`, for example: `HADOOP_VERSION=3.1.0 HIVE_VERSION=3.1.3`; the specified versions are then used to install HDFS and Hive.
+ 7. You must enter the correct `publish docker token` before you can run the `Publish Docker Image` workflow.
+ 8. Wait for the workflow to complete. You can see the new Docker image in the [Datastrato Docker Hub](https://hub.docker.com/u/datastrato) repository.

![Publish Docker image](assets/publish-docker-image.png)

@@ -12,6 +12,7 @@
import com.datastrato.gravitino.integration.test.container.HiveContainer;
import com.datastrato.gravitino.integration.test.util.AbstractIT;
import com.google.common.base.Preconditions;
+ import com.google.common.collect.ImmutableMap;
import com.google.errorprone.annotations.FormatMethod;
import com.google.errorprone.annotations.FormatString;
import java.io.IOException;
@@ -77,7 +78,7 @@ private static void initMetalake() {
}

private static void initHiveEnv() {
- CONTAINER_SUITE.startHiveContainer();
+ CONTAINER_SUITE.startHiveContainer(false, ImmutableMap.of());
hiveMetastoreUri =
String.format(
"thrift://%s:%d",
1 change: 1 addition & 0 deletions gradle/libs.versions.toml
@@ -113,6 +113,7 @@ mockito-core = { group = "org.mockito", name = "mockito-core", version.ref = "mo
hive2-metastore = { group = "org.apache.hive", name = "hive-metastore", version.ref = "hive2"}
hive2-exec = { group = "org.apache.hive", name = "hive-exec", version.ref = "hive2"}
hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref = "hive2"}
+ hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref = "hive2"}
hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop2" }
hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop2"}
hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop2"}
@@ -83,17 +83,19 @@ public Network getNetwork() {
return network;
}

- public void startHiveContainer() {
+ public void startHiveContainer(boolean enableRangerPlugin, Map<String, String> envVars) {
if (hiveContainer == null) {
synchronized (ContainerSuite.class) {
if (hiveContainer == null) {
// Start Hive container
HiveContainer.Builder hiveBuilder =
HiveContainer.builder()
.withHostName("gravitino-ci-hive")
+ .withRangerEnable(enableRangerPlugin)
.withEnvVars(
ImmutableMap.<String, String>builder()
.put("HADOOP_USER_NAME", "datastrato")
+ .putAll(envVars)
.build())
.withNetwork(network);
HiveContainer container = closer.register(hiveBuilder.build());
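The widened signature threads two knobs through every call site: whether to use the Ranger-enabled Hive image, and extra environment variables for the container. Below is a minimal sketch of how a Ranger-backed test might call it; the `getInstance()` accessor and the `hiveDev`/`hdfsDev` repository names are illustrative assumptions, and the `DOCKER_ENV_*` constants are the ones added to `RangerContainer` further down.

    import com.datastrato.gravitino.integration.test.container.ContainerSuite;
    import com.datastrato.gravitino.integration.test.container.RangerContainer;
    import com.google.common.collect.ImmutableMap;

    public class RangerHiveSetupSketch {
      private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); // assumed accessor

      static void rangerSetup() {
        // Containers share the suite's Docker network, so the Hive container can
        // reach Ranger admin by host name instead of a mapped localhost port.
        String rangerUrl =
            String.format(
                "http://%s:%d", RangerContainer.HOST_NAME, RangerContainer.RANGER_SERVER_PORT);

        // Ranger-enabled image; the repository names must match services registered
        // in Ranger admin (the names here are illustrative).
        containerSuite.startHiveContainer(
            true,
            ImmutableMap.of(
                RangerContainer.DOCKER_ENV_RANGER_SERVER_URL, rangerUrl,
                RangerContainer.DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME, "hiveDev",
                RangerContainer.DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME, "hdfsDev"));
      }

      static void plainSetup() {
        // Existing call sites keep the old behavior by passing the defaults explicitly.
        containerSuite.startHiveContainer(false, ImmutableMap.of());
      }
    }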
@@ -27,10 +27,13 @@ public class HiveContainer extends BaseContainer {
public static final String DEFAULT_IMAGE = System.getenv("GRAVITINO_CI_HIVE_DOCKER_IMAGE");
public static final String KERBEROS_IMAGE =
System.getenv("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE");

+ public static final String RANGER_IMAGE = System.getenv("GRAVITINO_CI_RANGER_HIVE_DOCKER_IMAGE");
public static final String HOST_NAME = "gravitino-ci-hive";
private static final int MYSQL_PORT = 3306;
public static final int HDFS_DEFAULTFS_PORT = 9000;
public static final int HIVE_METASTORE_PORT = 9083;
+ public static final int HIVE_SERVICE_PORT = 10000;

private static final String HIVE_LOG_PATH = "/tmp/root/";
private static final String HDFS_LOG_PATH = "/usr/local/hadoop/logs/";
@@ -187,17 +190,45 @@ protected boolean checkContainerStatus(int retryLimit) {
}

public static class Builder extends BaseContainer.Builder<Builder, HiveContainer> {
+ boolean rangerEnable = false;

private Builder() {
this.image = DEFAULT_IMAGE;
this.hostName = HOST_NAME;
- this.exposePorts = ImmutableSet.of(MYSQL_PORT, HDFS_DEFAULTFS_PORT, HIVE_METASTORE_PORT);
+ this.exposePorts =
+     ImmutableSet.of(MYSQL_PORT, HDFS_DEFAULTFS_PORT, HIVE_METASTORE_PORT, HIVE_SERVICE_PORT);
}

+ public Builder withRangerEnable(Boolean enable) {
+   this.rangerEnable = enable;
+   return this;
+ }

+ private String generateImageName() {
+   String hiveDockerImageName = image;
+   if (kerberosEnabled) {
+     hiveDockerImageName = KERBEROS_IMAGE;
+   } else if (rangerEnable) {
+     hiveDockerImageName = RANGER_IMAGE;
+   }
+   return hiveDockerImageName;
+ }

+ private String generateHostName() {
+   String hiveContainerHostName = hostName;
+   if (kerberosEnabled) {
+     hiveContainerHostName = "kerberos-" + hostName;
+   } else if (rangerEnable) {
+     hiveContainerHostName = "ranger-" + hostName;
+   }
+   return hiveContainerHostName;
+ }

@Override
public HiveContainer build() {
return new HiveContainer(
- kerberosEnabled ? KERBEROS_IMAGE : image,
- kerberosEnabled ? "kerberos-" + hostName : hostName,
+ generateImageName(),
+ generateHostName(),
exposePorts,
extraHosts,
filesToMount,
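With the two generate* helpers, the builder resolves both the image and the host name from the enabled flags, and Kerberos still takes precedence over Ranger, mirroring the old ternary in build(). A short sketch of direct builder usage, assuming `GRAVITINO_CI_RANGER_HIVE_DOCKER_IMAGE` is set (for example to `datastrato/gravitino-ci-hive:0.1.13`) and with an illustrative network:

    import com.datastrato.gravitino.integration.test.container.HiveContainer;
    import com.google.common.collect.ImmutableMap;
    import org.testcontainers.containers.Network;

    public class RangerHiveContainerSketch {
      public static void main(String[] args) {
        // withRangerEnable(true) makes generateImageName() pick RANGER_IMAGE and
        // generateHostName() return "ranger-gravitino-ci-hive"; with kerberosEnabled
        // it would pick KERBEROS_IMAGE and the "kerberos-" prefix instead.
        HiveContainer container =
            HiveContainer.builder()
                .withHostName("gravitino-ci-hive")
                .withRangerEnable(true)
                .withEnvVars(ImmutableMap.of("HADOOP_USER_NAME", "datastrato"))
                .withNetwork(Network.newNetwork()) // illustrative; the suite reuses one shared network
                .build();
        container.start();
      }
    }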
@@ -23,7 +23,7 @@ public class RangerContainer extends BaseContainer {

public static final String DEFAULT_IMAGE = System.getenv("GRAVITINO_CI_RANGER_DOCKER_IMAGE");
public static final String HOST_NAME = "gravitino-ci-ranger";
- public static final int RANGER_PORT = 6080;
+ public static final int RANGER_SERVER_PORT = 6080;
public RangerClient rangerClient;
private String rangerUrl;
private static final String username = "admin";
@@ -34,6 +34,10 @@ public class RangerContainer extends BaseContainer {
username = principal
password = path of the keytab file */
private static final String authType = "simple";
+ // Environment variable names used when starting the Ranger-enabled Hive/HDFS Docker container
+ public static final String DOCKER_ENV_RANGER_SERVER_URL = "RANGER_SERVER_URL";
+ public static final String DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME = "RANGER_HDFS_REPOSITORY_NAME";
+ public static final String DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME = "RANGER_HIVE_REPOSITORY_NAME";

public static Builder builder() {
return new Builder();
@@ -60,7 +64,7 @@ protected void setupContainer() {
public void start() {
super.start();

- rangerUrl = String.format("http://localhost:%s", this.getMappedPort(6080));
+ rangerUrl = String.format("http://localhost:%s", this.getMappedPort(RANGER_SERVER_PORT));
rangerClient = new RangerClient(rangerUrl, authType, username, password, null);

Preconditions.check("Ranger container startup failed!", checkContainerStatus(10));
@@ -104,7 +108,7 @@ public static class Builder
private Builder() {
this.image = DEFAULT_IMAGE;
this.hostName = HOST_NAME;
- this.exposePorts = ImmutableSet.of(RANGER_PORT);
+ this.exposePorts = ImmutableSet.of(RANGER_SERVER_PORT);
this.envVars =
ImmutableMap.<String, String>builder().put("RANGER_PASSWORD", password).build();
}
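The repository-name variables only take effect when matching services exist in Ranger admin, so a test typically registers them through the rangerClient field that start() initializes. A hedged sketch, assuming the RangerService model and createService call from the ranger-intg dependency; the service name must equal the value passed as RANGER_HIVE_REPOSITORY_NAME, and the credentials and JDBC URL are illustrative:

    import com.google.common.collect.ImmutableMap;
    import org.apache.ranger.RangerClient;
    import org.apache.ranger.RangerServiceException;
    import org.apache.ranger.plugin.model.RangerService;

    public class RangerRepositorySketch {
      // rangerClient comes from a started RangerContainer (the public field above).
      static void createHiveRepository(RangerClient rangerClient) throws RangerServiceException {
        RangerService hiveService = new RangerService();
        hiveService.setType("hive"); // Ranger's built-in Hive service type
        hiveService.setName("hiveDev"); // must equal the RANGER_HIVE_REPOSITORY_NAME env value
        hiveService.setConfigs(
            ImmutableMap.of(
                "username", "admin", // illustrative credentials
                "password", "rangerR0cks!",
                "jdbc.driverClassName", "org.apache.hive.jdbc.HiveDriver",
                "jdbc.url", "jdbc:hive2://ranger-gravitino-ci-hive:10000"));
        rangerClient.createService(hiveService);
      }
    }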
6 changes: 4 additions & 2 deletions integration-test/build.gradle.kts
@@ -108,6 +108,7 @@ dependencies {
testImplementation(libs.trino.client) {
exclude("jakarta.annotation")
}
+ testImplementation(libs.hive2.jdbc)
testImplementation(libs.trino.jdbc)
testImplementation(libs.ranger.intg) {
exclude("org.apache.hadoop", "hadoop-common")
@@ -144,10 +145,11 @@ tasks.test {
doFirst {
// Gravitino CI Docker image
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
environment("GRAVITINO_CI_RANGER_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") // Support Ranger plugin
environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE", "datastrato/gravitino-ci-trino:0.1.5")
environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0")
environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "datastrato/gravitino-ci-doris:0.1.4")
environment("GRAVITINO_CI_RANGER_DOCKER_IMAGE", "datastrato/gravitino-ci-ranger:0.1.0")
environment("GRAVITINO_CI_RANGER_DOCKER_IMAGE", "datastrato/gravitino-ci-ranger:0.1.1")

copy {
from("${project.rootDir}/dev/docker/trino/conf")
@@ -162,7 +164,7 @@ tasks.test {
// Check whether this module has already built
val trinoConnectorBuildDir = project(":trino-connector").buildDir
if (trinoConnectorBuildDir.exists()) {
- // Check the version gravitino related jars in build equal to the current project version
+ // Check that the versions of Gravitino-related jars in the build equal the current project version
val invalidGravitinoJars = trinoConnectorBuildDir.resolve("libs").listFiles { _, name -> name.startsWith("gravitino") }?.filter {
val name = it.name
!name.endsWith(version + ".jar")
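Together with the newly exposed HIVE_SERVICE_PORT (10000, HiveServer2), the hive2-jdbc test dependency lets integration tests run SQL against the container and observe what the Ranger policies allow. A minimal sketch; the `datastrato` user and the `getContainerIpAddress()` accessor are assumptions modeled on how other tests in this diff build their metastore URIs:

    import com.datastrato.gravitino.integration.test.container.HiveContainer;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class HiveJdbcProbeSketch {
      static void probe(HiveContainer hiveContainer) throws SQLException {
        // Connects to HiveServer2 inside the started container; under Ranger, the
        // outcome of the query depends on the policies granted to this user.
        String url =
            String.format(
                "jdbc:hive2://%s:%d/default",
                hiveContainer.getContainerIpAddress(), // assumed accessor, as used by the ITs above
                HiveContainer.HIVE_SERVICE_PORT);
        try (Connection conn = DriverManager.getConnection(url, "datastrato", "");
            Statement stmt = conn.createStatement()) {
          stmt.execute("SHOW DATABASES");
        }
      }
    }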
