Skip to content

Commit

Permalink
Improve hive2 and hive3
Browse files Browse the repository at this point in the history
  • Loading branch information
xunliu committed Jul 18, 2024
1 parent f743ee4 commit e2dc83b
Show file tree
Hide file tree
Showing 20 changed files with 78 additions and 132 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ on:
description: 'Docker tag to apply to this image'
required: true
type: string
environments:
description: 'Environments variables for the build image'
required: false
type: string
token:
description: 'Publish Docker token'
required: true
Expand Down Expand Up @@ -100,4 +96,4 @@ jobs:
run: |
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/hostedtoolcache/CodeQL
${{ github.event.inputs.environments }} ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }} --latest
./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }} --latest
2 changes: 1 addition & 1 deletion catalogs/catalog-hadoop/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ tasks.test {
dependsOn(tasks.jar)

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.2")
}

Expand Down
2 changes: 1 addition & 1 deletion catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ tasks.test {
dependsOn(tasks.jar)

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.2")
}

Expand Down
2 changes: 1 addition & 1 deletion catalogs/catalog-lakehouse-iceberg/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ tasks.test {
dependsOn(tasks.jar)

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.2")
}

Expand Down
2 changes: 1 addition & 1 deletion catalogs/catalog-lakehouse-paimon/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ tasks.test {
dependsOn(tasks.jar)

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.3")
}

Expand Down
2 changes: 1 addition & 1 deletion clients/client-python/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ tasks {
"GRAVITINO_HOME" to project.rootDir.path + "/distribution/package",
"START_EXTERNAL_GRAVITINO" to "true",
"DOCKER_TEST" to dockerTest.toString(),
"GRAVITINO_CI_HIVE_DOCKER_IMAGE" to "datastrato/gravitino-ci-hive:0.1.12",
"GRAVITINO_CI_HIVE_DOCKER_IMAGE" to "datastrato/gravitino-ci-hive:0.1.13",
))
environment = envMap

Expand Down
28 changes: 11 additions & 17 deletions dev/docker/hive/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ ENV HIVE_HOME=/usr/local/hive
ENV HIVE2_HOME=/usr/local/hive2
ENV HIVE3_HOME=/usr/local/hive3
ENV HIVE_CONF_DIR=${HIVE_HOME}/conf
ENV HIVE_TMP_CONF_DIR=/tmp/hive-conf
ENV HADOOP_HOME=/usr/local/hadoop
ENV HADOOP2_HOME=/usr/local/hadoop2
ENV HADOOP3_HOME=/usr/local/hadoop3
Expand All @@ -91,6 +92,7 @@ ENV HADOOP_MAPRED_HOME=${HADOOP_INSTALL}
ENV HADOOP_COMMON_HOME=${HADOOP_INSTALL}
ENV HADOOP_HDFS_HOME=${HADOOP_INSTALL}
ENV HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
ENV HADOOP_TMP_CONF_DIR=/tmp/hadoop-conf
ENV YARN_HOME=${HADOOP_INSTALL}
ENV ZK_HOME=/usr/local/zookeeper
ENV RANGER_HDFS_PLUGIN_HOME=/usr/local/ranger-hdfs-plugin
Expand Down Expand Up @@ -142,40 +144,32 @@ ADD packages/ranger-${RANGER_VERSION}-hive-plugin.tar.gz /opt/
RUN ln -s /opt/ranger-${RANGER_VERSION}-hive-plugin ${RANGER_HIVE_PLUGIN_HOME}

################################################################################
# install hadoop
# install hadoop2 and hadoop3
ADD packages/hadoop-${HADOOP2_VERSION}.tar.gz /opt/
RUN ln -s /opt/hadoop-${HADOOP2_VERSION} ${HADOOP2_HOME}
ADD packages/hadoop-${HADOOP3_VERSION}.tar.gz /opt/
RUN ln -s /opt/hadoop-${HADOOP3_VERSION} ${HADOOP3_HOME}

# replace configuration templates
RUN rm -f ${HADOOP_CONF_DIR}/core-site.xml
RUN rm -f ${HADOOP_CONF_DIR}/hadoop-env.sh
RUN rm -f ${HADOOP_CONF_DIR}/hdfs-site.xml
RUN rm -f ${HADOOP_CONF_DIR}/mapred-site.xml

ADD core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
ADD hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
ADD hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
ADD mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
# Add hadoop configuration to temporary directory
ADD core-site.xml ${HADOOP_TMP_CONF_DIR}/core-site.xml
ADD hadoop-env.sh ${HADOOP_TMP_CONF_DIR}/hadoop-env.sh
ADD hdfs-site.xml ${HADOOP_TMP_CONF_DIR}/hdfs-site.xml
ADD mapred-site.xml ${HADOOP_TMP_CONF_DIR}/mapred-site.xml
ADD check-status.sh /tmp/check-status.sh

################################################################################

# install hive2
# install hive2 and hive3
ADD packages/apache-hive-${HIVE2_VERSION}-bin.tar.gz /opt/
RUN ln -s /opt/apache-hive-${HIVE2_VERSION}-bin ${HIVE2_HOME}

# install hive3
ADD packages/apache-hive-${HIVE3_VERSION}-bin.tar.gz /opt/
RUN ln -s /opt/apache-hive-${HIVE3_VERSION}-bin ${HIVE3_HOME}

ADD hive-site.xml ${HIVE_CONF_DIR}/hive-site.xml
# Add hive configuration to temporary directory
ADD hive-site.xml ${HIVE_TMP_CONF_DIR}/hive-site.xml

################################################################################
# add mysql jdbc driver
ADD packages/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.tar.gz /opt/
#RUN ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/* ${HIVE_HOME}/lib

################################################################################
# add users and groups
Expand Down
4 changes: 1 addition & 3 deletions dev/docker/hive/hive-dependency.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,14 @@ hive_dir="$(dirname "${BASH_SOURCE-$0}")"
hive_dir="$(cd "${hive_dir}">/dev/null; pwd)"

# Environment variables definition
#HADOOP_VERSION=${HADOOP_VERSION:-"2.7.3"}
HADOOP2_VERSION="2.7.3"
HADOOP3_VERSION="3.1.0"
#HIVE_VERSION=${HIVE_VERSION:-"2.3.9"}

HIVE2_VERSION="2.3.9"
HIVE3_VERSION="3.1.3"
MYSQL_JDBC_DRIVER_VERSION=${MYSQL_VERSION:-"8.0.15"}
ZOOKEEPER_VERSION=${ZOOKEEPER_VERSION:-"3.4.13"}
RANGER_VERSION=${RANGER_VERSION:-"2.4.0"} # Notice: Currently only tested Ranger plugin 2.4.0 in the Hadoop 3.1.0 and Hive 3.1.3
RANGER_VERSION="2.4.0" # Notice: Currently only tested Ranger plugin 2.4.0 in the Hadoop 3.1.0 and Hive 3.1.3

HADOOP2_PACKAGE_NAME="hadoop-${HADOOP2_VERSION}.tar.gz"
HADOOP2_DOWNLOAD_URL="https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP2_VERSION}/${HADOOP2_PACKAGE_NAME}"
Expand Down
29 changes: 8 additions & 21 deletions dev/docker/hive/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,25 @@
# under the License.
#

# backup config dir, and after symbolic linking, copied content back to config dir
cp -r ${HIVE_CONF_DIR} /opt/hive-conf
cp -r ${HADOOP_CONF_DIR} /opt/hadoop-conf

rm -r ${HIVE_HOME}
rm -r ${HADOOP_HOME}

if [[ "${HIVE_VERSION}" =~ 3.* ]]; then
# Special Hive runtime version environment variable to decide which version of Hive to install
if [[ "${HIVE_RUNTIME_VERSION}" == "hive3" ]]; then
ln -s ${HIVE3_HOME} ${HIVE_HOME}
ln -s ${HADOOP3_HOME} ${HADOOP_HOME}
else
ln -s ${HIVE2_HOME} ${HIVE_HOME}
ln -s ${HADOOP2_HOME} ${HADOOP_HOME}
fi

# Add back hive configuration
cp /opt/hive-conf/* ${HIVE_CONF_DIR}
cp /opt/hadoop-conf/* ${HADOOP_CONF_DIR}
# Copy the Hadoop and Hive configuration files and update the hostname
cp -f ${HADOOP_TMP_CONF_DIR}/* ${HADOOP_CONF_DIR}
cp -f ${HIVE_TMP_CONF_DIR}/* ${HIVE_CONF_DIR}
sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/core-site.xml
sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/hdfs-site.xml
sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HIVE_CONF_DIR}/hive-site.xml

# Link mysql-connector-java after deciding where HIVE_HOME symbolic link points to.
ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.jar ${HIVE_HOME}/lib

# Fix guava problem
# See https://issues.apache.org/jira/browse/HIVE-22915
rm ${HIVE_HOME}/lib/guava-*.jar
cp ${HADOOP_HOME}/share/hadoop/hdfs/lib/guava-*-jre.jar ${HIVE_HOME}/lib/

# install Ranger hive plugin
if [[ -n "${RANGER_HIVE_REPOSITORY_NAME}" && -n "${RANGER_SERVER_URL}" ]]; then
# Enabling the Hive Ranger plugin requires ZooKeeper
Expand Down Expand Up @@ -111,11 +103,6 @@ log4j.appender.RANGERAUDIT.DatePattern=.yyyy-MM-dd
EOF
fi

# update hadoop config use hostname
sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/core-site.xml
sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/hdfs-site.xml
sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HIVE_HOME}/conf/hive-site.xml

# start hdfs
echo "Starting HDFS..."
echo "Format NameNode..."
Expand Down
25 changes: 12 additions & 13 deletions docs/docker-image-details.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,18 @@ You can use this kind of image to test the catalog of Apache Hive.
Changelog

- gravitino-ci-hive:0.1.13
- Enable Ranger plugin in the Hive and HDFS
- Hive version 3.1.3
- HDFS version 3.1.0
- Ranger plugin version 2.4.0
- Support Docker environment variables:
- `RANGER_SERVER_URL`: Ranger admin URL
- `RANGER_HIVE_REPOSITORY_NAME`: Hive repository name in Ranger admin
- `RANGER_HDFS_REPOSITORY_NAME`: HDFS repository name in Ranger admin
- If you want to enable Hive Ranger plugin, you need to set the `RANGER_SERVER_URL` and `RANGER_HIVE_REPOSITORY_NAME` environment variables.
- If you want to enable HDFS Ranger plugin, you need to set the `RANGER_SERVER_URL` and `RANGER_HDFS_REPOSITORY_NAME` environment variables.
- Example: docker run -e RANGER_SERVER_URL='http://ranger-server:6080' -e RANGER_HIVE_REPOSITORY_NAME='hiveDev' -e RANGER_HDFS_REPOSITORY_NAME='hdfsDev' ... datastrato/gravitino-ci-hive:0.1.13
- Hive Ranger audit logs are stored in the `/tmp/root/ranger-hive-audit.log` file
- HDFS Ranger audit logs are stored in the `/usr/local/hadoop/logs/ranger-hdfs-audit.log` file
- Support Hive 2.3.9 and HDFS 2.7.3
- Docker environment variables:
- `HIVE_RUNTIME_VERSION`: `hive2` (default)
- Support Hive 3.1.3, HDFS 3.1.0 and Ranger plugin version 2.4.0
- Docker environment variables:
- `HIVE_RUNTIME_VERSION`: `hive3`
- `RANGER_SERVER_URL`: Ranger admin URL
- `RANGER_HIVE_REPOSITORY_NAME`: Hive repository name in Ranger admin
- `RANGER_HDFS_REPOSITORY_NAME`: HDFS repository name in Ranger admin
- If you want to enable the Hive Ranger plugin, you need to set both the `RANGER_SERVER_URL` and `RANGER_HIVE_REPOSITORY_NAME` environment variables. Hive Ranger audit logs are stored in the `/tmp/root/ranger-hive-audit.log` file.
- If you want to enable the HDFS Ranger plugin, you need to set both the `RANGER_SERVER_URL` and `RANGER_HDFS_REPOSITORY_NAME` environment variables. HDFS Ranger audit logs are stored in the `/usr/local/hadoop/logs/ranger-hdfs-audit.log` file.
- Example: docker run -e HIVE_RUNTIME_VERSION='hive3' -e RANGER_SERVER_URL='http://ranger-server:6080' -e RANGER_HIVE_REPOSITORY_NAME='hiveDev' -e RANGER_HDFS_REPOSITORY_NAME='hdfsDev' ... datastrato/gravitino-ci-hive:0.1.13

- gravitino-ci-hive:0.1.12
- Shrink hive Docker image size by 420MB
Expand Down
2 changes: 1 addition & 1 deletion flink-connector/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ tasks.test {
dependsOn(tasks.jar)

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
}

val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ public class ContainerSuite implements Closeable {

private static Network network = null;
private static volatile HiveContainer hiveContainer;
// Enable Ranger plugin in the Hive container
private static volatile HiveContainer hiveRangerContainer;
private static volatile TrinoContainer trinoContainer;
private static volatile TrinoITContainers trinoITContainers;
private static volatile RangerContainer rangerContainer;
Expand Down Expand Up @@ -100,45 +98,28 @@ public Network getNetwork() {
}

public void startHiveContainer() {
startHiveContainer(ImmutableMap.of());
}

/**
* To start the Hive container, you can specify environment variables: HIVE_RUNTIME_VERSION:
* Hive version, currently support `hive2`(default) and `hive3` DOCKER_ENV_RANGER_SERVER_URL:
* Ranger server URL DOCKER_ENV_RANGER_HIVE_REPOSITORY_NAME: Ranger Hive repository name
* DOCKER_ENV_RANGER_HDFS_REPOSITORY_NAME: Ranger HDFS repository name
*/
public void startHiveContainer(Map<String, String> envVars) {
if (hiveContainer == null) {
synchronized (ContainerSuite.class) {
if (hiveContainer == null) {
// Start Hive container
HiveContainer.Builder hiveBuilder =
HiveContainer.builder()
.withHostName("gravitino-ci-hive")
.withEnvVars(
ImmutableMap.<String, String>builder()
.put("HADOOP_USER_NAME", "datastrato")
.build())
.withNetwork(network);
HiveContainer container = closer.register(hiveBuilder.build());
container.start();
hiveContainer = container;
}
}
}
}

public void startHiveRangerContainer(Map<String, String> envVars) {
if (hiveRangerContainer == null) {
synchronized (ContainerSuite.class) {
if (hiveRangerContainer == null) {
if (!envVars.containsKey(HiveContainer.HADOOP_USER_NAME)) {
// Set default HADOOP_USER_NAME
envVars.put(HiveContainer.HADOOP_USER_NAME, "gravitino");
}

// Start Hive container
HiveContainer.Builder hiveBuilder =
HiveContainer.builder()
.withHostName("gravitino-ci-hive-ranger")
.withEnableRangerPlugin(true)
.withEnvVars(envVars)
.withNetwork(network);
HiveContainer container = closer.register(hiveBuilder.build());
container.start();
hiveRangerContainer = container;
hiveContainer = container;
}
}
}
Expand Down Expand Up @@ -345,10 +326,6 @@ public HiveContainer getHiveContainer() {
return hiveContainer;
}

public HiveContainer getHiveRangerContainer() {
return hiveRangerContainer;
}

public void startRangerContainer() {
if (rangerContainer == null) {
synchronized (ContainerSuite.class) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,13 @@ public class HiveContainer extends BaseContainer {
public static final String KERBEROS_IMAGE =
System.getenv("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE");

public static final String RANGER_IMAGE = System.getenv("GRAVITINO_CI_RANGER_HIVE_DOCKER_IMAGE");
public static final String HOST_NAME = "gravitino-ci-hive";
public static final String HADOOP_USER_NAME = "HADOOP_USER_NAME";
// Specify the Hive version to start the Hive container, currently support `hive2`(default) and
// `hive3`
public static final String HIVE_RUNTIME_VERSION = "HIVE_RUNTIME_VERSION";
public static final String HIVE2 = "hive2"; // The Hive container default version
public static final String HIVE3 = "hive3";
private static final int MYSQL_PORT = 3306;
public static final int HDFS_DEFAULTFS_PORT = 9000;
public static final int HIVE_METASTORE_PORT = 9083;
Expand Down Expand Up @@ -205,26 +209,17 @@ protected boolean checkContainerStatus(int retryLimit) {
}

public static class Builder extends BaseContainer.Builder<Builder, HiveContainer> {
boolean rangerEnablePlugin = false;

private Builder() {
this.image = DEFAULT_IMAGE;
this.hostName = HOST_NAME;
this.exposePorts =
ImmutableSet.of(MYSQL_PORT, HDFS_DEFAULTFS_PORT, HIVE_METASTORE_PORT, HIVE_SERVICE_PORT);
}

public Builder withEnableRangerPlugin(Boolean enable) {
this.rangerEnablePlugin = enable;
return this;
}

private String generateImageName() {
String hiveDockerImageName = image;
if (kerberosEnabled) {
hiveDockerImageName = KERBEROS_IMAGE;
} else if (rangerEnablePlugin) {
hiveDockerImageName = RANGER_IMAGE;
}
return hiveDockerImageName;
}
Expand Down
10 changes: 4 additions & 6 deletions integration-test/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ dependencies {
exclude("org.pentaho")
exclude("org.slf4j")
}
testImplementation(libs.hive2.jdbc) {
exclude("org.slf4j")
}
testImplementation(libs.hive2.metastore) {
exclude("co.cask.tephra")
exclude("com.github.joshelser")
Expand Down Expand Up @@ -122,9 +125,6 @@ dependencies {
testImplementation(libs.trino.client) {
exclude("jakarta.annotation")
}
testImplementation(libs.hive2.jdbc) {
exclude("org.slf4j")
}
testImplementation(libs.trino.jdbc)
testImplementation(libs.ranger.intg) {
exclude("org.apache.hadoop", "hadoop-common")
Expand Down Expand Up @@ -160,9 +160,7 @@ tasks.test {

doFirst {
// Gravitino CI Docker image
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
// environment("GRAVITINO_CI_RANGER_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13") // Support Ranger plugin
environment("GRAVITINO_CI_RANGER_HIVE_DOCKER_IMAGE", "unknowntpo/gravitino-ci-hive:ranger-plugin") // Support Ranger plugin
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE", "datastrato/gravitino-ci-trino:0.1.5")
environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0")
environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "datastrato/gravitino-ci-doris:0.1.5")
Expand Down
Loading

0 comments on commit e2dc83b

Please sign in to comment.