Skip to content

Commit

Permalink
[#4111] Limit port ranges in docker to reduce possible port conflicts…
Browse files Browse the repository at this point in the history
… for Hive docker. (#4106)

### What changes were proposed in this pull request?

1. Increase the timeout for checking whether Hive is writable from
30 seconds to 150 seconds.
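2. Add more diagnostic logging: dump the HDFS NameNode/DataNode logs when startup fails, and log the stdout/stderr of the failed Hive check SQL.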
3. Fix potential port conflicts for the DataNode and NameNode by limiting the container's local port range (`net.ipv4.ip_local_port_range`).

### Why are the changes needed?

To improve CI stability. 
Fix: #4111 

### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

Existing tests.
  • Loading branch information
yuqi1129 authored Jul 22, 2024
1 parent 7d8f434 commit 2a2a190
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 12 deletions.
2 changes: 1 addition & 1 deletion catalogs/catalog-hadoop/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ tasks.test {

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.2")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.4")
}

val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
Expand Down
2 changes: 1 addition & 1 deletion catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ tasks.test {

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.2")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.4")
}

val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
Expand Down
2 changes: 1 addition & 1 deletion catalogs/catalog-lakehouse-iceberg/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ tasks.test {

doFirst {
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.13")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.2")
environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.4")
}

val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
Expand Down
3 changes: 2 additions & 1 deletion dev/docker/kerberos-hive/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ RUN apt-get update && apt-get upgrade -y && apt-get install --fix-missing -yq \
krb5-admin-server \
krb5-user \
krb5-config \
jsvc
jsvc \
net-tools

#################################################################################
## setup ssh
Expand Down
32 changes: 25 additions & 7 deletions dev/docker/kerberos-hive/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,22 +79,36 @@ echo "Starting HDFS..."
echo "Starting NameNode..."
${HADOOP_HOME}/sbin/hadoop-daemon.sh start namenode

# Check if the nameNode is running
ps -ef | grep NameNode | grep -v "grep"
if [[ $? -ne 0 ]]; then
echo "NameNode failed to start, please check the logs"
echo "HDFS NameNode log start---------------------------"
cat ${HADOOP_HOME}/logs/*.log
cat ${HADOOP_HOME}/logs/*.out
echo "HDFS NameNode log end-----------------------------"
exit 1
fi


echo "Starting DataNode..."
${HADOOP_HOME}/sbin/start-secure-dns.sh
sleep 5

# Check if the DataNode is running
ps -ef | grep DataNode | grep -v "color=auto"
ps -ef | grep DataNode | grep -v "grep"
if [[ $? -ne 0 ]]; then
echo "DataNode failed to start, please check the logs"
ehco "HDFS DataNode log start----------------------------"
cat ${HADOOP_HOME}/bin/logs/hadoop-root-datanode-*.log
echo "HDFS DataNode log start---------------------------"
cat ${HADOOP_HOME}/logs/*.log
cat ${HADOOP_HOME}/logs/*.out
echo "HDFS DataNode log end-----------------------------"
exit 1
fi

retry_times=0
ready=0
while [[ ${retry_times} -lt 10 ]]; do
while [[ ${retry_times} -lt 15 ]]; do
hdfs_ready=$(hdfs dfsadmin -report | grep "Live datanodes" | awk '{print $3}')
if [[ ${hdfs_ready} == "(1):" ]]; then
echo "HDFS is ready, retry_times = ${retry_times}"
Expand All @@ -106,9 +120,13 @@ while [[ ${retry_times} -lt 10 ]]; do
done

if [[ ${ready} -ne 1 ]]; then
echo "HDFS is not ready"
ehco "HDFS DataNode log start---------------------------"
cat ${HADOOP_HOME}/bin/logs/hadoop-root-datanode-*.log
echo "HDFS is not ready, execute log:"
ps -ef | grep DataNode | grep -v "grep"
hdfs dfsadmin -report
echo "HDFS DataNode log start---------------------------"
cat ${HADOOP_HOME}/logs/*.log
cat ${HADOOP_HOME}/logs/*.out
echo "HDFS DataNode log end-----------------------------"
exit 1
fi

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import com.google.common.collect.ImmutableSet;
import java.io.IOException;
import java.time.Duration;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
Expand Down Expand Up @@ -72,7 +73,14 @@ protected BaseContainer(
Map<String, String> filesToMount,
Map<String, String> envVars,
Optional<Network> network) {
this.container = new GenericContainer<>(requireNonNull(image, "image is null"));
this.container =
new GenericContainer<>(requireNonNull(image, "image is null"))
.withCreateContainerCmdModifier(
cmd ->
cmd.getHostConfig()
.withSysctls(
Collections.singletonMap(
"net.ipv4.ip_local_port_range", "20000 40000")));
this.ports = requireNonNull(ports, "ports is null");
this.hostName = requireNonNull(hostName, "hostName is null");
this.extraHosts = extraHosts;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ protected boolean checkContainerStatus(int retryLimit) {
if (result.getExitCode() == 0) {
return true;
}

LOG.warn(
"Failed to execute sql: {}, Std-out: {}, Std-error:{}",
createTableSQL,
result.getStdout(),
result.getStderr());
} catch (Exception e) {
LOG.error("Failed to execute sql: {}", createTableSQL, e);
}
Expand Down

0 comments on commit 2a2a190

Please sign in to comment.