diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts
index 3596f6e9e9c..8afcf6a23e3 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -99,6 +99,7 @@ tasks.test {
 
   doFirst {
     environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12")
+    environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-kerberos-hive:0.1.0")
   }
 
   val init = project.extra.get("initIntegrationTest") as (Test) -> Unit
diff --git a/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/integration/test/HDFSKerberosIT.java b/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/integration/test/HDFSKerberosIT.java
new file mode 100644
index 00000000000..a8d3f657f24
--- /dev/null
+++ b/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/integration/test/HDFSKerberosIT.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2024 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+
+package com.datastrato.gravitino.catalog.hadoop.integration.test;
+
+import com.datastrato.gravitino.integration.test.container.ContainerSuite;
+import com.datastrato.gravitino.integration.test.container.HiveContainer;
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.security.PrivilegedAction;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Tag("gravitino-docker-it")
+public class HDFSKerberosIT {
+  private static final Logger LOG = LoggerFactory.getLogger(HDFSKerberosIT.class);
+
+  private static final ContainerSuite containerSuite = ContainerSuite.getInstance();
+  private static final String CLIENT_PRINCIPAL = "cli@HADOOPKRB";
+  private static UserGroupInformation clientUGI;
+
+  private static String keytabPath;
+
+  @BeforeAll
+  public static void setup() throws IOException {
+    containerSuite.startKerberosHiveContainer();
+
+    File baseDir = new File(System.getProperty("java.io.tmpdir"));
+    File file = Files.createTempDirectory(baseDir.toPath(), "test").toFile();
+    file.deleteOnExit();
+
+    // Copy the keytab and krb5.conf from the container
+    keytabPath = file.getAbsolutePath() + "/client.keytab";
+    containerSuite
+        .getKerberosHiveContainer()
+        .getContainer()
+        .copyFileFromContainer("/etc/admin.keytab", keytabPath);
+
+    String krb5TmpPath = file.getAbsolutePath() + "/krb5.conf_tmp";
+    String krb5Path = file.getAbsolutePath() + "/krb5.conf";
+    containerSuite
+        .getKerberosHiveContainer()
+        .getContainer()
+        .copyFileFromContainer("/etc/krb5.conf", krb5TmpPath);
+
+    // Modify the krb5.conf and change the kdc and admin_server to the container IP
+    String ip = containerSuite.getKerberosHiveContainer().getContainerIpAddress();
+    String content = FileUtils.readFileToString(new File(krb5TmpPath), StandardCharsets.UTF_8);
+    content = content.replace("kdc = localhost:88", "kdc = " + ip + ":88");
+    content = content.replace("admin_server = localhost", "admin_server = " + ip + ":749");
+    FileUtils.write(new File(krb5Path), content, StandardCharsets.UTF_8);
+
+    LOG.info("Kerberos kdc config:\n{}", content);
+
+    System.setProperty("java.security.krb5.conf", krb5Path);
+    System.setProperty("sun.security.krb5.debug", "true");
+  }
+
+  @AfterAll
+  public static void tearDown() {
+    // Reset the UGI
+    UserGroupInformation.reset();
+
+    // Clean up the kerberos configuration
+    System.clearProperty("java.security.krb5.conf");
+    System.clearProperty("sun.security.krb5.debug");
+  }
+
+  @Test
+  public void testKerberosHDFS() throws IOException {
+    Configuration conf = new Configuration();
+    conf.set("fs.defaultFS", defaultBaseLocation());
+    conf.setBoolean("fs.hdfs.impl.disable.cache", true);
+    conf.set("hadoop.security.authentication", "kerberos");
+
+    UserGroupInformation.setConfiguration(conf);
+    clientUGI = UserGroupInformation.loginUserFromKeytabAndReturnUGI(CLIENT_PRINCIPAL, keytabPath);
+    PrivilegedAction<?> action =
+        (PrivilegedAction<?>)
+            () -> {
+              try {
+                FileSystem fs = FileSystem.get(conf);
+                Path path = new Path("/");
+                Assertions.assertTrue(fs.exists(path));
+                return null;
+              } catch (IOException e) {
+                throw new RuntimeException(e);
+              }
+            };
+
+    clientUGI.doAs(action);
+
+    // Clear UGI, It will throw exception
+    UserGroupInformation.reset();
+    Exception e = Assertions.assertThrows(Exception.class, action::run);
+    Assertions.assertInstanceOf(AccessControlException.class, e.getCause());
+  }
+
+  private static String defaultBaseLocation() {
+    return String.format(
+        "hdfs://%s:%d/user/",
+        containerSuite.getKerberosHiveContainer().getContainerIpAddress(),
+        HiveContainer.HDFS_DEFAULTFS_PORT);
+  }
+}
diff --git a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/BaseContainer.java b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/BaseContainer.java
index 35cc6f54cc3..059f48dab6e 100644
--- a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/BaseContainer.java
+++ b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/BaseContainer.java
@@ -36,7 +36,7 @@ public abstract class BaseContainer implements AutoCloseable {
   public static final Logger LOG = LoggerFactory.getLogger(BaseContainer.class);
 
   // Host name of the container
-  private final String hostName;
+  protected final String hostName;
   // Exposed ports of the container
   private final Set<Integer> ports;
   // Files to mount in the container
@@ -153,6 +153,7 @@ protected abstract static class Builder<
           SELF extends Builder<SELF, CONTAINER>, CONTAINER extends BaseContainer> {
     protected String image;
     protected String hostName;
+    protected boolean kerberosEnabled;
     protected Set<Integer> exposePorts = ImmutableSet.of();
     protected Map<String, String> extraHosts = ImmutableMap.of();
     protected Map<String, String> filesToMount = ImmutableMap.of();
@@ -201,6 +202,11 @@ public SELF withNetwork(Network network) {
       return self;
     }
 
+    public SELF withKerberosEnabled(boolean kerberosEnabled) {
+      this.kerberosEnabled = kerberosEnabled;
+      return self;
+    }
+
     public abstract CONTAINER build();
   }
 }
diff --git a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java
index 22b10306ab8..082a4a82c19 100644
--- a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java
+++ b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java
@@ -42,6 +42,7 @@ public class ContainerSuite implements Closeable {
   private static volatile TrinoITContainers trinoITContainers;
   private static volatile KafkaContainer kafkaContainer;
   private static volatile DorisContainer dorisContainer;
+  private static volatile HiveContainer kerberosHiveContainer;
 
   private static volatile MySQLContainer mySQLContainer;
   private static volatile MySQLContainer mySQLVersion5Container;
@@ -101,6 +102,25 @@ public void startHiveContainer() {
     }
   }
 
+  public void startKerberosHiveContainer() {
+    if (kerberosHiveContainer == null) {
+      synchronized (ContainerSuite.class) {
+        if (kerberosHiveContainer == null) {
+          // Start Hive container
+          HiveContainer.Builder hiveBuilder =
+              HiveContainer.builder()
+                  .withHostName("gravitino-ci-kerberos-hive")
+                  .withEnvVars(ImmutableMap.<String, String>builder().build())
+                  .withKerberosEnabled(true)
+                  .withNetwork(network);
+          HiveContainer container = closer.register(hiveBuilder.build());
+          container.start();
+          kerberosHiveContainer = container;
+        }
+      }
+    }
+  }
+
   public void startTrinoContainer(
       String trinoConfDir,
       String trinoConnectorLibDir,
@@ -284,6 +304,10 @@ public HiveContainer getHiveContainer() {
     return hiveContainer;
   }
 
+  public HiveContainer getKerberosHiveContainer() {
+    return kerberosHiveContainer;
+  }
+
   public DorisContainer getDorisContainer() {
     return dorisContainer;
   }
diff --git a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/HiveContainer.java b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/HiveContainer.java
index 1006b957dbd..14e5cb484dd 100644
--- a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/HiveContainer.java
+++ b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/HiveContainer.java
@@ -25,6 +25,8 @@ public class HiveContainer extends BaseContainer {
   public static final Logger LOG = LoggerFactory.getLogger(HiveContainer.class);
 
   public static final String DEFAULT_IMAGE = System.getenv("GRAVITINO_CI_HIVE_DOCKER_IMAGE");
+  public static final String KERBEROS_IMAGE =
+      System.getenv("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE");
   public static final String HOST_NAME = "gravitino-ci-hive";
   private static final int MYSQL_PORT = 3306;
   public static final int HDFS_DEFAULTFS_PORT = 9000;
@@ -51,13 +53,13 @@ protected HiveContainer(
   @Override
   protected void setupContainer() {
     super.setupContainer();
-    withLogConsumer(new PrintingContainerLog(format("%-14s| ", "HiveContainer")));
+    withLogConsumer(new PrintingContainerLog(format("%-14s| ", "HiveContainer-" + hostName)));
   }
 
   @Override
   public void start() {
     super.start();
-    Preconditions.check("Hive container startup failed!", checkContainerStatus(10));
+    Preconditions.check("Hive container startup failed!", checkContainerStatus(15));
   }
 
   @Override
@@ -88,8 +90,8 @@ private void copyHiveLog() {
   @Override
   protected boolean checkContainerStatus(int retryLimit) {
     await()
-        .atMost(100, TimeUnit.SECONDS)
-        .pollInterval(100 / retryLimit, TimeUnit.SECONDS)
+        .atMost(150, TimeUnit.SECONDS)
+        .pollInterval(150 / retryLimit, TimeUnit.SECONDS)
         .until(
             () -> {
               try {
@@ -113,40 +115,37 @@ protected boolean checkContainerStatus(int retryLimit) {
               return false;
             });
 
-    String sql = "show databases";
+    final String showDatabaseSQL = "show databases";
     await()
         .atMost(30, TimeUnit.SECONDS)
         .pollInterval(30 / retryLimit, TimeUnit.SECONDS)
         .until(
             () -> {
               try {
-                Container.ExecResult result = executeInContainer("hive", "-e", sql);
+                Container.ExecResult result = executeInContainer("hive", "-e", showDatabaseSQL);
                 if (result.getStdout().contains("default")) {
                   return true;
                 }
               } catch (Exception e) {
-                LOG.error("Failed to execute sql: {}", sql, e);
+                LOG.error("Failed to execute sql: {}", showDatabaseSQL, e);
               }
               return false;
             });
-
+    final String createTableSQL =
+        "CREATE TABLE IF NOT EXISTS default.employee ( eid int, name String, "
+            + "salary String, destination String) ";
     await()
         .atMost(30, TimeUnit.SECONDS)
         .pollInterval(30 / retryLimit, TimeUnit.SECONDS)
         .until(
             () -> {
               try {
-                Container.ExecResult result =
-                    executeInContainer(
-                        "hive",
-                        "-e",
-                        "CREATE TABLE IF NOT EXISTS default.employee ( eid int, name String, "
-                            + "salary String, destination String) ");
+                Container.ExecResult result = executeInContainer("hive", "-e", createTableSQL);
                 if (result.getExitCode() == 0) {
                   return true;
                 }
               } catch (Exception e) {
-                LOG.error("Failed to execute sql: {}", sql, e);
+                LOG.error("Failed to execute sql: {}", createTableSQL, e);
               }
               return false;
             });
@@ -183,7 +182,13 @@ private Builder() {
     @Override
     public HiveContainer build() {
       return new HiveContainer(
-          image, hostName, exposePorts, extraHosts, filesToMount, envVars, network);
+          kerberosEnabled ? KERBEROS_IMAGE : image,
+          kerberosEnabled ? "kerberos-" + hostName : hostName,
+          exposePorts,
+          extraHosts,
+          filesToMount,
+          envVars,
+          network);
     }
   }
 }