From c7f571cd673168f486c177498b68d88f3c0f86de Mon Sep 17 00:00:00 2001 From: yuqi Date: Sat, 21 Sep 2024 16:53:05 +0800 Subject: [PATCH 1/4] Add S3 related configuration to support Hive S3 schema/table. --- dev/docker/hive/hive-site.xml | 21 +++++++++++++++++++++ dev/docker/hive/start.sh | 6 ++++++ docs/docker-image-details.md | 6 ++++++ 3 files changed, 33 insertions(+) diff --git a/dev/docker/hive/hive-site.xml b/dev/docker/hive/hive-site.xml index 3346d6be61f..477187153cb 100644 --- a/dev/docker/hive/hive-site.xml +++ b/dev/docker/hive/hive-site.xml @@ -42,4 +42,25 @@ hdfs://__REPLACE__HOST_NAME:9000/user/hive/warehouse location of default database for the warehouse + + + fs.s3a.access.key + S3_ACCESS_KEY_ID + + + + fs.s3a.secret.key + S3_SECRET_KEY_ID + + + + fs.s3a.endpoint + S3_ENDPOINT_ID + + + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,com.amazonaws.auth.EnvironmentVariableCredentialsProvider,org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider + + diff --git a/dev/docker/hive/start.sh b/dev/docker/hive/start.sh index 8bf1f12b97f..b9c545a0a7f 100644 --- a/dev/docker/hive/start.sh +++ b/dev/docker/hive/start.sh @@ -27,6 +27,8 @@ else ln -s ${HADOOP2_HOME} ${HADOOP_HOME} fi + cp ${HADOOP_HOME}/share/hadoop/tools/lib/*aws* ${HIVE_HOME}/lib + # Copy Hadoop and Hive configuration file and update hostname cp -f ${HADOOP_TMP_CONF_DIR}/* ${HADOOP_CONF_DIR} cp -f ${HIVE_TMP_CONF_DIR}/* ${HIVE_CONF_DIR} @@ -34,6 +36,10 @@ sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/core-site.xml sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/hdfs-site.xml sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HIVE_CONF_DIR}/hive-site.xml +sed -i "s|S3_ACCESS_KEY_ID|${S3_ACCESS_KEY}|g" ${HIVE_CONF_DIR}/hive-site.xml +sed -i "s|S3_SECRET_KEY_ID|${S3_SECRET_KEY}|g" ${HIVE_CONF_DIR}/hive-site.xml +sed -i "s|S3_ENDPOINT_ID|${S3_ENDPOINT}|g" ${HIVE_CONF_DIR}/hive-site.xml + # Link mysql-connector-java after deciding where HIVE_HOME symbolic link points to. ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.jar ${HIVE_HOME}/lib diff --git a/docs/docker-image-details.md b/docs/docker-image-details.md index 629541ea026..ba061ccd988 100644 --- a/docs/docker-image-details.md +++ b/docs/docker-image-details.md @@ -147,6 +147,12 @@ You can use this kind of image to test the catalog of Apache Hive. Changelog +- apache/gravitino-ci:hive-0.1.14 + - Add amazon S3 related configurations in the `hive-site.xml` file. + - `fs.s3a.access.key` The access key for the S3 bucket. + - `fs.s3a.secret.key` The secret key for the S3 bucket. + - `fs.s3a.endpoint` The endpoint for the S3 bucket. + - apache/gravitino-ci:hive-0.1.13 (Switch to Apache official DockerHub repository) - Use Gravitino release 0.6.0 Dockerfile to build the image. From 78ec1b7d8a289f837d74b5bf09aa641e0d37736e Mon Sep 17 00:00:00 2001 From: yuqi Date: Sat, 21 Sep 2024 17:14:42 +0800 Subject: [PATCH 2/4] Add Hive catalog with S3 location integration tests. 
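A note on how the S3 wiring in the first patch is meant to be used: hive-site.xml only carries placeholders, and at container start-up start.sh replaces them with the values of the S3_ACCESS_KEY, S3_SECRET_KEY and S3_ENDPOINT environment variables, so the Hive container reaches S3 through Hadoop's S3A connector. The integration tests added in this second patch configure the same fs.s3a.* keys on the client side. The following is a minimal client-side sketch, assuming the same three environment variables are exported locally and hadoop-aws is on the classpath; the bucket name is a placeholder, not something defined by these patches.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3AWarehouseCheck {
  public static void main(String[] args) throws Exception {
    // Mirror the fs.s3a.* properties that start.sh injects into hive-site.xml.
    Configuration conf = new Configuration();
    conf.set("fs.s3a.access.key", System.getenv("S3_ACCESS_KEY"));
    conf.set("fs.s3a.secret.key", System.getenv("S3_SECRET_KEY"));
    conf.set("fs.s3a.endpoint", System.getenv("S3_ENDPOINT"));
    conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
    conf.set(
        "fs.s3a.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider");

    // "my-warehouse-bucket" is a placeholder; point this at whatever bucket the warehouse uses.
    try (FileSystem fs = FileSystem.get(URI.create("s3a://my-warehouse-bucket"), conf)) {
      for (FileStatus status : fs.listStatus(new Path("/"))) {
        System.out.println(status.getPath());
      }
    }
  }
}

The provider list in the image's hive-site.xml additionally includes the environment-variable and anonymous credential providers, so the static access/secret key pair shown here is only one of the accepted authentication paths.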
--- build.gradle.kts | 3 +- catalogs/catalog-hive/build.gradle.kts | 2 + .../hive/integration/test/CatalogHiveIT.java | 218 ++++++++++-------- .../integration/test/CatalogHiveS3IT.java | 164 +++++++++++++ gradle/libs.versions.toml | 2 + .../test/container/ContainerSuite.java | 39 +++- .../GravitinoLocalStackContainer.java | 69 ++++++ 7 files changed, 394 insertions(+), 103 deletions(-) create mode 100644 catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java create mode 100644 integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/GravitinoLocalStackContainer.java diff --git a/build.gradle.kts b/build.gradle.kts index 1737902aff6..f7da9d9a090 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -168,12 +168,13 @@ allprojects { param.environment("PROJECT_VERSION", project.version) // Gravitino CI Docker image - param.environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "apache/gravitino-ci:hive-0.1.13") + param.environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "apache/gravitino-ci:hive-0.1.14") param.environment("GRAVITINO_CI_KERBEROS_HIVE_DOCKER_IMAGE", "apache/gravitino-ci:kerberos-hive-0.1.5") param.environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "apache/gravitino-ci:doris-0.1.5") param.environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE", "apache/gravitino-ci:trino-0.1.6") param.environment("GRAVITINO_CI_RANGER_DOCKER_IMAGE", "apache/gravitino-ci:ranger-0.1.1") param.environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0") + param.environment("GRAVITINO_CI_LOCALSTACK_DOCKER_IMAGE", "localstack/localstack:latest") val dockerRunning = project.rootProject.extra["dockerRunning"] as? Boolean ?: false val macDockerConnector = project.rootProject.extra["macDockerConnector"] as? 
Boolean ?: false diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts index 84474878e8c..8a0f9402f47 100644 --- a/catalogs/catalog-hive/build.gradle.kts +++ b/catalogs/catalog-hive/build.gradle.kts @@ -148,6 +148,8 @@ dependencies { testImplementation(libs.slf4j.api) testImplementation(libs.testcontainers) testImplementation(libs.testcontainers.mysql) + testImplementation(libs.testcontainers.localstack) + testImplementation(libs.hadoop2.s3) testRuntimeOnly(libs.junit.jupiter.engine) } diff --git a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java index 68e3b4e96cf..2937eb6f7aa 100644 --- a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java +++ b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java @@ -23,12 +23,9 @@ import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.EXTERNAL; import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.FORMAT; import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.INPUT_FORMAT; -import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.LOCATION; -import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.NUM_FILES; import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.OUTPUT_FORMAT; import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.SERDE_LIB; import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE; -import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.TOTAL_SIZE; import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.TRANSIENT_LAST_DDL_TIME; import static org.apache.gravitino.catalog.hive.TableType.EXTERNAL_TABLE; import static org.apache.gravitino.catalog.hive.TableType.MANAGED_TABLE; @@ -45,6 +42,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -110,33 +108,35 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @Tag("gravitino-docker-test") +@TestInstance(TestInstance.Lifecycle.PER_CLASS) public class CatalogHiveIT extends AbstractIT { private static final Logger LOG = LoggerFactory.getLogger(CatalogHiveIT.class); public static final String metalakeName = GravitinoITUtils.genRandomName("CatalogHiveIT_metalake"); - public static final String catalogName = GravitinoITUtils.genRandomName("CatalogHiveIT_catalog"); - public static final String SCHEMA_PREFIX = "CatalogHiveIT_schema"; - public static final String schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); - public static final String TABLE_PREFIX = "CatalogHiveIT_table"; - public static final String tableName = GravitinoITUtils.genRandomName(TABLE_PREFIX); + public String catalogName = GravitinoITUtils.genRandomName("CatalogHiveIT_catalog"); + public String SCHEMA_PREFIX = "CatalogHiveIT_schema"; + public String schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); + public String TABLE_PREFIX = "CatalogHiveIT_table"; + public String tableName = 
GravitinoITUtils.genRandomName(TABLE_PREFIX); public static final String ALTER_TABLE_NAME = "alert_table_name"; public static final String TABLE_COMMENT = "table_comment"; public static final String HIVE_COL_NAME1 = "hive_col_name1"; public static final String HIVE_COL_NAME2 = "hive_col_name2"; public static final String HIVE_COL_NAME3 = "hive_col_name3"; - private static String HIVE_METASTORE_URIS; - private static final String provider = "hive"; - private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); - private static HiveClientPool hiveClientPool; - private static GravitinoMetalake metalake; - private static Catalog catalog; - private static SparkSession sparkSession; - private static FileSystem hdfs; - private static final String SELECT_ALL_TEMPLATE = "SELECT * FROM %s.%s"; + protected String HIVE_METASTORE_URIS; + protected final String provider = "hive"; + protected final ContainerSuite containerSuite = ContainerSuite.getInstance(); + private HiveClientPool hiveClientPool; + protected GravitinoMetalake metalake; + protected Catalog catalog; + protected SparkSession sparkSession; + protected FileSystem fileSystem; + private final String SELECT_ALL_TEMPLATE = "SELECT * FROM %s.%s"; private static String getInsertWithoutPartitionSql( String dbName, String tableName, String values) { @@ -161,24 +161,11 @@ private static String getInsertWithPartitionSql( STRING_TYPE_NAME, "'gravitino_it_test'"); - @BeforeAll - public static void startup() throws Exception { + protected void startNecessaryContainer() { containerSuite.startHiveContainer(); + } - HIVE_METASTORE_URIS = - String.format( - "thrift://%s:%d", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HIVE_METASTORE_PORT); - - HiveConf hiveConf = new HiveConf(); - hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, HIVE_METASTORE_URIS); - - // Check if Hive client can connect to Hive metastore - hiveClientPool = new HiveClientPool(1, hiveConf); - List dbs = hiveClientPool.run(client -> client.getAllDatabases()); - Assertions.assertFalse(dbs.isEmpty()); - + protected void initSparkSession() { sparkSession = SparkSession.builder() .master("local[1]") @@ -194,7 +181,9 @@ public static void startup() throws Exception { .config("mapreduce.input.fileinputformat.input.dir.recursive", "true") .enableHiveSupport() .getOrCreate(); + } + protected void initFileSystem() throws IOException { Configuration conf = new Configuration(); conf.set( "fs.defaultFS", @@ -202,7 +191,29 @@ public static void startup() throws Exception { "hdfs://%s:%d", containerSuite.getHiveContainer().getContainerIpAddress(), HiveContainer.HDFS_DEFAULTFS_PORT)); - hdfs = FileSystem.get(conf); + fileSystem = FileSystem.get(conf); + } + + @BeforeAll + public void startup() throws Exception { + startNecessaryContainer(); + + HIVE_METASTORE_URIS = + String.format( + "thrift://%s:%d", + containerSuite.getHiveContainer().getContainerIpAddress(), + HiveContainer.HIVE_METASTORE_PORT); + + HiveConf hiveConf = new HiveConf(); + hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, HIVE_METASTORE_URIS); + + // Check if Hive client can connect to Hive metastore + hiveClientPool = new HiveClientPool(1, hiveConf); + List dbs = hiveClientPool.run(client -> client.getAllDatabases()); + Assertions.assertFalse(dbs.isEmpty()); + + initSparkSession(); + initFileSystem(); createMetalake(); createCatalog(); @@ -210,7 +221,7 @@ public static void startup() throws Exception { } @AfterAll - public static void stop() throws IOException { + public 
void stop() throws IOException { if (client != null) { Arrays.stream(catalog.asSchemas().listSchemas()) .filter(schema -> !schema.equals("default")) @@ -233,8 +244,8 @@ public static void stop() throws IOException { sparkSession.close(); } - if (hdfs != null) { - hdfs.close(); + if (fileSystem != null) { + fileSystem.close(); } try { closer.close(); @@ -254,7 +265,7 @@ public void resetSchema() throws TException, InterruptedException { createSchema(); } - private static void createMetalake() { + private void createMetalake() { GravitinoMetalake[] gravitinoMetalakes = client.listMetalakes(); Assertions.assertEquals(0, gravitinoMetalakes.length); @@ -266,7 +277,7 @@ private static void createMetalake() { metalake = loadMetalake; } - private static void createCatalog() { + protected void createCatalog() { Map properties = Maps.newHashMap(); properties.put(METASTORE_URIS, HIVE_METASTORE_URIS); @@ -275,20 +286,10 @@ private static void createCatalog() { catalog = metalake.loadCatalog(catalogName); } - private static void createSchema() throws TException, InterruptedException { - Map properties = Maps.newHashMap(); - properties.put("key1", "val1"); - properties.put("key2", "val2"); - properties.put( - "location", - String.format( - "hdfs://%s:%d/user/hive/warehouse/%s.db", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HDFS_DEFAULTFS_PORT, - schemaName.toLowerCase())); + private void createSchema() throws TException, InterruptedException { + Map schemaProperties = createSchemaProperties(); String comment = "comment"; - - catalog.asSchemas().createSchema(schemaName, comment, properties); + catalog.asSchemas().createSchema(schemaName, comment, schemaProperties); Schema loadSchema = catalog.asSchemas().loadSchema(schemaName); Assertions.assertEquals(schemaName.toLowerCase(), loadSchema.name()); Assertions.assertEquals(comment, loadSchema.comment()); @@ -335,7 +336,7 @@ private void checkTableReadWrite(org.apache.hadoop.hive.metastore.api.Table tabl Path tableDirectory = new Path(table.getSd().getLocation()); FileStatus[] fileStatuses; try { - fileStatuses = hdfs.listStatus(tableDirectory); + fileStatuses = fileSystem.listStatus(tableDirectory); } catch (IOException e) { LOG.warn("Failed to list status of table directory", e); throw new RuntimeException(e); @@ -346,7 +347,7 @@ private void checkTableReadWrite(org.apache.hadoop.hive.metastore.api.Table tabl } } - private Map createProperties() { + protected Map createProperties() { Map properties = Maps.newHashMap(); properties.put("key1", "val1"); properties.put("key2", "val2"); @@ -560,7 +561,7 @@ public void testHiveTableProperties() throws TException, InterruptedException { catalog .asTableCatalog() .createTable( - nameIdentifier, columns, TABLE_COMMENT, ImmutableMap.of(), Transforms.EMPTY_TRANSFORM); + nameIdentifier, columns, TABLE_COMMENT, createProperties(), Transforms.EMPTY_TRANSFORM); Table loadedTable1 = catalog.asTableCatalog().loadTable(nameIdentifier); HiveTablePropertiesMetadata tablePropertiesMetadata = new HiveTablePropertiesMetadata(); org.apache.hadoop.hive.metastore.api.Table actualTable = @@ -569,6 +570,10 @@ public void testHiveTableProperties() throws TException, InterruptedException { checkTableReadWrite(actualTable); // test set properties + Map properties = createProperties(); + properties.put(FORMAT, "textfile"); + properties.put(SERDE_LIB, HiveStorageConstants.OPENCSV_SERDE_CLASS); + properties.put(TABLE_TYPE, "external_table"); String table2 = GravitinoITUtils.genRandomName(TABLE_PREFIX); 
catalog .asTableCatalog() @@ -576,18 +581,7 @@ public void testHiveTableProperties() throws TException, InterruptedException { NameIdentifier.of(schemaName, table2), columns, TABLE_COMMENT, - ImmutableMap.of( - TABLE_TYPE, - "external_table", - LOCATION, - String.format( - "hdfs://%s:%d/tmp", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HDFS_DEFAULTFS_PORT), - FORMAT, - "textfile", - SERDE_LIB, - HiveStorageConstants.OPENCSV_SERDE_CLASS), + properties, Transforms.EMPTY_TRANSFORM); Table loadedTable2 = catalog.asTableCatalog().loadTable(NameIdentifier.of(schemaName, table2)); org.apache.hadoop.hive.metastore.api.Table actualTable2 = @@ -607,10 +601,9 @@ public void testHiveTableProperties() throws TException, InterruptedException { Assertions.assertEquals( ((Boolean) tablePropertiesMetadata.getDefaultValue(EXTERNAL)).toString().toUpperCase(), actualTable.getParameters().get(EXTERNAL)); - Assertions.assertTrue(actualTable2.getSd().getLocation().endsWith("/tmp")); Assertions.assertNotNull(loadedTable2.properties().get(TRANSIENT_LAST_DDL_TIME)); - Assertions.assertNotNull(loadedTable2.properties().get(NUM_FILES)); - Assertions.assertNotNull(loadedTable2.properties().get(TOTAL_SIZE)); + + // S3 doesn't support NUM_FILES and TOTAL_SIZE checkTableReadWrite(actualTable2); // test alter properties exception @@ -630,36 +623,42 @@ public void testHiveTableProperties() throws TException, InterruptedException { public void testHiveSchemaProperties() throws TException, InterruptedException { // test LOCATION property String schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); - Map properties = Maps.newHashMap(); - String expectedSchemaLocation = - String.format( - "hdfs://%s:%d/tmp", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HDFS_DEFAULTFS_PORT); - properties.put(HiveSchemaPropertiesMetadata.LOCATION, expectedSchemaLocation); - catalog.asSchemas().createSchema(schemaName, "comment", properties); + Map schemaProperties = createSchemaProperties(); + String expectedHDFSSchemaLocation = schemaProperties.get(HiveSchemaPropertiesMetadata.LOCATION); + + catalog.asSchemas().createSchema(schemaName, "comment", schemaProperties); Database actualSchema = hiveClientPool.run(client -> client.getDatabase(schemaName)); String actualSchemaLocation = actualSchema.getLocationUri(); - Assertions.assertTrue(actualSchemaLocation.endsWith(expectedSchemaLocation)); + Assertions.assertTrue(actualSchemaLocation.endsWith(expectedHDFSSchemaLocation)); NameIdentifier tableIdent = NameIdentifier.of(schemaName, GravitinoITUtils.genRandomName(TABLE_PREFIX)); + + Map tableProperties = createProperties(); + String expectedSchemaLocation = + tableProperties.getOrDefault( + HiveSchemaPropertiesMetadata.LOCATION, expectedHDFSSchemaLocation); + catalog .asTableCatalog() .createTable( tableIdent, createColumns(), TABLE_COMMENT, - ImmutableMap.of(), + tableProperties, Transforms.EMPTY_TRANSFORM); org.apache.hadoop.hive.metastore.api.Table actualTable = hiveClientPool.run(client -> client.getTable(schemaName, tableIdent.name())); String actualTableLocation = actualTable.getSd().getLocation(); // use `tableIdent.name().toLowerCase()` because HMS will convert table name to lower - String expectedTableLocation = expectedSchemaLocation + "/" + tableIdent.name().toLowerCase(); - Assertions.assertTrue(actualTableLocation.endsWith(expectedTableLocation)); + + // actualTable.getSd().getLocation() is null for S3 + if 
(!tableProperties.containsKey(HiveTablePropertiesMetadata.LOCATION)) { + String expectedTableLocation = expectedSchemaLocation + "/" + tableIdent.name().toLowerCase(); + Assertions.assertTrue(actualTableLocation.endsWith(expectedTableLocation)); + } checkTableReadWrite(actualTable); } @@ -901,7 +900,7 @@ public void testDropPartition() throws TException, InterruptedException, IOExcep hiveClientPool.run(client -> client.getTable(schemaName, createdTable.name())); Path partitionDirectory = new Path(hiveTab.getSd().getLocation() + identity.name()); Assertions.assertFalse( - hdfs.exists(partitionDirectory), "The partition directory should not exist"); + fileSystem.exists(partitionDirectory), "The partition directory should not exist"); // add partition "hive_col_name2=2024-01-02/hive_col_name3=gravitino_it_test2" String[] field3 = new String[] {"hive_col_name2"}; @@ -953,7 +952,7 @@ public void testDropPartition() throws TException, InterruptedException, IOExcep client.getPartition(schemaName, createdTable.name(), partitionAdded1.name()))); Path partitionDirectory1 = new Path(hiveTab.getSd().getLocation() + identity1.name()); Assertions.assertFalse( - hdfs.exists(partitionDirectory1), "The partition directory should not exist"); + fileSystem.exists(partitionDirectory1), "The partition directory should not exist"); Assertions.assertThrows( NoSuchObjectException.class, () -> @@ -962,7 +961,7 @@ public void testDropPartition() throws TException, InterruptedException, IOExcep client.getPartition(schemaName, createdTable.name(), partitionAdded2.name()))); Path partitionDirectory2 = new Path(hiveTab.getSd().getLocation() + identity2.name()); Assertions.assertFalse( - hdfs.exists(partitionDirectory2), "The partition directory should not exist"); + fileSystem.exists(partitionDirectory2), "The partition directory should not exist"); // test no-exist partition with ifExist=false Assertions.assertFalse(createdTable.supportPartitions().dropPartition(partitionAdded.name())); @@ -1388,7 +1387,7 @@ void testAlterEntityName() { // Schema does not have the rename operation. final String schemaName = GravitinoITUtils.genRandomName("CatalogHiveIT_schema"); - catalog.asSchemas().createSchema(schemaName, "", ImmutableMap.of()); + catalog.asSchemas().createSchema(schemaName, "", createSchemaProperties()); final Catalog cata = catalog; // Now try to rename table @@ -1472,19 +1471,23 @@ public void testDropHiveManagedTable() throws TException, InterruptedException, catalog.asTableCatalog().dropTable(NameIdentifier.of(schemaName, tableName)); Boolean existed = hiveClientPool.run(client -> client.tableExists(schemaName, tableName)); Assertions.assertFalse(existed, "The Hive table should not exist"); - Assertions.assertFalse(hdfs.exists(tableDirectory), "The table directory should not exist"); + Assertions.assertFalse( + fileSystem.exists(tableDirectory), "The table directory should not exist"); } @Test public void testDropHiveExternalTable() throws TException, InterruptedException, IOException { Column[] columns = createColumns(); + Map properties = createProperties(); + properties.put(TABLE_TYPE, EXTERNAL_TABLE.name().toLowerCase(Locale.ROOT)); + catalog .asTableCatalog() .createTable( NameIdentifier.of(schemaName, tableName), columns, TABLE_COMMENT, - ImmutableMap.of(TABLE_TYPE, EXTERNAL_TABLE.name().toLowerCase(Locale.ROOT)), + properties, new Transform[] {Transforms.identity(columns[2].name())}); // Directly get table from Hive metastore to check if the table is created successfully. 
org.apache.hadoop.hive.metastore.api.Table hiveTab = @@ -1497,7 +1500,7 @@ public void testDropHiveExternalTable() throws TException, InterruptedException, Assertions.assertFalse(existed, "The table should be not exist"); Path tableDirectory = new Path(hiveTab.getSd().getLocation()); Assertions.assertTrue( - hdfs.listStatus(tableDirectory).length > 0, "The table should not be empty"); + fileSystem.listStatus(tableDirectory).length > 0, "The table should not be empty"); } @Test @@ -1525,21 +1528,25 @@ public void testPurgeHiveManagedTable() throws TException, InterruptedException, catalog.asTableCatalog().purgeTable(NameIdentifier.of(schemaName, tableName)), "The table should not be found in the catalog"); Path tableDirectory = new Path(hiveTab.getSd().getLocation()); - Assertions.assertFalse(hdfs.exists(tableDirectory), "The table directory should not exist"); - Path trashDirectory = hdfs.getTrashRoot(tableDirectory); - Assertions.assertFalse(hdfs.exists(trashDirectory), "The trash should not exist"); + Assertions.assertFalse( + fileSystem.exists(tableDirectory), "The table directory should not exist"); + Path trashDirectory = fileSystem.getTrashRoot(tableDirectory); + Assertions.assertFalse(fileSystem.exists(trashDirectory), "The trash should not exist"); } @Test public void testPurgeHiveExternalTable() throws TException, InterruptedException, IOException { Column[] columns = createColumns(); + Map properties = createProperties(); + properties.put(TABLE_TYPE, EXTERNAL_TABLE.name().toLowerCase(Locale.ROOT)); + catalog .asTableCatalog() .createTable( NameIdentifier.of(schemaName, tableName), columns, TABLE_COMMENT, - ImmutableMap.of(TABLE_TYPE, EXTERNAL_TABLE.name().toLowerCase(Locale.ROOT)), + properties, new Transform[] {Transforms.identity(columns[2].name())}); // Directly get table from Hive metastore to check if the table is created successfully. @@ -1560,7 +1567,7 @@ public void testPurgeHiveExternalTable() throws TException, InterruptedException Assertions.assertTrue(existed, "The table should be still exist"); Path tableDirectory = new Path(hiveTab.getSd().getLocation()); Assertions.assertTrue( - hdfs.listStatus(tableDirectory).length > 0, "The table should not be empty"); + fileSystem.listStatus(tableDirectory).length > 0, "The table should not be empty"); } @Test @@ -1649,7 +1656,10 @@ void testAlterCatalogProperties() { Exception exception = Assertions.assertThrows( Exception.class, - () -> createdCatalog.asSchemas().createSchema("schema", "comment", ImmutableMap.of())); + () -> + createdCatalog + .asSchemas() + .createSchema("schema", "comment", createSchemaProperties())); Assertions.assertTrue(exception.getMessage().contains("Failed to connect to Hive Metastore")); Catalog newCatalog = @@ -1660,14 +1670,14 @@ void testAlterCatalogProperties() { // The URI has restored, so it should not throw exception. 
Assertions.assertDoesNotThrow( () -> { - newCatalog.asSchemas().createSchema("schema", "comment", ImmutableMap.of()); + newCatalog.asSchemas().createSchema("schema", "comment", createSchemaProperties()); }); newCatalog.asSchemas().dropSchema("schema", true); metalake.dropCatalog(nameOfCatalog); } - private static void createCatalogWithCustomOperation(String catalogName, String customImpl) { + private void createCatalogWithCustomOperation(String catalogName, String customImpl) { Map properties = Maps.newHashMap(); properties.put(METASTORE_URIS, HIVE_METASTORE_URIS); properties.put(BaseCatalog.CATALOG_OPERATION_IMPL, customImpl); @@ -1677,4 +1687,18 @@ private static void createCatalogWithCustomOperation(String catalogName, String catalogName, Catalog.Type.RELATIONAL, provider, "comment", properties); catalog.asSchemas().listSchemas(); } + + protected Map createSchemaProperties() { + Map properties = new HashMap<>(); + properties.put("key1", "val1"); + properties.put("key2", "val2"); + properties.put( + "location", + String.format( + "hdfs://%s:%d/user/hive/warehouse/%s.db", + containerSuite.getHiveContainer().getContainerIpAddress(), + HiveContainer.HDFS_DEFAULTFS_PORT, + schemaName.toLowerCase())); + return properties; + } } diff --git a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java new file mode 100644 index 00000000000..6cb8a84b2f1 --- /dev/null +++ b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.hive.integration.test; + +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.gravitino.integration.test.container.GravitinoLocalStackContainer; +import org.apache.gravitino.integration.test.container.HiveContainer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.Container; +import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; +import org.testcontainers.shaded.org.awaitility.Awaitility; + +public class CatalogHiveS3IT extends CatalogHiveIT { + + private static final Logger LOGGER = LoggerFactory.getLogger(CatalogHiveS3IT.class); + + private static final String S3_BUCKET_NAME = "my-test-bucket"; + // private static final String S3_BUCKET_NAME = "paimon-bucket/test1"; + // private LocalStackContainer localStackContainer; + private GravitinoLocalStackContainer gravitinoLocalStackContainer; + + private static final String S3_ACCESS_KEY = "S3_ACCESS_KEY"; + private static final String S3_SECRET_KEY = "S3_SECRET_KEY"; + private static final String S3_ENDPOINT = "S3_ENDPOINT"; + + private String getS3Endpoint; + private String accessKey; + private String secretKey; + + @Override + protected void startNecessaryContainer() { + containerSuite.startLocalStackContainer(); + gravitinoLocalStackContainer = containerSuite.getLocalStackContainer(); + + Awaitility.await() + .atMost(60, TimeUnit.SECONDS) + .pollInterval(1, TimeUnit.SECONDS) + .until( + () -> { + try { + Container.ExecResult result = + gravitinoLocalStackContainer.executeInContainer( + "awslocal", "iam", "create-user", "--user-name", "anonymous"); + return result.getExitCode() == 0; + } catch (Exception e) { + LOGGER.info("LocalStack is not ready yet for: ", e); + return false; + } + }); + + gravitinoLocalStackContainer.executeInContainer( + "awslocal", "s3", "mb", "s3://" + S3_BUCKET_NAME); + + Container.ExecResult result = + gravitinoLocalStackContainer.executeInContainer( + "awslocal", "iam", "create-access-key", "--user-name", "anonymous"); + + // Get access key and secret key from result + String[] lines = result.getStdout().split("\n"); + accessKey = lines[3].split(":")[1].trim().substring(1, 21); + secretKey = lines[5].split(":")[1].trim().substring(1, 41); + + LOGGER.info("Access key: " + accessKey); + LOGGER.info("Secret key: " + secretKey); + + getS3Endpoint = + String.format("http://%s:%d", gravitinoLocalStackContainer.getContainerIpAddress(), 4566); + + gravitinoLocalStackContainer.executeInContainer( + "awslocal", + "s3api", + "put-bucket-acl", + "--bucket", + "my-test-bucket", + "--acl", + "public-read-write"); + + Map hiveContainerEnv = + ImmutableMap.of( + S3_ACCESS_KEY, + accessKey, + S3_SECRET_KEY, + secretKey, + S3_ENDPOINT, + getS3Endpoint, + HiveContainer.HIVE_RUNTIME_VERSION, + HiveContainer.HIVE3); + + containerSuite.startHiveContainer(hiveContainerEnv); + } + + @Override + protected void initFileSystem() throws IOException { + // Use S3a file system + Configuration conf = new Configuration(); + conf.set("fs.s3a.access.key", accessKey); + conf.set("fs.s3a.secret.key", secretKey); + conf.set("fs.s3a.endpoint", getS3Endpoint); + conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem"); + conf.set( + "fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"); + 
conf.set("fs.s3a.path.style.access", "true"); + conf.set("fs.s3a.connection.ssl.enabled", "false"); + fileSystem = FileSystem.get(URI.create("s3a://" + S3_BUCKET_NAME), conf); + } + + @Override + protected void initSparkSession() { + sparkSession = + SparkSession.builder() + .master("local[1]") + .appName("Hive Catalog integration test") + .config("hive.metastore.uris", HIVE_METASTORE_URIS) + .config( + "spark.sql.warehouse.dir", + String.format( + "hdfs://%s:%d/user/hive/warehouse", + containerSuite.getHiveContainer().getContainerIpAddress(), + HiveContainer.HDFS_DEFAULTFS_PORT)) + .config("spark.hadoop.fs.s3a.access.key", accessKey) + .config("spark.hadoop.fs.s3a.secret.key", secretKey) + .config("spark.hadoop.fs.s3a.endpoint", getS3Endpoint) + .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") + .config("spark.hadoop.fs.s3a.path.style.access", "true") + .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false") + .config( + "spark.hadoop.fs.s3a.aws.credentials.provider", + "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") + .config("spark.sql.storeAssignmentPolicy", "LEGACY") + .config("mapreduce.input.fileinputformat.input.dir.recursive", "true") + .enableHiveSupport() + .getOrCreate(); + } + + @Override + protected Map createSchemaProperties() { + Map properties = super.createSchemaProperties(); + properties.put("location", "s3a://" + S3_BUCKET_NAME + "/test-" + System.currentTimeMillis()); + return properties; + } +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index af0b1becb5f..95c08e9332b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -141,6 +141,7 @@ hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref hadoop2-hdfs-client = { group = "org.apache.hadoop", name = "hadoop-hdfs-client", version.ref = "hadoop2" } hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop2"} hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop2"} +hadoop2-s3 = { group = "org.apache.hadoop", name = "hadoop-aws", version.ref = "hadoop2"} hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop3" } hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop3"} hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"} @@ -178,6 +179,7 @@ testcontainers = { group = "org.testcontainers", name = "testcontainers", versio testcontainers-mysql = { group = "org.testcontainers", name = "mysql", version.ref = "testcontainers" } testcontainers-postgresql = { group = "org.testcontainers", name = "postgresql", version.ref = "testcontainers" } testcontainers-junit-jupiter = { group = "org.testcontainers", name = "junit-jupiter", version.ref = "testcontainers" } +testcontainers-localstack = { group = "org.testcontainers", name = "localstack", version.ref = "testcontainers" } trino-jdbc = { group = "io.trino", name = "trino-jdbc", version.ref = "trino" } jwt-api = { group = "io.jsonwebtoken", name = "jjwt-api", version.ref = "jwt"} jwt-impl = { group = "io.jsonwebtoken", name = "jjwt-impl", version.ref = "jwt"} diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java index bc311c4bcda..22671cb3bfa 100644 --- 
a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java @@ -71,6 +71,7 @@ public class ContainerSuite implements Closeable { private static volatile Map pgContainerMap = new EnumMap<>(PGImageName.class); + private static volatile GravitinoLocalStackContainer gravitinoLocalStackContainer; protected static final CloseableGroup closer = CloseableGroup.create(); private static void initIfNecessary() { @@ -112,7 +113,11 @@ public Network getNetwork() { return network; } - public void startHiveContainer() { + public void startHiveContainer(Map env) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.putAll(env); + builder.put("HADOOP_USER_NAME", "anonymous"); + if (hiveContainer == null) { synchronized (ContainerSuite.class) { if (hiveContainer == null) { @@ -121,10 +126,7 @@ public void startHiveContainer() { HiveContainer.Builder hiveBuilder = HiveContainer.builder() .withHostName("gravitino-ci-hive") - .withEnvVars( - ImmutableMap.builder() - .put("HADOOP_USER_NAME", "anonymous") - .build()) + .withEnvVars(builder.build()) .withNetwork(network); HiveContainer container = closer.register(hiveBuilder.build()); container.start(); @@ -134,6 +136,10 @@ public void startHiveContainer() { } } + public void startHiveContainer() { + startHiveContainer(ImmutableMap.of()); + } + /** * To start and enable Ranger plugin in Hive container,
 * you can specify environment variables:
@@ -361,6 +367,29 @@ public void startKafkaContainer() { } } + public void startLocalStackContainer() { + if (gravitinoLocalStackContainer == null) { + synchronized (ContainerSuite.class) { + if (gravitinoLocalStackContainer == null) { + GravitinoLocalStackContainer.Builder builder = + GravitinoLocalStackContainer.builder().withNetwork(network); + GravitinoLocalStackContainer container = closer.register(builder.build()); + try { + container.start(); + } catch (Exception e) { + LOG.error("Failed to start LocalStack container", e); + throw new RuntimeException("Failed to start LocalStack container", e); + } + gravitinoLocalStackContainer = container; + } + } + } + } + + public GravitinoLocalStackContainer getLocalStackContainer() { + return gravitinoLocalStackContainer; + } + public KafkaContainer getKafkaContainer() { return kafkaContainer; } diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/GravitinoLocalStackContainer.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/GravitinoLocalStackContainer.java new file mode 100644 index 00000000000..11eae352503 --- /dev/null +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/GravitinoLocalStackContainer.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.integration.test.container; + +import com.google.common.collect.ImmutableSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.testcontainers.containers.Network; + +public class GravitinoLocalStackContainer extends BaseContainer { + + public static final String DEFAULT_IMAGE = System.getenv("GRAVITINO_CI_LOCALSTACK_DOCKER_IMAGE"); + public static final String HOST_NAME = "gravitino-ci-localstack"; + public static final int PORT = 4566; + + public GravitinoLocalStackContainer( + String image, + String hostName, + Set ports, + Map extraHosts, + Map filesToMount, + Map envVars, + Optional network) { + super(image, hostName, ports, extraHosts, filesToMount, envVars, network); + } + + public static Builder builder() { + return new Builder(); + } + + @Override + protected boolean checkContainerStatus(int retryLimit) { + return true; + } + + public static class Builder + extends BaseContainer.Builder< + GravitinoLocalStackContainer.Builder, GravitinoLocalStackContainer> { + public Builder() { + super(); + this.image = DEFAULT_IMAGE; + this.hostName = HOST_NAME; + this.exposePorts = ImmutableSet.of(PORT); + } + + @Override + public GravitinoLocalStackContainer build() { + return new GravitinoLocalStackContainer( + image, hostName, exposePorts, extraHosts, filesToMount, envVars, network); + } + } +} From 01ac7740657e3ad467ab99930cc7a53d35090d95 Mon Sep 17 00:00:00 2001 From: yuqi Date: Sat, 21 Sep 2024 17:59:10 +0800 Subject: [PATCH 3/4] Fix test error. --- .../hive/integration/test/CatalogHiveIT.java | 12 ++++---- .../integration/test/CatalogHiveS3IT.java | 15 ++++++++-- .../test/container/ContainerSuite.java | 29 +++++++++++++++++++ 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java index 2937eb6f7aa..73a6a33cb9b 100644 --- a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java +++ b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveIT.java @@ -163,6 +163,12 @@ private static String getInsertWithPartitionSql( protected void startNecessaryContainer() { containerSuite.startHiveContainer(); + + HIVE_METASTORE_URIS = + String.format( + "thrift://%s:%d", + containerSuite.getHiveContainer().getContainerIpAddress(), + HiveContainer.HIVE_METASTORE_PORT); } protected void initSparkSession() { @@ -198,12 +204,6 @@ protected void initFileSystem() throws IOException { public void startup() throws Exception { startNecessaryContainer(); - HIVE_METASTORE_URIS = - String.format( - "thrift://%s:%d", - containerSuite.getHiveContainer().getContainerIpAddress(), - HiveContainer.HIVE_METASTORE_PORT); - HiveConf hiveConf = new HiveConf(); hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, HIVE_METASTORE_URIS); diff --git a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java index 6cb8a84b2f1..fb94b556a12 100644 --- a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java +++ b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java @@ -20,6 +20,7 @@ import 
java.io.IOException; import java.net.URI; +import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.gravitino.integration.test.container.GravitinoLocalStackContainer; @@ -109,7 +110,13 @@ protected void startNecessaryContainer() { HiveContainer.HIVE_RUNTIME_VERSION, HiveContainer.HIVE3); - containerSuite.startHiveContainer(hiveContainerEnv); + containerSuite.startHiveContainerWithS3(hiveContainerEnv); + + HIVE_METASTORE_URIS = + String.format( + "thrift://%s:%d", + containerSuite.getHiveContainerWithS3().getContainerIpAddress(), + HiveContainer.HIVE_METASTORE_PORT); } @Override @@ -138,7 +145,7 @@ protected void initSparkSession() { "spark.sql.warehouse.dir", String.format( "hdfs://%s:%d/user/hive/warehouse", - containerSuite.getHiveContainer().getContainerIpAddress(), + containerSuite.getHiveContainerWithS3().getContainerIpAddress(), HiveContainer.HDFS_DEFAULTFS_PORT)) .config("spark.hadoop.fs.s3a.access.key", accessKey) .config("spark.hadoop.fs.s3a.secret.key", secretKey) @@ -157,7 +164,9 @@ protected void initSparkSession() { @Override protected Map createSchemaProperties() { - Map properties = super.createSchemaProperties(); + Map properties = new HashMap<>(); + properties.put("key1", "val1"); + properties.put("key2", "val2"); properties.put("location", "s3a://" + S3_BUCKET_NAME + "/test-" + System.currentTimeMillis()); return properties; } diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java index 22671cb3bfa..ce9faec7899 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java @@ -72,6 +72,8 @@ public class ContainerSuite implements Closeable { new EnumMap<>(PGImageName.class); private static volatile GravitinoLocalStackContainer gravitinoLocalStackContainer; + private static volatile HiveContainer hiveContainerWithS3; + protected static final CloseableGroup closer = CloseableGroup.create(); private static void initIfNecessary() { @@ -136,6 +138,29 @@ public void startHiveContainer(Map env) { } } + public void startHiveContainerWithS3(Map env) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.putAll(env); + builder.put("HADOOP_USER_NAME", "anonymous"); + + if (hiveContainerWithS3 == null) { + synchronized (ContainerSuite.class) { + if (hiveContainerWithS3 == null) { + initIfNecessary(); + // Start Hive container + HiveContainer.Builder hiveBuilder = + HiveContainer.builder() + .withHostName("gravitino-ci-hive") + .withEnvVars(builder.build()) + .withNetwork(network); + HiveContainer container = closer.register(hiveBuilder.build()); + container.start(); + hiveContainerWithS3 = container; + } + } + } + } + public void startHiveContainer() { startHiveContainer(ImmutableMap.of()); } @@ -390,6 +415,10 @@ public GravitinoLocalStackContainer getLocalStackContainer() { return gravitinoLocalStackContainer; } + public HiveContainer getHiveContainerWithS3() { + return hiveContainerWithS3; + } + public KafkaContainer getKafkaContainer() { return kafkaContainer; } From 771d6a3a789895a6480a85df219a493bd8962478 Mon Sep 17 00:00:00 2001 From: yuqi Date: Mon, 23 Sep 2024 11:21:36 +0800 Subject: [PATCH 4/4] Remove some unused code. 
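For context on the startHiveContainerWithS3 method introduced in the previous patch: it reuses the lazy start-up idiom the suite already applies to the other containers, a static volatile field guarded by double-checked locking, so that concurrent test classes trigger at most one container start and later callers observe the fully started instance. Below is a stripped-down sketch of that idiom only; the class name and the startContainer body are placeholders rather than the real ContainerSuite code.

public final class ContainerHolder {
  // volatile: a container fully started by one thread is safely visible to the others.
  private static volatile AutoCloseable hiveContainerWithS3;

  public static AutoCloseable getOrStart() {
    if (hiveContainerWithS3 == null) {            // first check, lock-free fast path
      synchronized (ContainerHolder.class) {
        if (hiveContainerWithS3 == null) {        // second check, under the lock
          hiveContainerWithS3 = startContainer(); // runs exactly once
        }
      }
    }
    return hiveContainerWithS3;
  }

  private static AutoCloseable startContainer() {
    // Placeholder for the real work: build the container, start it, wait for readiness.
    return () -> {};
  }
}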
--- .../catalog/hive/integration/test/CatalogHiveS3IT.java | 2 -- .../integration/test/container/ContainerSuite.java | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java index fb94b556a12..83d52d2411b 100644 --- a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java +++ b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/integration/test/CatalogHiveS3IT.java @@ -39,8 +39,6 @@ public class CatalogHiveS3IT extends CatalogHiveIT { private static final Logger LOGGER = LoggerFactory.getLogger(CatalogHiveS3IT.class); private static final String S3_BUCKET_NAME = "my-test-bucket"; - // private static final String S3_BUCKET_NAME = "paimon-bucket/test1"; - // private LocalStackContainer localStackContainer; private GravitinoLocalStackContainer gravitinoLocalStackContainer; private static final String S3_ACCESS_KEY = "S3_ACCESS_KEY"; diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java index ce9faec7899..12a88bbd94c 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/container/ContainerSuite.java @@ -72,6 +72,13 @@ public class ContainerSuite implements Closeable { new EnumMap<>(PGImageName.class); private static volatile GravitinoLocalStackContainer gravitinoLocalStackContainer; + + /** + * We can share the same Hive container as Hive container with S3 contains the following + * differences: 1. Configuration of S3 and corresponding environment variables 2. The Hive + * container with S3 is Hive3 container and the Hive container is Hive2 container. There is + * something wrong with the hive2 container to access the S3. + */ private static volatile HiveContainer hiveContainerWithS3; protected static final CloseableGroup closer = CloseableGroup.create();