From cadfc52f2440a5845ffc6271b0ae52ba3180c290 Mon Sep 17 00:00:00 2001 From: Jerry Shao Date: Thu, 22 Feb 2024 18:54:53 +0800 Subject: [PATCH] [#2307] improve(catalog-hadoop): Formalize the storage location path before storing (#2308) ### What changes were proposed in this pull request? This PR improves to formalize the storage location path into a standard URI format before storing into the entity store. ### Why are the changes needed? Users could set the storage location with different ways, like "/tmp/xxx", "xxx". We need to formalize the path to give user a consistent view of the path. Fix: #2307 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UTs. --- .../hadoop/HadoopCatalogOperations.java | 9 ++ .../hadoop/TestHadoopCatalogOperations.java | 112 +++++++++++++++++- 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/catalogs/catalog-hadoop/src/main/java/com/datastrato/gravitino/catalog/hadoop/HadoopCatalogOperations.java b/catalogs/catalog-hadoop/src/main/java/com/datastrato/gravitino/catalog/hadoop/HadoopCatalogOperations.java index 587f8d18986..bd911bbfd32 100644 --- a/catalogs/catalog-hadoop/src/main/java/com/datastrato/gravitino/catalog/hadoop/HadoopCatalogOperations.java +++ b/catalogs/catalog-hadoop/src/main/java/com/datastrato/gravitino/catalog/hadoop/HadoopCatalogOperations.java @@ -189,7 +189,10 @@ public Fileset createFileset( StringUtils.isNotBlank(storageLocation) ? new Path(storageLocation) : new Path(schemaPath, ident.name()); + try { + // formalize the path to avoid path without scheme, uri, authority, etc. + filesetPath = formalizePath(filesetPath, hadoopConf); FileSystem fs = filesetPath.getFileSystem(hadoopConf); if (!fs.exists(filesetPath)) { if (!fs.mkdirs(filesetPath)) { @@ -599,4 +602,10 @@ private Path getSchemaPath(String name, Map properties) { .map(Path::new) .orElse(catalogStorageLocation.map(p -> new Path(p, name)).orElse(null)); } + + @VisibleForTesting + static Path formalizePath(Path path, Configuration configuration) throws IOException { + FileSystem defaultFs = FileSystem.get(configuration); + return path.makeQualified(defaultFs.getUri(), defaultFs.getWorkingDirectory()); + } } diff --git a/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java b/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java index d95b03d3d57..78573dee00b 100644 --- a/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java +++ b/catalogs/catalog-hadoop/src/test/java/com/datastrato/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java @@ -34,6 +34,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import java.io.IOException; +import java.nio.file.Paths; import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -58,8 +59,10 @@ public class TestHadoopCatalogOperations { private static final String ROCKS_DB_STORE_PATH = "/tmp/gravitino_test_entityStore_" + UUID.randomUUID().toString().replace("-", ""); - private static final String TEST_ROOT_PATH = - "file:/tmp/gravitino_test_catalog_" + UUID.randomUUID().toString().replace("-", ""); + private static final String UNFORMALIZED_TEST_ROOT_PATH = + "/tmp/gravitino_test_catalog_" + UUID.randomUUID().toString().replace("-", ""); + + private static final String TEST_ROOT_PATH = "file:" + UNFORMALIZED_TEST_ROOT_PATH; private static EntityStore store; @@ -533,6 +536,37 @@ public void testAlterFilesetProperties() throws IOException { } } + @Test + public void testFormalizePath() throws IOException { + + String[] paths = + new String[] { + "tmp/catalog", + "/tmp/catalog", + "file:/tmp/catalog", + "file:///tmp/catalog", + "hdfs://localhost:9000/tmp/catalog", + "s3://bucket/tmp/catalog", + "gs://bucket/tmp/catalog" + }; + + String[] expected = + new String[] { + "file:" + Paths.get("").toAbsolutePath() + "/tmp/catalog", + "file:/tmp/catalog", + "file:/tmp/catalog", + "file:/tmp/catalog", + "hdfs://localhost:9000/tmp/catalog", + "s3://bucket/tmp/catalog", + "gs://bucket/tmp/catalog" + }; + + for (int i = 0; i < paths.length; i++) { + Path actual = HadoopCatalogOperations.formalizePath(new Path(paths[i]), new Configuration()); + Assertions.assertEquals(expected[i], actual.toString()); + } + } + private static Stream locationArguments() { return Stream.of( // Honor the catalog location @@ -606,7 +640,79 @@ private static Stream locationArguments() { null, null, TEST_ROOT_PATH + "/fileset19", - TEST_ROOT_PATH + "/fileset19")); + TEST_ROOT_PATH + "/fileset19"), + // Honor the catalog location + Arguments.of( + "fileset101", + Fileset.Type.MANAGED, + UNFORMALIZED_TEST_ROOT_PATH + "/catalog201", + null, + null, + TEST_ROOT_PATH + "/catalog201/s1_fileset101/fileset101"), + Arguments.of( + // honor the schema location + "fileset102", + Fileset.Type.MANAGED, + null, + UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset102", + null, + TEST_ROOT_PATH + "/s1_fileset102/fileset102"), + Arguments.of( + // honor the schema location + "fileset103", + Fileset.Type.MANAGED, + UNFORMALIZED_TEST_ROOT_PATH + "/catalog202", + UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset103", + null, + TEST_ROOT_PATH + "/s1_fileset103/fileset103"), + Arguments.of( + // honor the storage location + "fileset104", + Fileset.Type.MANAGED, + UNFORMALIZED_TEST_ROOT_PATH + "/catalog203", + UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset104", + UNFORMALIZED_TEST_ROOT_PATH + "/fileset104", + TEST_ROOT_PATH + "/fileset104"), + Arguments.of( + // honor the storage location + "fileset105", + Fileset.Type.MANAGED, + null, + null, + UNFORMALIZED_TEST_ROOT_PATH + "/fileset105", + TEST_ROOT_PATH + "/fileset105"), + Arguments.of( + // honor the storage location + "fileset106", + Fileset.Type.MANAGED, + UNFORMALIZED_TEST_ROOT_PATH + "/catalog204", + null, + UNFORMALIZED_TEST_ROOT_PATH + "/fileset106", + TEST_ROOT_PATH + "/fileset106"), + Arguments.of( + // honor the storage location + "fileset107", + Fileset.Type.EXTERNAL, + UNFORMALIZED_TEST_ROOT_PATH + "/catalog205", + UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset107", + UNFORMALIZED_TEST_ROOT_PATH + "/fileset107", + TEST_ROOT_PATH + "/fileset107"), + Arguments.of( + // honor the storage location + "fileset108", + Fileset.Type.EXTERNAL, + null, + UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset108", + UNFORMALIZED_TEST_ROOT_PATH + "/fileset108", + TEST_ROOT_PATH + "/fileset108"), + Arguments.of( + // honor the storage location + "fileset109", + Fileset.Type.EXTERNAL, + null, + null, + UNFORMALIZED_TEST_ROOT_PATH + "/fileset109", + TEST_ROOT_PATH + "/fileset109")); } private static Stream testRenameArguments() {