[apache#2307] improve(catalog-hadoop): Formalize the storage location path before storing (apache#2308)

### What changes were proposed in this pull request?

This PR formalizes the storage location path into a standard URI format before storing it in the entity store.

### Why are the changes needed?

Users can set the storage location in different ways, such as "/tmp/xxx" or "xxx". We need to formalize the path to give users a consistent view of it.

Fix: apache#2307 
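
For context, here is a minimal standalone sketch (not part of this change) of the Hadoop `Path.makeQualified` behavior that the new `formalizePath` helper relies on. The class name `QualifyPathDemo` and the printed paths are illustrative assumptions:

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketch only: shows how a scheme-less or relative location is
// qualified against the default FileSystem, which is what formalizePath does.
public class QualifyPathDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem defaultFs = FileSystem.get(conf);

    // "/tmp/xxx" has no scheme; qualification adds the default "file:" scheme.
    Path absolute =
        new Path("/tmp/xxx").makeQualified(defaultFs.getUri(), defaultFs.getWorkingDirectory());
    System.out.println(absolute); // e.g. file:/tmp/xxx

    // "xxx" is relative; it is also resolved against the working directory.
    Path relative =
        new Path("xxx").makeQualified(defaultFs.getUri(), defaultFs.getWorkingDirectory());
    System.out.println(relative); // e.g. file:/home/user/xxx
  }
}
```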

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

UTs.
jerryshao authored Feb 22, 2024
1 parent 4e338e3 commit cadfc52
Showing 2 changed files with 118 additions and 3 deletions.
HadoopCatalogOperations.java
@@ -189,7 +189,10 @@ public Fileset createFileset(
StringUtils.isNotBlank(storageLocation)
? new Path(storageLocation)
: new Path(schemaPath, ident.name());

try {
// formalize the path to avoid path without scheme, uri, authority, etc.
filesetPath = formalizePath(filesetPath, hadoopConf);
FileSystem fs = filesetPath.getFileSystem(hadoopConf);
if (!fs.exists(filesetPath)) {
if (!fs.mkdirs(filesetPath)) {
@@ -599,4 +602,10 @@ private Path getSchemaPath(String name, Map<String, String> properties) {
.map(Path::new)
.orElse(catalogStorageLocation.map(p -> new Path(p, name)).orElse(null));
}

@VisibleForTesting
static Path formalizePath(Path path, Configuration configuration) throws IOException {
FileSystem defaultFs = FileSystem.get(configuration);
return path.makeQualified(defaultFs.getUri(), defaultFs.getWorkingDirectory());
}
}
TestHadoopCatalogOperations.java
@@ -34,6 +34,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
@@ -58,8 +59,10 @@ public class TestHadoopCatalogOperations {
private static final String ROCKS_DB_STORE_PATH =
"/tmp/gravitino_test_entityStore_" + UUID.randomUUID().toString().replace("-", "");

private static final String TEST_ROOT_PATH =
"file:/tmp/gravitino_test_catalog_" + UUID.randomUUID().toString().replace("-", "");
private static final String UNFORMALIZED_TEST_ROOT_PATH =
"/tmp/gravitino_test_catalog_" + UUID.randomUUID().toString().replace("-", "");

private static final String TEST_ROOT_PATH = "file:" + UNFORMALIZED_TEST_ROOT_PATH;

private static EntityStore store;

@@ -533,6 +536,37 @@ public void testAlterFilesetProperties() throws IOException {
}
}

@Test
public void testFormalizePath() throws IOException {

String[] paths =
new String[] {
"tmp/catalog",
"/tmp/catalog",
"file:/tmp/catalog",
"file:///tmp/catalog",
"hdfs://localhost:9000/tmp/catalog",
"s3://bucket/tmp/catalog",
"gs://bucket/tmp/catalog"
};

String[] expected =
new String[] {
"file:" + Paths.get("").toAbsolutePath() + "/tmp/catalog",
"file:/tmp/catalog",
"file:/tmp/catalog",
"file:/tmp/catalog",
"hdfs://localhost:9000/tmp/catalog",
"s3://bucket/tmp/catalog",
"gs://bucket/tmp/catalog"
};

for (int i = 0; i < paths.length; i++) {
Path actual = HadoopCatalogOperations.formalizePath(new Path(paths[i]), new Configuration());
Assertions.assertEquals(expected[i], actual.toString());
}
}

private static Stream<Arguments> locationArguments() {
return Stream.of(
// Honor the catalog location
@@ -606,7 +640,79 @@ private static Stream<Arguments> locationArguments() {
null,
null,
TEST_ROOT_PATH + "/fileset19",
TEST_ROOT_PATH + "/fileset19"));
TEST_ROOT_PATH + "/fileset19"),
// Honor the catalog location
Arguments.of(
"fileset101",
Fileset.Type.MANAGED,
UNFORMALIZED_TEST_ROOT_PATH + "/catalog201",
null,
null,
TEST_ROOT_PATH + "/catalog201/s1_fileset101/fileset101"),
Arguments.of(
// honor the schema location
"fileset102",
Fileset.Type.MANAGED,
null,
UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset102",
null,
TEST_ROOT_PATH + "/s1_fileset102/fileset102"),
Arguments.of(
// honor the schema location
"fileset103",
Fileset.Type.MANAGED,
UNFORMALIZED_TEST_ROOT_PATH + "/catalog202",
UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset103",
null,
TEST_ROOT_PATH + "/s1_fileset103/fileset103"),
Arguments.of(
// honor the storage location
"fileset104",
Fileset.Type.MANAGED,
UNFORMALIZED_TEST_ROOT_PATH + "/catalog203",
UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset104",
UNFORMALIZED_TEST_ROOT_PATH + "/fileset104",
TEST_ROOT_PATH + "/fileset104"),
Arguments.of(
// honor the storage location
"fileset105",
Fileset.Type.MANAGED,
null,
null,
UNFORMALIZED_TEST_ROOT_PATH + "/fileset105",
TEST_ROOT_PATH + "/fileset105"),
Arguments.of(
// honor the storage location
"fileset106",
Fileset.Type.MANAGED,
UNFORMALIZED_TEST_ROOT_PATH + "/catalog204",
null,
UNFORMALIZED_TEST_ROOT_PATH + "/fileset106",
TEST_ROOT_PATH + "/fileset106"),
Arguments.of(
// honor the storage location
"fileset107",
Fileset.Type.EXTERNAL,
UNFORMALIZED_TEST_ROOT_PATH + "/catalog205",
UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset107",
UNFORMALIZED_TEST_ROOT_PATH + "/fileset107",
TEST_ROOT_PATH + "/fileset107"),
Arguments.of(
// honor the storage location
"fileset108",
Fileset.Type.EXTERNAL,
null,
UNFORMALIZED_TEST_ROOT_PATH + "/s1_fileset108",
UNFORMALIZED_TEST_ROOT_PATH + "/fileset108",
TEST_ROOT_PATH + "/fileset108"),
Arguments.of(
// honor the storage location
"fileset109",
Fileset.Type.EXTERNAL,
null,
null,
UNFORMALIZED_TEST_ROOT_PATH + "/fileset109",
TEST_ROOT_PATH + "/fileset109"));
}

private static Stream<Arguments> testRenameArguments() {
