From 2c00e4aa1db6d45ee1c86c2e896437101b3d7e54 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Thu, 21 Nov 2024 10:15:52 +0100 Subject: [PATCH] Core: Add TableUtil to provide access to a table's format version --- .../org/apache/iceberg/SerializableTable.java | 6 +- .../java/org/apache/iceberg/TableUtil.java | 59 ++++++++++ .../org/apache/iceberg/TestTableUtil.java | 111 ++++++++++++++++++ .../apache/iceberg/catalog/CatalogTests.java | 10 +- .../hadoop/TestTableSerialization.java | 9 ++ 5 files changed, 188 insertions(+), 7 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/TableUtil.java create mode 100644 core/src/test/java/org/apache/iceberg/TestTableUtil.java diff --git a/core/src/main/java/org/apache/iceberg/SerializableTable.java b/core/src/main/java/org/apache/iceberg/SerializableTable.java index a2c0d776423c..968a12b5231a 100644 --- a/core/src/main/java/org/apache/iceberg/SerializableTable.java +++ b/core/src/main/java/org/apache/iceberg/SerializableTable.java @@ -120,7 +120,7 @@ private FileIO fileIO(Table table) { return table.io(); } - private Table lazyTable() { + protected Table lazyTable() { if (lazyTable == null) { synchronized (this) { if (lazyTable == null) { @@ -428,6 +428,10 @@ public StaticTableOperations operations() { this.getClass().getName() + " does not support operations()"); } + public BaseMetadataTable underlyingMetadataTable() { + return (BaseMetadataTable) lazyTable(); + } + public MetadataTableType type() { return type; } diff --git a/core/src/main/java/org/apache/iceberg/TableUtil.java b/core/src/main/java/org/apache/iceberg/TableUtil.java new file mode 100644 index 000000000000..8550b6e1f98a --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TableUtil.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public class TableUtil { + private TableUtil() {} + + /** + * Returns the format version of the given table. Note that subclasses of {@link + * BaseMetadataTable} do not have a format version. The only exception to this is {@link + * PositionDeletesTable}, where the format version is fetched from the underlying table. + */ + public static int formatVersion(Table table) { + Preconditions.checkArgument(null != table, "Invalid table: null"); + + // being able to read the format version from the PositionDeletesTable is mainly needed in + // SparkPositionDeletesRewrite when determining whether to rewrite V2 position deletes to DVs + if (table instanceof BaseMetadataTable) { + Preconditions.checkArgument( + table instanceof PositionDeletesTable, + "%s table does not have a format version", + ((BaseMetadataTable) table).metadataTableType()); + + return ((PositionDeletesTable) table).table().operations().current().formatVersion(); + } else if (table instanceof SerializableTable.SerializableMetadataTable) { + SerializableTable.SerializableMetadataTable metadataTable = + (SerializableTable.SerializableMetadataTable) table; + Preconditions.checkArgument( + metadataTable.type() == MetadataTableType.POSITION_DELETES, + "%s table does not have a format version", + metadataTable.type()); + + return metadataTable.underlyingMetadataTable().table().operations().current().formatVersion(); + } else if (table instanceof HasTableOperations) { + return ((HasTableOperations) table).operations().current().formatVersion(); + } else { + throw new IllegalArgumentException( + String.format("%s does not have a format version", table.getClass().getSimpleName())); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTableUtil.java b/core/src/test/java/org/apache/iceberg/TestTableUtil.java new file mode 100644 index 000000000000..97e102b22d7f --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestTableUtil.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.File; +import java.util.Arrays; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.inmemory.InMemoryCatalog; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestTableUtil { + private static final Namespace NS = Namespace.of("ns"); + private static final TableIdentifier IDENTIFIER = TableIdentifier.of(NS, "test"); + + @TempDir private File tmp; + + private InMemoryCatalog catalog; + + @BeforeEach + public void initCatalog() { + catalog = new InMemoryCatalog(); + catalog.initialize("catalog", ImmutableMap.of()); + catalog.createNamespace(NS); + } + + @Test + public void nullTable() { + assertThatThrownBy(() -> TableUtil.formatVersion(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid table: null"); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3}) + public void formatVersionFromBaseTable(int formatVersion) { + Table table = + catalog.createTable( + IDENTIFIER, + new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())), + PartitionSpec.unpartitioned(), + ImmutableMap.of(TableProperties.FORMAT_VERSION, String.valueOf(formatVersion))); + + assertThat(TableUtil.formatVersion(table)).isEqualTo(formatVersion); + assertThat(TableUtil.formatVersion(SerializableTable.copyOf(table))).isEqualTo(formatVersion); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3}) + public void formatVersionFromPositionDeletesTable(int formatVersion) { + Table table = + catalog.createTable( + IDENTIFIER, + new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())), + PartitionSpec.unpartitioned(), + ImmutableMap.of(TableProperties.FORMAT_VERSION, String.valueOf(formatVersion))); + + PositionDeletesTable positionDeletesTable = new PositionDeletesTable(table); + assertThat(TableUtil.formatVersion(positionDeletesTable)).isEqualTo(formatVersion); + assertThat(TableUtil.formatVersion(SerializableTable.copyOf(positionDeletesTable))) + .isEqualTo(formatVersion); + } + + @Test + public void formatVersionForMetadataTables() { + Table table = + catalog.createTable( + IDENTIFIER, new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()))); + + for (MetadataTableType type : + Arrays.stream(MetadataTableType.values()) + .filter(type -> type != MetadataTableType.POSITION_DELETES) + .collect(Collectors.toList())) { + + Table metadataTable = MetadataTableUtils.createMetadataTableInstance(table, type); + assertThatThrownBy(() -> TableUtil.formatVersion(metadataTable)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("%s table does not have a format version", type); + + assertThatThrownBy(() -> TableUtil.formatVersion(SerializableTable.copyOf(metadataTable))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("%s table does not have a format version", type); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java index a011578865b4..e7b379425f5b 100644 --- a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java +++ b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java @@ -46,6 +46,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TableUtil; import org.apache.iceberg.TestHelpers; import org.apache.iceberg.Transaction; import org.apache.iceberg.UpdatePartitionSpec; @@ -1273,9 +1274,7 @@ public void testUpdateTableSpecThenRevert() { .withPartitionSpec(SPEC) .withProperty("format-version", "2") .create(); - assertThat(((BaseTable) table).operations().current().formatVersion()) - .as("Should be a v2 table") - .isEqualTo(2); + assertThat(TableUtil.formatVersion(table)).as("Should be a v2 table").isEqualTo(2); table.updateSpec().addField("id").commit(); @@ -2510,7 +2509,7 @@ public void testConcurrentReplaceTransactionSortOrderConflict() { } @ParameterizedTest - @ValueSource(ints = {1, 2}) + @ValueSource(ints = {1, 2, 3}) public void createTableTransaction(int formatVersion) { if (requiresNamespaceCreate()) { catalog().createNamespace(NS); @@ -2524,8 +2523,7 @@ public void createTableTransaction(int formatVersion) { ImmutableMap.of("format-version", String.valueOf(formatVersion))) .commitTransaction(); - BaseTable table = (BaseTable) catalog().loadTable(TABLE); - assertThat(table.operations().current().formatVersion()).isEqualTo(formatVersion); + assertThat(TableUtil.formatVersion(catalog().loadTable(TABLE))).isEqualTo(formatVersion); } @ParameterizedTest diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java b/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java index 5ef4697b4736..494e22f6501c 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java @@ -40,6 +40,7 @@ import org.apache.iceberg.StaticTableOperations; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TableUtil; import org.apache.iceberg.TestHelpers; import org.apache.iceberg.Transaction; import org.apache.iceberg.io.CloseableIterable; @@ -67,6 +68,7 @@ public void testSerializableTable() throws IOException, ClassNotFoundException { assertThat(serializableTable).isInstanceOf(HasTableOperations.class); assertThat(((HasTableOperations) serializableTable).operations()) .isInstanceOf(StaticTableOperations.class); + assertThat(TableUtil.formatVersion(serializableTable)).isEqualTo(2); } @Test @@ -106,6 +108,13 @@ public void testSerializableMetadataTable() throws IOException, ClassNotFoundExc assertThatThrownBy(() -> ((HasTableOperations) serializableTable).operations()) .isInstanceOf(UnsupportedOperationException.class) .hasMessageEndingWith("does not support operations()"); + if (MetadataTableType.POSITION_DELETES == type) { + assertThat(TableUtil.formatVersion(serializableTable)).isEqualTo(2); + } else { + assertThatThrownBy(() -> TableUtil.formatVersion(serializableTable)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("%s table does not have a format version", type); + } } }