From 2543c49b67d324f4c8c6aba89858ac32026d4f47 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Thu, 21 Nov 2024 10:15:52 +0100 Subject: [PATCH 1/3] Core: Add TableUtil to provide access to a table's format version --- .../java/org/apache/iceberg/TableUtil.java | 40 ++++++++ .../org/apache/iceberg/TestTableUtil.java | 92 +++++++++++++++++++ .../apache/iceberg/catalog/CatalogTests.java | 10 +- .../hadoop/TestTableSerialization.java | 5 + 4 files changed, 141 insertions(+), 6 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/TableUtil.java create mode 100644 core/src/test/java/org/apache/iceberg/TestTableUtil.java diff --git a/core/src/main/java/org/apache/iceberg/TableUtil.java b/core/src/main/java/org/apache/iceberg/TableUtil.java new file mode 100644 index 000000000000..46b2b2522d8b --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TableUtil.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public class TableUtil { + private TableUtil() {} + + /** Returns the format version of the given table */ + public static int formatVersion(Table table) { + Preconditions.checkArgument(null != table, "Invalid table: null"); + + // SerializableMetadataTable is a subclass of SerializableTable but does not support + // operations() + if (table instanceof HasTableOperations + && !(table instanceof SerializableTable.SerializableMetadataTable)) { + return ((HasTableOperations) table).operations().current().formatVersion(); + } else { + throw new IllegalArgumentException( + String.format("%s does not have a format version", table.getClass().getSimpleName())); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTableUtil.java b/core/src/test/java/org/apache/iceberg/TestTableUtil.java new file mode 100644 index 000000000000..d33ecb1a91dd --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestTableUtil.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.File; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.inmemory.InMemoryCatalog; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestTableUtil { + private static final Namespace NS = Namespace.of("ns"); + private static final TableIdentifier IDENTIFIER = TableIdentifier.of(NS, "test"); + + @TempDir private File tmp; + + private InMemoryCatalog catalog; + + @BeforeEach + public void initCatalog() { + catalog = new InMemoryCatalog(); + catalog.initialize("catalog", ImmutableMap.of()); + catalog.createNamespace(NS); + } + + @Test + public void nullTable() { + assertThatThrownBy(() -> TableUtil.formatVersion(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid table: null"); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3}) + public void formatVersionForBaseTable(int formatVersion) { + Table table = + catalog.createTable( + IDENTIFIER, + new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())), + PartitionSpec.unpartitioned(), + ImmutableMap.of(TableProperties.FORMAT_VERSION, String.valueOf(formatVersion))); + + assertThat(TableUtil.formatVersion(table)).isEqualTo(formatVersion); + assertThat(TableUtil.formatVersion(SerializableTable.copyOf(table))).isEqualTo(formatVersion); + } + + @Test + public void formatVersionForMetadataTables() { + Table table = + catalog.createTable( + IDENTIFIER, new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()))); + + for (MetadataTableType type : MetadataTableType.values()) { + Table metadataTable = MetadataTableUtils.createMetadataTableInstance(table, type); + assertThatThrownBy(() -> TableUtil.formatVersion(metadataTable)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "%s does not have a format version", metadataTable.getClass().getSimpleName()); + + assertThatThrownBy(() -> TableUtil.formatVersion(SerializableTable.copyOf(metadataTable))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "%s does not have a format version", + SerializableTable.SerializableMetadataTable.class.getSimpleName()); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java index 4df91a49033d..ecdf30463472 100644 --- a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java +++ b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java @@ -49,6 +49,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TableUtil; import org.apache.iceberg.TestHelpers; import org.apache.iceberg.Transaction; import org.apache.iceberg.UpdatePartitionSpec; @@ -1282,9 +1283,7 @@ public void testUpdateTableSpecThenRevert() { .withPartitionSpec(SPEC) .withProperty("format-version", "2") .create(); - assertThat(((BaseTable) table).operations().current().formatVersion()) - .as("Should be a v2 table") - .isEqualTo(2); + assertThat(TableUtil.formatVersion(table)).as("Should be a v2 table").isEqualTo(2); table.updateSpec().addField("id").commit(); @@ -2519,7 +2518,7 @@ public void testConcurrentReplaceTransactionSortOrderConflict() { } @ParameterizedTest - @ValueSource(ints = {1, 2}) + @ValueSource(ints = {1, 2, 3}) public void createTableTransaction(int formatVersion) { if (requiresNamespaceCreate()) { catalog().createNamespace(NS); @@ -2533,8 +2532,7 @@ public void createTableTransaction(int formatVersion) { ImmutableMap.of("format-version", String.valueOf(formatVersion))) .commitTransaction(); - BaseTable table = (BaseTable) catalog().loadTable(TABLE); - assertThat(table.operations().current().formatVersion()).isEqualTo(formatVersion); + assertThat(TableUtil.formatVersion(catalog().loadTable(TABLE))).isEqualTo(formatVersion); } @ParameterizedTest diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java b/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java index 5ef4697b4736..247535c58b75 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java @@ -40,6 +40,7 @@ import org.apache.iceberg.StaticTableOperations; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TableUtil; import org.apache.iceberg.TestHelpers; import org.apache.iceberg.Transaction; import org.apache.iceberg.io.CloseableIterable; @@ -67,6 +68,7 @@ public void testSerializableTable() throws IOException, ClassNotFoundException { assertThat(serializableTable).isInstanceOf(HasTableOperations.class); assertThat(((HasTableOperations) serializableTable).operations()) .isInstanceOf(StaticTableOperations.class); + assertThat(TableUtil.formatVersion(serializableTable)).isEqualTo(2); } @Test @@ -106,6 +108,9 @@ public void testSerializableMetadataTable() throws IOException, ClassNotFoundExc assertThatThrownBy(() -> ((HasTableOperations) serializableTable).operations()) .isInstanceOf(UnsupportedOperationException.class) .hasMessageEndingWith("does not support operations()"); + assertThatThrownBy(() -> TableUtil.formatVersion(serializableTable)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageEndingWith("does not have a format version"); } } From 455bc829612551418f95c70b7dfc61b656892849 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Fri, 6 Dec 2024 08:36:17 +0100 Subject: [PATCH 2/3] Store formatVersion as field in SerializableTable --- .../org/apache/iceberg/SerializableTable.java | 18 +++++++++++++++++- .../java/org/apache/iceberg/TableUtil.java | 6 ++++-- .../java/org/apache/iceberg/TestTableUtil.java | 4 ++-- .../iceberg/hadoop/TestTableSerialization.java | 2 +- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/SerializableTable.java b/core/src/main/java/org/apache/iceberg/SerializableTable.java index a2c0d776423c..858dca7abc79 100644 --- a/core/src/main/java/org/apache/iceberg/SerializableTable.java +++ b/core/src/main/java/org/apache/iceberg/SerializableTable.java @@ -62,13 +62,14 @@ public class SerializableTable implements Table, HasTableOperations, Serializabl private final FileIO io; private final EncryptionManager encryption; private final Map refs; + private final UUID uuid; + private final int formatVersion; private transient volatile LocationProvider lazyLocationProvider = null; private transient volatile Table lazyTable = null; private transient volatile Schema lazySchema = null; private transient volatile Map lazySpecs = null; private transient volatile SortOrder lazySortOrder = null; - private final UUID uuid; protected SerializableTable(Table table) { this.name = table.name(); @@ -85,6 +86,11 @@ protected SerializableTable(Table table) { this.encryption = table.encryption(); this.refs = SerializableMap.copyOf(table.refs()); this.uuid = table.uuid(); + + // formatVersion=-1 will never be used/returned, because + // SerializableMetadataTable#formatVersion() will throw an UOE when the format version is + // retrieved + this.formatVersion = table instanceof BaseMetadataTable ? -1 : TableUtil.formatVersion(table); } /** @@ -158,6 +164,10 @@ public Map properties() { return properties; } + public int formatVersion() { + return formatVersion; + } + @Override public Schema schema() { if (lazySchema == null) { @@ -428,6 +438,12 @@ public StaticTableOperations operations() { this.getClass().getName() + " does not support operations()"); } + @Override + public int formatVersion() { + throw new UnsupportedOperationException( + this.getClass().getName() + " does not have a format version"); + } + public MetadataTableType type() { return type; } diff --git a/core/src/main/java/org/apache/iceberg/TableUtil.java b/core/src/main/java/org/apache/iceberg/TableUtil.java index 46b2b2522d8b..335a5b35b805 100644 --- a/core/src/main/java/org/apache/iceberg/TableUtil.java +++ b/core/src/main/java/org/apache/iceberg/TableUtil.java @@ -29,8 +29,10 @@ public static int formatVersion(Table table) { // SerializableMetadataTable is a subclass of SerializableTable but does not support // operations() - if (table instanceof HasTableOperations - && !(table instanceof SerializableTable.SerializableMetadataTable)) { + if (table instanceof SerializableTable) { + SerializableTable serializableTable = (SerializableTable) table; + return serializableTable.formatVersion(); + } else if (table instanceof HasTableOperations) { return ((HasTableOperations) table).operations().current().formatVersion(); } else { throw new IllegalArgumentException( diff --git a/core/src/test/java/org/apache/iceberg/TestTableUtil.java b/core/src/test/java/org/apache/iceberg/TestTableUtil.java index d33ecb1a91dd..2ccb5c01f3e9 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableUtil.java +++ b/core/src/test/java/org/apache/iceberg/TestTableUtil.java @@ -83,10 +83,10 @@ public void formatVersionForMetadataTables() { "%s does not have a format version", metadataTable.getClass().getSimpleName()); assertThatThrownBy(() -> TableUtil.formatVersion(SerializableTable.copyOf(metadataTable))) - .isInstanceOf(IllegalArgumentException.class) + .isInstanceOf(UnsupportedOperationException.class) .hasMessage( "%s does not have a format version", - SerializableTable.SerializableMetadataTable.class.getSimpleName()); + SerializableTable.SerializableMetadataTable.class.getName()); } } } diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java b/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java index 247535c58b75..d253556f83ea 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestTableSerialization.java @@ -109,7 +109,7 @@ public void testSerializableMetadataTable() throws IOException, ClassNotFoundExc .isInstanceOf(UnsupportedOperationException.class) .hasMessageEndingWith("does not support operations()"); assertThatThrownBy(() -> TableUtil.formatVersion(serializableTable)) - .isInstanceOf(IllegalArgumentException.class) + .isInstanceOf(UnsupportedOperationException.class) .hasMessageEndingWith("does not have a format version"); } } From 5df9f8d82c4234dfbbb34ea7c2b7b50f8dd07232 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Fri, 6 Dec 2024 08:46:31 +0100 Subject: [PATCH 3/3] updates --- .../org/apache/iceberg/SerializableTable.java | 17 ++++++++++++----- .../main/java/org/apache/iceberg/TableUtil.java | 2 -- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/SerializableTable.java b/core/src/main/java/org/apache/iceberg/SerializableTable.java index 858dca7abc79..f0e5220976df 100644 --- a/core/src/main/java/org/apache/iceberg/SerializableTable.java +++ b/core/src/main/java/org/apache/iceberg/SerializableTable.java @@ -86,11 +86,7 @@ protected SerializableTable(Table table) { this.encryption = table.encryption(); this.refs = SerializableMap.copyOf(table.refs()); this.uuid = table.uuid(); - - // formatVersion=-1 will never be used/returned, because - // SerializableMetadataTable#formatVersion() will throw an UOE when the format version is - // retrieved - this.formatVersion = table instanceof BaseMetadataTable ? -1 : TableUtil.formatVersion(table); + this.formatVersion = formatVersion(table); } /** @@ -168,6 +164,17 @@ public int formatVersion() { return formatVersion; } + private int formatVersion(Table table) { + if (table instanceof HasTableOperations) { + return ((HasTableOperations) table).operations().current().formatVersion(); + } else { + // formatVersion=-1 will never be used/returned, because + // SerializableMetadataTable#formatVersion() will throw an UOE when the format version is + // retrieved + return -1; + } + } + @Override public Schema schema() { if (lazySchema == null) { diff --git a/core/src/main/java/org/apache/iceberg/TableUtil.java b/core/src/main/java/org/apache/iceberg/TableUtil.java index 335a5b35b805..5f22581da07d 100644 --- a/core/src/main/java/org/apache/iceberg/TableUtil.java +++ b/core/src/main/java/org/apache/iceberg/TableUtil.java @@ -27,8 +27,6 @@ private TableUtil() {} public static int formatVersion(Table table) { Preconditions.checkArgument(null != table, "Invalid table: null"); - // SerializableMetadataTable is a subclass of SerializableTable but does not support - // operations() if (table instanceof SerializableTable) { SerializableTable serializableTable = (SerializableTable) table; return serializableTable.formatVersion();