diff --git a/api/src/main/java/com/datastrato/gravitino/exceptions/NoSuchPartitionException.java b/api/src/main/java/com/datastrato/gravitino/exceptions/NoSuchPartitionException.java new file mode 100644 index 00000000000..8fc2c104444 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/exceptions/NoSuchPartitionException.java @@ -0,0 +1,17 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.exceptions; + +/** Exception thrown when a partition with the specified name does not exist. */ +public class NoSuchPartitionException extends NotFoundException { + + public NoSuchPartitionException(String message) { + super(message); + } + + public NoSuchPartitionException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/api/src/main/java/com/datastrato/gravitino/exceptions/PartitionAlreadyExistsException.java b/api/src/main/java/com/datastrato/gravitino/exceptions/PartitionAlreadyExistsException.java new file mode 100644 index 00000000000..1268fde74d5 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/exceptions/PartitionAlreadyExistsException.java @@ -0,0 +1,17 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.exceptions; + +/** Exception thrown when a partition with the specified name already exists. 
*/ +public class PartitionAlreadyExistsException extends AlreadyExistsException { + + public PartitionAlreadyExistsException(String message) { + super(message); + } + + public PartitionAlreadyExistsException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java b/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java new file mode 100644 index 00000000000..1a632d9d6b5 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java @@ -0,0 +1,97 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.rel; + +import com.datastrato.gravitino.exceptions.NoSuchPartitionException; +import com.datastrato.gravitino.exceptions.PartitionAlreadyExistsException; +import com.datastrato.gravitino.rel.partitions.Partition; + +public interface SupportsPartitions { + + /** + * List all partition names of the table. + * + * @return The list of partition names. + */ + String[] listPartitionNames(); + + /** + * List all partitions of the table. + * + * @return The list of partitions. + */ + Partition[] listPartitions(); + + /** + * Get a partition by partition name, you may get one of the following types of partitions: + * + * {@code IdentityPartition}, {@code ListPartition} or {@code RangePartition}. + * + * It depends on the {@link Table#partitioning()}. A Java type conversion is required before + * getting the specific partition, for example: + * + *
+   *   RangePartition rangePartition = (RangePartition) table.supportPartitions().getPartition("p20200321");
+   *   Literal<?> upper = rangePartition.upper();
+   *   Literal<?> lower = rangePartition.lower();
+   *   ...
+   * 
+ * + * @param partitionName The name of the partition. + * @return The partition with the given name. + * @throws NoSuchPartitionException If the partition does not exist. + */ + Partition getPartition(String partitionName) throws NoSuchPartitionException; + + /** + * Check if a partition exists. By default this probes {@link #getPartition(String)}. + * + * @param partitionName The name of the partition. + * @return True if the partition exists, false otherwise. + */ + default boolean partitionExists(String partitionName) { + try { + getPartition(partitionName); + return true; + } catch (NoSuchPartitionException e) { + return false; + } + } + + /** + * Add a partition with specified name and properties to the table. + * + * @param partition The partition to add. + * @return The created partition. + * @throws PartitionAlreadyExistsException If the partition already exists. + */ + Partition addPartition(Partition partition) throws PartitionAlreadyExistsException; + + /** + * Drop a partition with specified name. + * + * @param partitionName The name of the partition. + * @return true if a partition was deleted, false if the partition did not exist. + */ + boolean dropPartition(String partitionName); + + /** + * If the table supports purging, drop a partition with specified name and completely remove + * partition data by skipping the trash. + * + * @param partitionName The name of the partition. + * @return true if a partition was deleted, false if the partition did not exist. + * @throws NoSuchPartitionException If the partition does not exist (TODO: reconcile with the false return value). + * @throws UnsupportedOperationException If partition purging is not supported, which is the default behavior. 
+ */ + default boolean purgePartition(String partitionName) + throws NoSuchPartitionException, UnsupportedOperationException { + throw new UnsupportedOperationException("Partition purging is not supported"); + } +} diff --git a/api/src/main/java/com/datastrato/gravitino/rel/Table.java b/api/src/main/java/com/datastrato/gravitino/rel/Table.java index 0f3f7c22a44..b165aac18ad 100644 --- a/api/src/main/java/com/datastrato/gravitino/rel/Table.java +++ b/api/src/main/java/com/datastrato/gravitino/rel/Table.java @@ -58,4 +58,15 @@ default String comment() { default Map properties() { return Collections.emptyMap(); } + + /** + * Table method for working with partitions. If the table does not support partition operations, + * an {@link UnsupportedOperationException} is thrown. + * + * @return The {@link SupportsPartitions} used to operate on the partitions of this table. + * @throws UnsupportedOperationException If the table does not support partition operations. + */ + default SupportsPartitions supportPartitions() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Table does not support partition operations."); + } } diff --git a/api/src/main/java/com/datastrato/gravitino/rel/partitions/IdentityPartition.java b/api/src/main/java/com/datastrato/gravitino/rel/partitions/IdentityPartition.java new file mode 100644 index 00000000000..c5e51ae4672 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/rel/partitions/IdentityPartition.java @@ -0,0 +1,28 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.rel.partitions; + +import com.datastrato.gravitino.rel.expressions.literals.Literal; + +/** + * An identity partition represents a result of identity partitioning. For example, for Hive + * partition + * + *
`PARTITION (dt='2008-08-08',country='us')`
+ * + * its partition name is "dt=2008-08-08/country=us", field names are [["dt"], ["country"]] and + * values are ["2008-08-08", "us"]. + */ +public interface IdentityPartition extends Partition { + + /** @return The field names of the identity partition, one {@code String[]} per field. */ + String[][] fieldNames(); + + /** + * @return The values of the identity partition. The values are in the same order as the field + * names. + */ + Literal[] values(); +} diff --git a/api/src/main/java/com/datastrato/gravitino/rel/partitions/ListPartition.java b/api/src/main/java/com/datastrato/gravitino/rel/partitions/ListPartition.java new file mode 100644 index 00000000000..98c845d990b --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/rel/partitions/ListPartition.java @@ -0,0 +1,26 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.rel.partitions; + +import com.datastrato.gravitino.rel.expressions.literals.Literal; + +/** + * A list partition represents a result of list partitioning. For example, for list partition + * + *
+ *     `PARTITION p202204_California VALUES IN (
+ *       ("2022-04-01", "Los Angeles"),
+ *       ("2022-04-01", "San Francisco")
+ *     )`
+ *     
+ * + * its name is "p202204_California" and lists are [["2022-04-01","Los Angeles"], ["2022-04-01", "San + * Francisco"]]. + */ +public interface ListPartition extends Partition { + + /** @return The values of the list partition, one {@code Literal[]} per listed entry. */ + Literal[][] lists(); +} diff --git a/api/src/main/java/com/datastrato/gravitino/rel/partitions/Partition.java b/api/src/main/java/com/datastrato/gravitino/rel/partitions/Partition.java new file mode 100644 index 00000000000..2066b35b510 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/rel/partitions/Partition.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.rel.partitions; + +import com.datastrato.gravitino.rel.Table; +import java.util.Map; + +/** + * A partition represents a result of partitioning a table. The partition can be either a {@link + * IdentityPartition}, {@link ListPartition} or {@link RangePartition}. It depends on the {@link + * Table#partitioning()}. + */ +public interface Partition { + + /** @return The name of the partition. */ + String name(); + + /** @return The properties of the partition, such as statistics, location, etc. */ + Map properties(); +} diff --git a/api/src/main/java/com/datastrato/gravitino/rel/partitions/Partitions.java b/api/src/main/java/com/datastrato/gravitino/rel/partitions/Partitions.java new file mode 100644 index 00000000000..ecbfe6e43e3 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/rel/partitions/Partitions.java @@ -0,0 +1,226 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */ +package com.datastrato.gravitino.rel.partitions; + +import com.datastrato.gravitino.rel.expressions.literals.Literal; +import com.datastrato.gravitino.rel.expressions.transforms.Transforms; +import java.util.Arrays; +import java.util.Map; +import java.util.Objects; + +/** The helper class for creating partitions. */ +public class Partitions { + + /** + * Creates a range partition. + * + * @param name The name of the partition. + * @param upper The upper bound of the partition. + * @param lower The lower bound of the partition. + * @param properties The properties of the partition. + * @return The created partition, which is a {@link RangePartition}. + */ + public static Partition range( + String name, Literal upper, Literal lower, Map properties) { + return new RangePartitionImpl(name, upper, lower, properties); + } + + /** + * Creates a list partition. + * + *

Each list in the lists must have the same length. The values in each list must correspond to + * the field definitions in the {@link Transforms.ListTransform#fieldNames()}. + * + * @param name The name of the partition. + * @param lists The values of the list partition; stored as-is without validation or copying. + * @param properties The properties of the partition. + * @return The created partition, which is a {@link ListPartition}. + */ + public static Partition list(String name, Literal[][] lists, Map properties) { + return new ListPartitionImpl(name, lists, properties); + } + + /** + * Creates an identity partition. + * + *

The {@code values} must correspond to the {@code fieldNames}. + * + * @param name The name of the partition. + * @param fieldNames The field names of the identity partition. + * @param values The values of the identity partition. + * @param properties The properties of the partition. + * @return The created partition, which is an {@link IdentityPartition}. + */ + public static Partition identity( + String name, String[][] fieldNames, Literal[] values, Map properties) { + return new IdentityPartitionImpl(name, fieldNames, values, properties); + } + + /** A {@link RangePartition} implementation whose equality is based on all of its fields. */ + private static class RangePartitionImpl implements RangePartition { + private final String name; + private final Literal upper; + private final Literal lower; + + private final Map properties; + + private RangePartitionImpl( + String name, Literal upper, Literal lower, Map properties) { + this.name = name; + this.properties = properties; + this.upper = upper; + this.lower = lower; + } + + /** @return The upper bound of the partition. */ + public Literal upper() { + return upper; + } + + /** @return The lower bound of the partition. */ + public Literal lower() { + return lower; + } + + @Override + public String name() { + return name; + } + + @Override + public Map properties() { + return properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RangePartitionImpl that = (RangePartitionImpl) o; + return Objects.equals(name, that.name) + && Objects.equals(upper, that.upper) + && Objects.equals(lower, that.lower) + && Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + return Objects.hash(name, upper, lower, properties); + } + } + + /** A {@link ListPartition} implementation whose equality is based on all of its fields. 
*/ + private static class ListPartitionImpl implements ListPartition { + private final String name; + private final Literal[][] lists; + + private final Map properties; + + private ListPartitionImpl(String name, Literal[][] lists, Map properties) { + this.name = name; + this.properties = properties; + this.lists = lists; + } + + /** @return The values of the list partition; the stored array itself, not a copy. */ + public Literal[][] lists() { + return lists; + } + + @Override + public String name() { + return name; + } + + @Override + public Map properties() { + return properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ListPartitionImpl that = (ListPartitionImpl) o; + return Objects.equals(name, that.name) + && Arrays.deepEquals(lists, that.lists) + && Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + int result = Objects.hash(name, properties); + result = 31 * result + Arrays.deepHashCode(lists); + return result; + } + } + + /** An {@link IdentityPartition} implementation whose equality is based on all of its fields. */ + private static class IdentityPartitionImpl implements IdentityPartition { + private final String name; + private final String[][] fieldNames; + private final Literal[] values; + private final Map properties; + + private IdentityPartitionImpl( + String name, String[][] fieldNames, Literal[] values, Map properties) { + this.name = name; + this.fieldNames = fieldNames; + this.values = values; + this.properties = properties; + } + + /** @return The field names of the identity partition; the stored array itself, not a copy. */ + public String[][] fieldNames() { + return fieldNames; + } + + /** @return The values of the identity partition; the stored array itself, not a copy. 
*/ + public Literal[] values() { + return values; + } + + @Override + public String name() { + return name; + } + + @Override + public Map properties() { + return properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IdentityPartitionImpl that = (IdentityPartitionImpl) o; + return Objects.equals(name, that.name) + && Arrays.deepEquals(fieldNames, that.fieldNames) + && Arrays.equals(values, that.values) + && Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + int result = Objects.hash(name, properties); + result = 31 * result + Arrays.deepHashCode(fieldNames); + result = 31 * result + Arrays.hashCode(values); + return result; + } + } +} diff --git a/api/src/main/java/com/datastrato/gravitino/rel/partitions/RangePartition.java b/api/src/main/java/com/datastrato/gravitino/rel/partitions/RangePartition.java new file mode 100644 index 00000000000..bc909ed30c2 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/rel/partitions/RangePartition.java @@ -0,0 +1,23 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.rel.partitions; + +import com.datastrato.gravitino.rel.expressions.literals.Literal; + +/** + * A range partition represents a result of range partitioning. For example, for range partition + * + *

`PARTITION p20200321 VALUES LESS THAN ("2020-03-22")`
+ * + * its upper bound is "2020-03-22" and its lower bound is null. + */ +public interface RangePartition extends Partition { + + /** @return The upper bound of the partition. */ + Literal upper(); + + /** @return The lower bound of the partition. */ + Literal lower(); +} diff --git a/api/src/test/java/com/datastrato/gravitino/rel/TestPartitions.java b/api/src/test/java/com/datastrato/gravitino/rel/TestPartitions.java new file mode 100644 index 00000000000..b8eba965fc4 --- /dev/null +++ b/api/src/test/java/com/datastrato/gravitino/rel/TestPartitions.java @@ -0,0 +1,80 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.rel; + +import com.datastrato.gravitino.rel.expressions.literals.Literal; +import com.datastrato.gravitino.rel.expressions.literals.Literals; +import com.datastrato.gravitino.rel.partitions.IdentityPartition; +import com.datastrato.gravitino.rel.partitions.ListPartition; +import com.datastrato.gravitino.rel.partitions.Partition; +import com.datastrato.gravitino.rel.partitions.Partitions; +import com.datastrato.gravitino.rel.partitions.RangePartition; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import java.time.LocalDate; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestPartitions { + + @Test + public void testPartitions() { + Partition partition = + Partitions.range("p0", Literals.NULL, Literals.integerLiteral(6), Maps.newHashMap()); + Assertions.assertEquals("p0", partition.name()); + Assertions.assertEquals(Maps.newHashMap(), partition.properties()); + Assertions.assertEquals(Literals.NULL, ((RangePartition) partition).upper()); + Assertions.assertEquals(Literals.integerLiteral(6), ((RangePartition) partition).lower()); + + partition = + Partitions.list( + "p202204_California", + new Literal[][] { + { + Literals.dateLiteral(LocalDate.parse("2022-04-01")), + 
Literals.stringLiteral("Los Angeles") + }, + { + Literals.dateLiteral(LocalDate.parse("2022-04-01")), + Literals.stringLiteral("San Francisco") + } + }, + Maps.newHashMap()); + Assertions.assertEquals("p202204_California", partition.name()); + Assertions.assertEquals(Maps.newHashMap(), partition.properties()); + Assertions.assertEquals( + Literals.dateLiteral(LocalDate.parse("2022-04-01")), + ((ListPartition) partition).lists()[0][0]); + Assertions.assertEquals( + Literals.stringLiteral("Los Angeles"), ((ListPartition) partition).lists()[0][1]); + Assertions.assertEquals( + Literals.dateLiteral(LocalDate.parse("2022-04-01")), + ((ListPartition) partition).lists()[1][0]); + Assertions.assertEquals( + Literals.stringLiteral("San Francisco"), ((ListPartition) partition).lists()[1][1]); + + partition = + Partitions.identity( + "dt=2008-08-08/country=us", + new String[][] {{"dt"}, {"country"}}, + new Literal[] { + Literals.dateLiteral(LocalDate.parse("2008-08-08")), Literals.stringLiteral("us") + }, + ImmutableMap.of("location", "/user/hive/warehouse/tpch_flat_orc_2.db/orders")); + Assertions.assertEquals("dt=2008-08-08/country=us", partition.name()); + Assertions.assertEquals( + ImmutableMap.of("location", "/user/hive/warehouse/tpch_flat_orc_2.db/orders"), + partition.properties()); + Assertions.assertArrayEquals( + new String[] {"dt"}, ((IdentityPartition) partition).fieldNames()[0]); + Assertions.assertArrayEquals( + new String[] {"country"}, ((IdentityPartition) partition).fieldNames()[1]); + Assertions.assertEquals( + Literals.dateLiteral(LocalDate.parse("2008-08-08")), + ((IdentityPartition) partition).values()[0]); + Assertions.assertEquals( + Literals.stringLiteral("us"), ((IdentityPartition) partition).values()[1]); + } +}