Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#2266] fix(partition): enable preassign partition when creating range and list partitioning table #3189

Merged
merged 5 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@

package com.datastrato.gravitino.rel.expressions.transforms;

import static com.datastrato.gravitino.rel.partitions.Partitions.EMPTY_PARTITIONS;

import com.datastrato.gravitino.annotation.Evolving;
import com.datastrato.gravitino.rel.expressions.Expression;
import com.datastrato.gravitino.rel.expressions.NamedReference;
import com.datastrato.gravitino.rel.partitions.Partition;
import java.util.Objects;

/**
Expand All @@ -40,11 +43,12 @@ public interface Transform extends Expression {
Expression[] arguments();

/**
* @return The preassigned partitions in the partitioning. Currently, only ListTransform and
* RangeTransform need to deal with assignments
* @return The preassigned partitions in the partitioning. Currently, only {@link
* Transforms.ListTransform} and {@link Transforms.RangeTransform} need to deal with
* assignments
*/
default Expression[] assignments() {
return Expression.EMPTY_EXPRESSION;
default Partition[] assignments() {
return EMPTY_PARTITIONS;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason of this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the previous API confused Partitioning (same as Transform) with Partition, which is a wrong implementation, the assignments should be partitions, so here is the Partition instead of Expression to fix the mistake.

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import com.datastrato.gravitino.rel.expressions.NamedReference;
import com.datastrato.gravitino.rel.expressions.literals.Literal;
import com.datastrato.gravitino.rel.expressions.literals.Literals;
import com.datastrato.gravitino.rel.partitions.ListPartition;
import com.datastrato.gravitino.rel.partitions.RangePartition;
import com.google.common.collect.ObjectArrays;
import java.util.Arrays;
import java.util.Objects;
Expand Down Expand Up @@ -166,8 +168,20 @@ public static BucketTransform bucket(int numBuckets, String[]... fieldNames) {
* @return The created transform
*/
public static ListTransform list(String[]... fieldNames) {
return list(fieldNames, new ListPartition[0]);
}

/**
* Create a transform that includes multiple fields in a list with preassigned list partitions.
*
* @param fieldNames The field names to include in the list
* @param assignments The preassigned list partitions
* @return The created transform
*/
public static ListTransform list(String[][] fieldNames, ListPartition[] assignments) {
return new ListTransform(
Arrays.stream(fieldNames).map(NamedReference::field).toArray(NamedReference[]::new));
Arrays.stream(fieldNames).map(NamedReference::field).toArray(NamedReference[]::new),
assignments);
}

/**
Expand All @@ -177,7 +191,18 @@ public static ListTransform list(String[]... fieldNames) {
* @return The created transform
*/
public static RangeTransform range(String[] fieldName) {
return new RangeTransform(NamedReference.field(fieldName));
return range(fieldName, new RangePartition[0]);
}

/**
* Create a transform that returns the range of the input value with preassigned range partitions.
*
* @param fieldName The field name to transform
* @param assignments The preassigned range partitions
* @return The created transform
*/
public static RangeTransform range(String[] fieldName, RangePartition[] assignments) {
return new RangeTransform(NamedReference.field(fieldName), assignments);
}

/**
Expand Down Expand Up @@ -486,9 +511,16 @@ public int hashCode() {
public static final class ListTransform implements Transform {

private final NamedReference[] fields;
private final ListPartition[] assignments;

private ListTransform(NamedReference[] fields) {
this.fields = fields;
this.assignments = new ListPartition[0];
}

private ListTransform(NamedReference[] fields, ListPartition[] assignments) {
this.fields = fields;
this.assignments = assignments;
}

/** @return The field names to include in the list. */
Expand All @@ -510,9 +542,8 @@ public Expression[] arguments() {

/** @return The assignments to the transform. */
@Override
public Expression[] assignments() {
// todo: resolve this
return Transform.super.assignments();
public ListPartition[] assignments() {
return assignments;
}

@Override
Expand All @@ -537,9 +568,16 @@ public int hashCode() {
public static final class RangeTransform implements Transform {

private final NamedReference field;
private final RangePartition[] assignments;

private RangeTransform(NamedReference field) {
this.field = field;
this.assignments = new RangePartition[0];
}

private RangeTransform(NamedReference field, RangePartition[] assignments) {
this.field = field;
this.assignments = assignments;
}

/** @return The field name to transform. */
Expand All @@ -561,9 +599,8 @@ public Expression[] arguments() {

/** @return The assignments to the transform. */
@Override
public Expression[] assignments() {
// todo: resolve this
return Transform.super.assignments();
public RangePartition[] assignments() {
return assignments;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
/** The helper class for partition expressions. */
public class Partitions {

/** An empty array of partitions. */
public static Partition[] EMPTY_PARTITIONS = new Partition[0];

/**
* Creates a range partition.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import com.datastrato.gravitino.dto.rel.partitioning.DayPartitioningDTO;
import com.datastrato.gravitino.dto.rel.partitioning.IdentityPartitioningDTO;
import com.datastrato.gravitino.dto.rel.partitioning.Partitioning;
import com.datastrato.gravitino.dto.rel.partitioning.RangePartitioningDTO;
import com.datastrato.gravitino.dto.rel.partitions.RangePartitionDTO;
import com.datastrato.gravitino.dto.requests.CatalogCreateRequest;
import com.datastrato.gravitino.dto.requests.SchemaCreateRequest;
import com.datastrato.gravitino.dto.requests.SchemaUpdateRequest;
Expand Down Expand Up @@ -556,6 +558,60 @@ public void testCreatePartitionedTable() throws JsonProcessingException {
tableId, new Column[0], "comment", emptyMap, errorPartitioning));
Assertions.assertTrue(
ex3.getMessage().contains("\"columns\" field is required and cannot be empty"));

// Test partitioning with assignments
Partitioning[] partitioningWithAssignments = {
RangePartitioningDTO.of(
new String[] {columns[0].name()},
new RangePartitionDTO[] {
RangePartitionDTO.builder()
.withName("p1")
.withLower(
LiteralDTO.builder()
.withDataType(Types.IntegerType.get())
.withValue("1")
.build())
.withUpper(
LiteralDTO.builder()
.withDataType(Types.IntegerType.get())
.withValue("10")
.build())
.build()
})
};
expectedTable =
createMockTable(
"table1",
columns,
"comment",
Collections.emptyMap(),
partitioningWithAssignments,
DistributionDTO.NONE,
SortOrderDTO.EMPTY_SORT);

req =
new TableCreateRequest(
tableId.name(),
"comment",
columns,
Collections.emptyMap(),
SortOrderDTO.EMPTY_SORT,
DistributionDTO.NONE,
partitioningWithAssignments,
IndexDTO.EMPTY_INDEXES);
resp = new TableResponse(expectedTable);
buildMockResource(Method.POST, tablePath, req, resp, SC_OK);

table =
catalog
.asTableCatalog()
.createTable(
tableId,
fromDTOs(columns),
"comment",
Collections.emptyMap(),
partitioningWithAssignments);
assertTableEquals(fromDTO(expectedTable), table);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import static com.datastrato.gravitino.dto.rel.PartitionUtils.validateFieldExistence;

import com.datastrato.gravitino.dto.rel.ColumnDTO;
import com.datastrato.gravitino.dto.rel.partitions.ListPartitionDTO;
import com.datastrato.gravitino.rel.expressions.Expression;
import com.datastrato.gravitino.rel.expressions.NamedReference;
import java.util.Arrays;
Expand All @@ -17,26 +18,44 @@
public final class ListPartitioningDTO implements Partitioning {

/**
* Creates a new ListPartitioningDTO.
* Creates a new ListPartitioningDTO with no pre-assigned partitions.
*
* @param fieldNames The names of the fields to partition.
* @return The new ListPartitioningDTO.
*/
public static ListPartitioningDTO of(String[][] fieldNames) {
return new ListPartitioningDTO(fieldNames);
return of(fieldNames, new ListPartitionDTO[0]);
}

/**
* Creates a new ListPartitioningDTO.
*
* @param fieldNames The names of the fields to partition.
* @param assignments The pre-assigned list partitions.
* @return The new ListPartitioningDTO.
*/
public static ListPartitioningDTO of(String[][] fieldNames, ListPartitionDTO[] assignments) {
return new ListPartitioningDTO(fieldNames, assignments);
}

private final String[][] fieldNames;
private final ListPartitionDTO[] assignments;

private ListPartitioningDTO(String[][] fieldNames) {
private ListPartitioningDTO(String[][] fieldNames, ListPartitionDTO[] assignments) {
this.fieldNames = fieldNames;
this.assignments = assignments;
}

/** @return The names of the fields to partition. */
public String[][] fieldNames() {
return fieldNames;
}

@Override
public ListPartitionDTO[] assignments() {
return assignments;
}

/** @return The strategy of the partitioning. */
@Override
public Strategy strategy() {
Expand Down Expand Up @@ -65,34 +84,4 @@ public String name() {
public Expression[] arguments() {
return Arrays.stream(fieldNames).map(NamedReference::field).toArray(Expression[]::new);
}

/** The builder for ListPartitioningDTO. */
public static class Builder {
private String[][] fieldNames;

/**
* Set the field names for the builder.
*
* @param fieldNames The names of the fields to partition.
* @return The builder.
*/
public Builder withFieldNames(String[][] fieldNames) {
this.fieldNames = fieldNames;
return this;
}

/**
* Builds the ListPartitioningDTO.
*
* @return The ListPartitioningDTO.
*/
public ListPartitioningDTO build() {
return new ListPartitioningDTO(fieldNames);
}
}

/** @return the builder for creating a new instance of ListPartitioningDTO. */
public static Builder builder() {
return new Builder();
}
}
Loading
Loading