Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#1398] feat(table): Add index api for tables. #1399

Merged
merged 6 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions api/src/main/java/com/datastrato/gravitino/rel/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import com.datastrato.gravitino.rel.expressions.distributions.Distributions;
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.expressions.transforms.Transform;
import com.datastrato.gravitino.rel.indexes.Index;
import com.datastrato.gravitino.rel.indexes.Indexes;
import java.util.Collections;
import java.util.Map;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -48,6 +50,14 @@ default Distribution distribution() {
return Distributions.NONE;
}

/**
* Returns the indexes defined on the table. This default implementation reports no indexes.
*
* @return The indexes of the table. If no indexes are specified, {@link Indexes#EMPTY_INDEXES}
* is returned.
*/
default Index[] index() {
return Indexes.EMPTY_INDEXES;
}

/** @return The comment of the table. Null is returned if no comment is set. */
@Nullable
default String comment() {
Expand Down
40 changes: 39 additions & 1 deletion api/src/main/java/com/datastrato/gravitino/rel/TableCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import com.datastrato.gravitino.rel.expressions.distributions.Distributions;
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.expressions.transforms.Transform;
import com.datastrato.gravitino.rel.indexes.Index;
import com.datastrato.gravitino.rel.indexes.Indexes;
import java.util.Map;

/**
Expand Down Expand Up @@ -197,14 +199,50 @@ default Table createTable(
* @throws NoSuchSchemaException If the schema does not exist.
* @throws TableAlreadyExistsException If the table already exists.
*/
Table createTable(
default Table createTable(
    NameIdentifier ident,
    Column[] columns,
    String comment,
    Map<String, String> properties,
    Transform[] partitions,
    Distribution distribution,
    SortOrder[] sortOrders)
    throws NoSuchSchemaException, TableAlreadyExistsException {
  // This overload takes no indexes, so hand the index-aware overload the shared empty array.
  Index[] noIndexes = Indexes.EMPTY_INDEXES;
  return createTable(
      ident, columns, comment, properties, partitions, distribution, sortOrders, noIndexes);
}

/**
* Create a table in the catalog, including its indexes. Pass {@link Indexes#EMPTY_INDEXES} when
* the table has no indexes.
*
* @param ident A table identifier.
* @param columns The columns of the new table.
* @param comment The table comment.
* @param properties The table properties.
* @param partitions The table partitioning.
* @param distribution The distribution of the table.
* @param sortOrders The sort orders of the table.
* @param indexes The table indexes.
* @return The created table metadata.
* @throws NoSuchSchemaException If the schema does not exist.
* @throws TableAlreadyExistsException If the table already exists.
*/
Table createTable(
NameIdentifier ident,
Column[] columns,
String comment,
Map<String, String> properties,
Transform[] partitions,
Distribution distribution,
SortOrder[] sortOrders,
Index[] indexes)
throws NoSuchSchemaException, TableAlreadyExistsException;

/**
Expand Down
46 changes: 46 additions & 0 deletions api/src/main/java/com/datastrato/gravitino/rel/indexes/Index.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.gravitino.rel.indexes;

/**
* The Index interface describes an index defined on a table: its type, its name and the fields it
* covers. Currently, the PRIMARY_KEY and UNIQUE_KEY index types are provided.
*/
public interface Index {

/** @return The type of the index, e.g. PRIMARY_KEY or UNIQUE_KEY. */
IndexType type();
Clearvive marked this conversation as resolved.
Show resolved Hide resolved

/** @return The name of the index. */
String name();

/**
* @return The field names under the table contained in the index. These are column names:
* normally a plain column such as "a", but possibly a nested column such as "a.b.c".
*/
String[][] fieldNames();
Clearvive marked this conversation as resolved.
Show resolved Hide resolved
Clearvive marked this conversation as resolved.
Show resolved Hide resolved

/** The types of index that can be declared through this API. */
enum IndexType {
Clearvive marked this conversation as resolved.
Show resolved Hide resolved
/**
* PRIMARY KEY index in a relational database is a field or a combination of fields that
* uniquely identifies each record in a table. It serves as a unique identifier for each row,
* ensuring that no two rows have the same key. The PRIMARY KEY is used to establish
* relationships between tables and enforce the entity integrity of a database. Additionally, it
* helps in indexing and organizing the data for efficient retrieval and maintenance.
*/
PRIMARY_KEY,
/**
* UNIQUE KEY in a relational database is a field or a combination of fields that ensures each
* record in a table has a distinct value or combination of values. Unlike a primary key, a
* UNIQUE KEY allows for the presence of null values, but it still enforces the constraint that
* no two records can have the same unique key value(s). UNIQUE KEYs are used to maintain data
* integrity by preventing duplicate entries in specific columns, and they can be applied to
* columns that are not designated as the primary key. The uniqueness constraint imposed by
* UNIQUE KEY helps in avoiding redundancy and ensuring data accuracy in the database.
*/
UNIQUE_KEY,
}
}
104 changes: 104 additions & 0 deletions api/src/main/java/com/datastrato/gravitino/rel/indexes/Indexes.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/
package com.datastrato.gravitino.rel.indexes;

/** Helper methods to create indexes to pass into Gravitino. */
public class Indexes {
Clearvive marked this conversation as resolved.
Show resolved Hide resolved

/** A shared zero-length index array, used to express that a table has no indexes. */
public static final Index[] EMPTY_INDEXES = new Index[0];

/**
* Create a unique index on the given columns, e.g. unique (a) or unique (a, b).
*
* @param name The name of the index.
* @param fieldNames The field names under the table contained in the index.
* @return The unique index.
*/
public static Index unique(String name, String[][] fieldNames) {
return of(Index.IndexType.UNIQUE_KEY, name, fieldNames);
}

/**
* Create a primary key index on the given columns, e.g. primary (a).
*
* @param name The name of the index.
* @param fieldNames The field names under the table contained in the index.
* @return The primary key index.
*/
public static Index primary(String name, String[][] fieldNames) {
return of(Index.IndexType.PRIMARY_KEY, name, fieldNames);
}

/**
 * Create an index of the given type over the given fields.
 *
 * @param indexType The type of the index.
 * @param name The name of the index.
 * @param fieldNames The field names under the table contained in the index.
 * @return The index assembled from the given type, name and field names.
 */
public static Index of(Index.IndexType indexType, String name, String[][] fieldNames) {
  IndexImpl.Builder indexBuilder = IndexImpl.builder();
  indexBuilder.withIndexType(indexType);
  indexBuilder.withName(name);
  indexBuilder.withFieldNames(fieldNames);
  return indexBuilder.build();
}
Clearvive marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Plain {@link Index} implementation that holds the index type, name and field names it was given.
 * The field-name array is stored and returned as-is, without copying.
 */
public static final class IndexImpl implements Index {
  private final IndexType indexType;
  private final String name;
  private final String[][] fieldNames;

  /**
   * @param indexType The type of the index.
   * @param name The name of the index.
   * @param fieldNames The field names under the table contained in the index.
   */
  public IndexImpl(IndexType indexType, String name, String[][] fieldNames) {
    this.indexType = indexType;
    this.name = name;
    this.fieldNames = fieldNames;
  }

  /** @return A new, empty builder for an index. */
  public static Builder builder() {
    return new Builder();
  }

  /** @return The type of the index. */
  @Override
  public IndexType type() {
    return this.indexType;
  }

  /** @return The name of the index. */
  @Override
  public String name() {
    return this.name;
  }

  /** @return The field names under the table contained in the index. */
  @Override
  public String[][] fieldNames() {
    return this.fieldNames;
  }

  /** Builder that collects the parts of an index and then creates it. */
  public static class Builder {
    protected IndexType indexType;
    protected String name;
    protected String[][] fieldNames;

    /**
     * @param indexType The type of the index.
     * @return This builder.
     */
    public Builder withIndexType(IndexType indexType) {
      this.indexType = indexType;
      return this;
    }

    /**
     * @param name The name of the index.
     * @return This builder.
     */
    public Builder withName(String name) {
      this.name = name;
      return this;
    }

    /**
     * @param fieldNames The field names under the table contained in the index.
     * @return This builder.
     */
    public Builder withFieldNames(String[][] fieldNames) {
      this.fieldNames = fieldNames;
      return this;
    }

    /** @return An index carrying the values set on this builder so far. */
    public Index build() {
      Index built = new IndexImpl(indexType, name, fieldNames);
      return built;
    }
  }
}
}
Clearvive marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.expressions.transforms.Transform;
import com.datastrato.gravitino.rel.expressions.transforms.Transforms;
import com.datastrato.gravitino.rel.indexes.Index;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -562,8 +563,12 @@ public Table createTable(
Map<String, String> properties,
Transform[] partitioning,
Distribution distribution,
SortOrder[] sortOrders)
SortOrder[] sortOrders,
Index[] indexes)
throws NoSuchSchemaException, TableAlreadyExistsException {
Preconditions.checkArgument(
FANNG1 marked this conversation as resolved.
Show resolved Hide resolved
indexes.length == 0,
Clearvive marked this conversation as resolved.
Show resolved Hide resolved
"Hive-catalog does not support indexes, since indexing was removed since 3.0");
NameIdentifier schemaIdent = NameIdentifier.of(tableIdent.namespace().levels());

validatePartitionForCreate(columns, partitioning);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.datastrato.gravitino.rel.expressions.distributions.Distributions;
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.expressions.transforms.Transform;
import com.datastrato.gravitino.rel.indexes.Index;
import com.datastrato.gravitino.utils.MapUtils;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
Expand Down Expand Up @@ -342,6 +343,7 @@ public boolean dropTable(NameIdentifier tableIdent) {
* @param comment The comment for the new table.
* @param properties The properties for the new table.
* @param partitioning The partitioning for the new table.
* @param indexes The indexes for the new table.
* @return The newly created JdbcTable instance.
* @throws NoSuchSchemaException If the schema for the table does not exist.
* @throws TableAlreadyExistsException If the table with the same name already exists.
Expand All @@ -354,8 +356,10 @@ public Table createTable(
Map<String, String> properties,
Transform[] partitioning,
Distribution distribution,
SortOrder[] sortOrders)
SortOrder[] sortOrders,
Index[] indexes)
throws NoSuchSchemaException, TableAlreadyExistsException {
Preconditions.checkArgument(indexes.length == 0, "Jdbc-catalog does not support indexes");
Preconditions.checkArgument(
null == distribution || distribution == Distributions.NONE,
"jdbc-catalog does not support distribution");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.datastrato.gravitino.rel.expressions.distributions.Distributions;
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.expressions.transforms.Transform;
import com.datastrato.gravitino.rel.indexes.Index;
import com.datastrato.gravitino.utils.MapUtils;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
Expand Down Expand Up @@ -468,6 +469,7 @@ public boolean dropTable(NameIdentifier tableIdent) {
* @param comment The comment for the new table.
* @param properties The properties for the new table.
* @param partitioning The partitioning for the new table.
* @param indexes The indexes for the new table.
* @return The newly created IcebergTable instance.
* @throws NoSuchSchemaException If the schema for the table does not exist.
* @throws TableAlreadyExistsException If the table with the same name already exists.
Expand All @@ -480,8 +482,10 @@ public Table createTable(
Map<String, String> properties,
Transform[] partitioning,
Distribution distribution,
SortOrder[] sortOrders)
SortOrder[] sortOrders,
Index[] indexes)
throws NoSuchSchemaException, TableAlreadyExistsException {
Preconditions.checkArgument(indexes.length == 0, "Iceberg-catalog does not support indexes");
try {
if (!Distributions.NONE.equals(distribution)) {
throw new UnsupportedOperationException("Iceberg does not support distribution");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import com.datastrato.gravitino.rel.expressions.distributions.Distribution;
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.expressions.transforms.Transform;
import com.datastrato.gravitino.rel.indexes.Index;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import java.util.Arrays;
Expand Down Expand Up @@ -135,6 +136,7 @@ public Table loadTable(NameIdentifier ident) throws NoSuchTableException {
* @param comment The comment of the table.
* @param properties The properties of the table.
* @param partitioning The partitioning of the table.
* @param indexes The indexes of the table.
* @return The created {@link Table}.
* @throws NoSuchSchemaException if the schema with specified namespace does not exist.
* @throws TableAlreadyExistsException if the table with specified identifier already exists.
Expand All @@ -147,7 +149,8 @@ public Table createTable(
Map<String, String> properties,
Transform[] partitioning,
Distribution distribution,
SortOrder[] sortOrders)
SortOrder[] sortOrders,
Index[] indexes)
throws NoSuchSchemaException, TableAlreadyExistsException {
NameIdentifier.checkTable(ident);

Expand All @@ -159,7 +162,8 @@ public Table createTable(
properties,
toDTOs(sortOrders),
toDTO(distribution),
toDTOs(partitioning));
toDTOs(partitioning),
toDTOs(indexes));
req.validate();

TableResponse resp =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import com.datastrato.gravitino.rel.expressions.distributions.Strategy;
import com.datastrato.gravitino.rel.expressions.sorts.SortDirection;
import com.datastrato.gravitino.rel.expressions.sorts.SortOrder;
import com.datastrato.gravitino.rel.indexes.Indexes;
import com.datastrato.gravitino.rel.types.Type;
import com.datastrato.gravitino.rel.types.Types;
import com.fasterxml.jackson.core.JsonProcessingException;
Expand Down Expand Up @@ -360,7 +361,8 @@ public void testCreateTable() throws JsonProcessingException {
Collections.emptyMap(),
sortOrderDTOs,
DistributionDTO.NONE,
EMPTY_PARTITIONING);
EMPTY_PARTITIONING,
Indexes.EMPTY_INDEXES);
TableResponse resp = new TableResponse(expectedTable);
buildMockResource(Method.POST, tablePath, req, resp, SC_OK);

Expand Down Expand Up @@ -478,7 +480,8 @@ public void testCreatePartitionedTable() throws JsonProcessingException {
Collections.emptyMap(),
SortOrderDTO.EMPTY_SORT,
DistributionDTO.NONE,
EMPTY_PARTITIONING);
EMPTY_PARTITIONING,
Indexes.EMPTY_INDEXES);
TableResponse resp = new TableResponse(expectedTable);
buildMockResource(Method.POST, tablePath, req, resp, SC_OK);

Expand Down Expand Up @@ -510,7 +513,8 @@ public void testCreatePartitionedTable() throws JsonProcessingException {
Collections.emptyMap(),
SortOrderDTO.EMPTY_SORT,
DistributionDTO.NONE,
partitioning);
partitioning,
Indexes.EMPTY_INDEXES);
resp = new TableResponse(expectedTable);
buildMockResource(Method.POST, tablePath, req, resp, SC_OK);

Expand Down
Loading
Loading