Skip to content

Commit

Permalink
[#4867] feat(core): Add storage support for columns in Gravitino (#5078)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

This PR aims to add storage support for columns in Gravitino.

### Why are the changes needed?

With this, we can also support managing columns in Gravitino, such as
setting tags and applying column-level privileges.

Fix: #4867 

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

UTs added.
  • Loading branch information
jerryshao authored Oct 15, 2024
1 parent e9acd15 commit 5f73e62
Show file tree
Hide file tree
Showing 28 changed files with 2,449 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.cfg.EnumFeature;
import com.fasterxml.jackson.databind.json.JsonMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
Expand Down Expand Up @@ -291,7 +292,13 @@ private static class AnyFieldMapperHolder {
.build()
.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY)
.registerModule(new JavaTimeModule())
.registerModule(new Jdk8Module());
.registerModule(new Jdk8Module())
.registerModule(
new SimpleModule()
.addDeserializer(Type.class, new TypeDeserializer())
.addSerializer(Type.class, new TypeSerializer())
.addDeserializer(Expression.class, new ColumnDefaultValueDeserializer())
.addSerializer(Expression.class, new ColumnDefaultValueSerializer()));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.gravitino.Auditable;
import org.apache.gravitino.Entity;
import org.apache.gravitino.Field;
import org.apache.gravitino.rel.Column;
import org.apache.gravitino.rel.expressions.Expression;
import org.apache.gravitino.rel.types.Type;

Expand Down Expand Up @@ -198,6 +199,11 @@ public Builder withAuditInfo(AuditInfo auditInfo) {

/**
 * Finishes construction of the column entity.
 *
 * <p>Validation runs first, against the values exactly as they were supplied. Afterwards a
 * missing default value is replaced by {@code Column.DEFAULT_VALUE_NOT_SET}.
 *
 * @return the validated column entity held by this builder
 */
public ColumnEntity build() {
  columnEntity.validate();

  // A default value was supplied explicitly: nothing to fill in.
  if (columnEntity.defaultValue != null) {
    return columnEntity;
  }

  columnEntity.defaultValue = Column.DEFAULT_VALUE_NOT_SET;
  return columnEntity;
}
}
Expand Down
20 changes: 13 additions & 7 deletions core/src/main/java/org/apache/gravitino/meta/TableEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

import com.google.common.base.Objects;
import com.google.common.collect.Maps;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import lombok.ToString;
import org.apache.gravitino.Auditable;
Expand All @@ -38,7 +39,7 @@ public class TableEntity implements Entity, Auditable, HasIdentifier {
public static final Field AUDIT_INFO =
Field.required("audit_info", AuditInfo.class, "The audit details of the table");
public static final Field COLUMNS =
Field.optional("columns", ColumnEntity[].class, "The columns of the table");
Field.optional("columns", List.class, "The columns of the table");

private Long id;

Expand All @@ -48,7 +49,7 @@ public class TableEntity implements Entity, Auditable, HasIdentifier {

private Namespace namespace;

private ColumnEntity[] columns;
private List<ColumnEntity> columns;

/**
* Returns a map of the fields and their corresponding values for this table.
Expand Down Expand Up @@ -116,7 +117,7 @@ public Namespace namespace() {
return namespace;
}

public ColumnEntity[] columns() {
public List<ColumnEntity> columns() {
return columns;
}

Expand All @@ -134,12 +135,12 @@ public boolean equals(Object o) {
&& Objects.equal(name, baseTable.name)
&& Objects.equal(namespace, baseTable.namespace)
&& Objects.equal(auditInfo, baseTable.auditInfo)
&& Arrays.equals(columns, baseTable.columns);
&& Objects.equal(columns, baseTable.columns);
}

@Override
public int hashCode() {
return Objects.hashCode(id, name, auditInfo, Arrays.hashCode(columns));
return Objects.hashCode(id, name, auditInfo, columns);
}

public static class Builder {
Expand Down Expand Up @@ -170,13 +171,18 @@ public Builder withNamespace(Namespace namespace) {
return this;
}

public Builder withColumns(ColumnEntity[] columns) {
public Builder withColumns(List<ColumnEntity> columns) {
tableEntity.columns = columns;
return this;
}

/**
 * Finishes construction of the table entity.
 *
 * <p>Validation runs first, against the values exactly as they were supplied. Afterwards a
 * missing column list is replaced by an empty list, so the built entity carries an empty list
 * rather than {@code null}.
 *
 * @return the validated table entity held by this builder
 */
public TableEntity build() {
  tableEntity.validate();

  // Columns were supplied explicitly: keep the caller's list as is.
  if (tableEntity.columns != null) {
    return tableEntity;
  }

  tableEntity.columns = Collections.emptyList();
  return tableEntity;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import org.apache.gravitino.storage.relational.service.OwnerMetaService;
import org.apache.gravitino.storage.relational.service.RoleMetaService;
import org.apache.gravitino.storage.relational.service.SchemaMetaService;
import org.apache.gravitino.storage.relational.service.TableColumnMetaService;
import org.apache.gravitino.storage.relational.service.TableMetaService;
import org.apache.gravitino.storage.relational.service.TagMetaService;
import org.apache.gravitino.storage.relational.service.TopicMetaService;
Expand Down Expand Up @@ -292,6 +293,8 @@ public int hardDeleteLegacyData(Entity.EntityType entityType, long legacyTimelin
.deleteTagMetasByLegacyTimeline(
legacyTimeline, GARBAGE_COLLECTOR_SINGLE_DELETION_LIMIT);
case COLUMN:
return TableColumnMetaService.getInstance()
.deleteColumnsByLegacyTimeline(legacyTimeline, GARBAGE_COLLECTOR_SINGLE_DELETION_LIMIT);
case AUDIT:
return 0;
// TODO: Implement hard delete logic for these entity types.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public String startH2Database(Config config) {
Statement statement = connection.createStatement()) {
String sqlContent =
FileUtils.readFileToString(
new File(gravitinoHome + "/scripts/h2/schema-0.6.0-h2.sql"), StandardCharsets.UTF_8);
new File(gravitinoHome + "/scripts/h2/schema-0.7.0-h2.sql"), StandardCharsets.UTF_8);

statement.execute(sqlContent);
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.gravitino.storage.relational.mapper;

import java.util.List;
import org.apache.gravitino.storage.relational.po.ColumnPO;
import org.apache.ibatis.annotations.DeleteProvider;
import org.apache.ibatis.annotations.InsertProvider;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.SelectProvider;
import org.apache.ibatis.annotations.UpdateProvider;

/**
 * MyBatis mapper for the rows stored in the {@code table_column_version_info} table.
 *
 * <p>Every method obtains its SQL text from the method of the same name on {@link
 * TableColumnSQLProviderFactory}.
 */
public interface TableColumnMapper {

  /** Name of the backing table. */
  String COLUMN_TABLE_NAME = "table_column_version_info";

  /**
   * Lists the column rows of a table as of a given table version.
   *
   * @param tableId id of the table, exposed to the SQL as {@code tableId}
   * @param tableVersion table version, exposed to the SQL as {@code tableVersion}
   * @return the matching column rows
   */
  @SelectProvider(
      method = "listColumnPOsByTableIdAndVersion",
      type = TableColumnSQLProviderFactory.class)
  List<ColumnPO> listColumnPOsByTableIdAndVersion(
      @Param("tableId") Long tableId, @Param("tableVersion") Long tableVersion);

  /**
   * Inserts the given column rows.
   *
   * @param columnPOs rows to insert, exposed to the SQL as {@code columnPOs}
   */
  @InsertProvider(method = "insertColumnPOs", type = TableColumnSQLProviderFactory.class)
  void insertColumnPOs(@Param("columnPOs") List<ColumnPO> columnPOs);

  /**
   * Soft-deletes column rows selected by table id.
   *
   * @param tableId id of the table, exposed to the SQL as {@code tableId}
   * @return result of the update statement (presumably the affected-row count; confirm against
   *     MyBatis)
   */
  @UpdateProvider(
      method = "softDeleteColumnsByTableId",
      type = TableColumnSQLProviderFactory.class)
  Integer softDeleteColumnsByTableId(@Param("tableId") Long tableId);

  /**
   * Soft-deletes column rows selected by metalake id.
   *
   * @param metalakeId id of the metalake, exposed to the SQL as {@code metalakeId}
   * @return result of the update statement (presumably the affected-row count; confirm against
   *     MyBatis)
   */
  @UpdateProvider(
      method = "softDeleteColumnsByMetalakeId",
      type = TableColumnSQLProviderFactory.class)
  Integer softDeleteColumnsByMetalakeId(@Param("metalakeId") Long metalakeId);

  /**
   * Soft-deletes column rows selected by catalog id.
   *
   * @param catalogId id of the catalog, exposed to the SQL as {@code catalogId}
   * @return result of the update statement (presumably the affected-row count; confirm against
   *     MyBatis)
   */
  @UpdateProvider(
      method = "softDeleteColumnsByCatalogId",
      type = TableColumnSQLProviderFactory.class)
  Integer softDeleteColumnsByCatalogId(@Param("catalogId") Long catalogId);

  /**
   * Soft-deletes column rows selected by schema id.
   *
   * @param schemaId id of the schema, exposed to the SQL as {@code schemaId}
   * @return result of the update statement (presumably the affected-row count; confirm against
   *     MyBatis)
   */
  @UpdateProvider(
      method = "softDeleteColumnsBySchemaId",
      type = TableColumnSQLProviderFactory.class)
  Integer softDeleteColumnsBySchemaId(@Param("schemaId") Long schemaId);

  /**
   * Physically deletes column rows selected by a legacy timeline, bounded by a limit.
   *
   * @param legacyTimeline timeline value, exposed to the SQL as {@code legacyTimeline}
   * @param limit bound on the deletion, exposed to the SQL as {@code limit}
   * @return result of the delete statement (presumably the affected-row count; confirm against
   *     MyBatis)
   */
  @DeleteProvider(
      method = "deleteColumnPOsByLegacyTimeline",
      type = TableColumnSQLProviderFactory.class)
  Integer deleteColumnPOsByLegacyTimeline(
      @Param("legacyTimeline") Long legacyTimeline, @Param("limit") int limit);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.gravitino.storage.relational.mapper;

import com.google.common.collect.ImmutableMap;
import java.util.List;
import java.util.Map;
import org.apache.gravitino.storage.relational.JDBCBackend;
import org.apache.gravitino.storage.relational.mapper.provider.base.TableColumnBaseSQLProvider;
import org.apache.gravitino.storage.relational.mapper.provider.postgresql.TableColumnPostgreSQLProvider;
import org.apache.gravitino.storage.relational.po.ColumnPO;
import org.apache.gravitino.storage.relational.session.SqlSessionFactoryHelper;
import org.apache.ibatis.annotations.Param;

/**
 * Static façade that hands out column SQL text for the JDBC backend currently in use.
 *
 * <p>Each public SQL method forwards to the same-named method on the provider returned by {@link
 * #getProvider()}.
 */
public class TableColumnSQLProviderFactory {

  /** H2 provider; adds nothing on top of the base provider. */
  static class TableColumnH2Provider extends TableColumnBaseSQLProvider {}

  /** MySQL provider; adds nothing on top of the base provider. */
  static class TableColumnMySQLProvider extends TableColumnBaseSQLProvider {}

  /** One provider instance per supported backend type. */
  private static final Map<JDBCBackend.JDBCBackendType, TableColumnBaseSQLProvider>
      TABLE_COLUMN_SQL_PROVIDERS =
          ImmutableMap.<JDBCBackend.JDBCBackendType, TableColumnBaseSQLProvider>builder()
              .put(JDBCBackend.JDBCBackendType.MYSQL, new TableColumnMySQLProvider())
              .put(JDBCBackend.JDBCBackendType.H2, new TableColumnH2Provider())
              .put(JDBCBackend.JDBCBackendType.POSTGRESQL, new TableColumnPostgreSQLProvider())
              .build();

  /**
   * Looks up the provider registered for the database id of the active SQL session factory
   * configuration.
   *
   * @return the provider mapped to that backend type, or {@code null} when the map has no entry
   *     for it
   */
  public static TableColumnBaseSQLProvider getProvider() {
    return TABLE_COLUMN_SQL_PROVIDERS.get(
        JDBCBackend.JDBCBackendType.fromString(
            SqlSessionFactoryHelper.getInstance()
                .getSqlSessionFactory()
                .getConfiguration()
                .getDatabaseId()));
  }

  // ---- read / write ----

  /** Forwards to the active provider's {@code listColumnPOsByTableIdAndVersion}. */
  public static String listColumnPOsByTableIdAndVersion(
      @Param("tableId") Long tableId, @Param("tableVersion") Long tableVersion) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.listColumnPOsByTableIdAndVersion(tableId, tableVersion);
  }

  /** Forwards to the active provider's {@code insertColumnPOs}. */
  public static String insertColumnPOs(@Param("columnPOs") List<ColumnPO> columnPOs) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.insertColumnPOs(columnPOs);
  }

  // ---- soft deletes ----

  /** Forwards to the active provider's {@code softDeleteColumnsByTableId}. */
  public static String softDeleteColumnsByTableId(@Param("tableId") Long tableId) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.softDeleteColumnsByTableId(tableId);
  }

  /** Forwards to the active provider's {@code softDeleteColumnsByMetalakeId}. */
  public static String softDeleteColumnsByMetalakeId(@Param("metalakeId") Long metalakeId) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.softDeleteColumnsByMetalakeId(metalakeId);
  }

  /** Forwards to the active provider's {@code softDeleteColumnsByCatalogId}. */
  public static String softDeleteColumnsByCatalogId(@Param("catalogId") Long catalogId) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.softDeleteColumnsByCatalogId(catalogId);
  }

  /** Forwards to the active provider's {@code softDeleteColumnsBySchemaId}. */
  public static String softDeleteColumnsBySchemaId(@Param("schemaId") Long schemaId) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.softDeleteColumnsBySchemaId(schemaId);
  }

  // ---- hard delete ----

  /** Forwards to the active provider's {@code deleteColumnPOsByLegacyTimeline}. */
  public static String deleteColumnPOsByLegacyTimeline(
      @Param("legacyTimeline") Long legacyTimeline, @Param("limit") int limit) {
    TableColumnBaseSQLProvider delegate = getProvider();
    return delegate.deleteColumnPOsByLegacyTimeline(legacyTimeline, limit);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.gravitino.storage.relational.mapper.provider.base;

import java.util.List;
import org.apache.gravitino.storage.relational.mapper.TableColumnMapper;
import org.apache.gravitino.storage.relational.po.ColumnPO;
import org.apache.ibatis.annotations.Param;

/**
 * Builds the SQL text used against the table named by {@code
 * TableColumnMapper.COLUMN_TABLE_NAME}.
 *
 * <p>None of the methods read their arguments; the returned text refers to them only through
 * {@code #{...}} placeholders that carry the same names as the {@code @Param} annotations.
 */
public class TableColumnBaseSQLProvider {

  /**
   * Returns a SELECT that, for every {@code column_id}, picks the row whose {@code table_version}
   * equals the highest version found among rows with the given table id, a version not above
   * {@code tableVersion}, and {@code deleted_at = 0}.
   *
   * @param tableId referenced in the SQL as {@code #{tableId}}
   * @param tableVersion referenced in the SQL as {@code #{tableVersion}}
   * @return the SELECT statement text
   */
  public String listColumnPOsByTableIdAndVersion(
      @Param("tableId") Long tableId, @Param("tableVersion") Long tableVersion) {
    final String table = TableColumnMapper.COLUMN_TABLE_NAME;

    // Column list of the outer query, aliased to the property names of the result object.
    final String projection =
        "SELECT t1.column_id AS columnId, t1.column_name AS columnName,"
            + " t1.metalake_id AS metalakeId, t1.catalog_id AS catalogId,"
            + " t1.schema_id AS schemaId, t1.table_id AS tableId,"
            + " t1.table_version AS tableVersion, t1.column_type AS columnType,"
            + " t1.column_comment AS columnComment, t1.column_nullable AS nullable,"
            + " t1.column_auto_increment AS autoIncrement,"
            + " t1.column_default_value AS defaultValue, t1.column_op_type AS columnOpType,"
            + " t1.deleted_at AS deletedAt, t1.audit_info AS auditInfo";

    // Sub-query: newest qualifying version per column id.
    final String newestVersionPerColumn =
        " SELECT column_id, MAX(table_version) AS max_table_version"
            + " FROM "
            + table
            + " WHERE table_id = #{tableId} AND table_version <= #{tableVersion} AND deleted_at = 0"
            + " GROUP BY column_id";

    return projection
        + " FROM "
        + table
        + " t1 JOIN ("
        + newestVersionPerColumn
        + ") t2"
        + " ON t1.column_id = t2.column_id AND t1.table_version = t2.max_table_version";
  }

  /**
   * Returns a {@code <script>}-wrapped multi-row INSERT whose VALUES tuples are produced by a
   * {@code <foreach>} over the {@code columnPOs} collection.
   *
   * @param columnPOs referenced in the SQL as the {@code columnPOs} collection
   * @return the INSERT statement text
   */
  public String insertColumnPOs(@Param("columnPOs") List<ColumnPO> columnPOs) {
    final String targetColumns =
        "(column_id, column_name, metalake_id, catalog_id, schema_id, table_id, table_version,"
            + " column_type, column_comment, column_nullable, column_auto_increment, "
            + " column_default_value, column_op_type, deleted_at, audit_info)";

    // One tuple per element of the collection; "item" is the foreach loop variable.
    final String rowTuple =
        "(#{item.columnId}, #{item.columnName}, #{item.metalakeId}, #{item.catalogId},"
            + " #{item.schemaId}, #{item.tableId}, #{item.tableVersion}, #{item.columnType}, "
            + " #{item.columnComment}, #{item.nullable}, #{item.autoIncrement}, #{item.defaultValue},"
            + " #{item.columnOpType}, #{item.deletedAt}, #{item.auditInfo})";

    return "<script>"
        + "INSERT INTO "
        + TableColumnMapper.COLUMN_TABLE_NAME
        + targetColumns
        + " VALUES "
        + "<foreach collection='columnPOs' item='item' separator=','>"
        + rowTuple
        + "</foreach>"
        + "</script>";
  }

  /**
   * Returns an UPDATE that stamps {@code deleted_at} on rows with the given table id whose {@code
   * deleted_at} is still 0.
   *
   * @param tableId referenced in the SQL as {@code #{tableId}}
   * @return the UPDATE statement text
   */
  public String softDeleteColumnsByTableId(@Param("tableId") Long tableId) {
    return softDeleteWhere("table_id = #{tableId}");
  }

  /**
   * Returns an UPDATE that stamps {@code deleted_at} on rows with the given metalake id whose
   * {@code deleted_at} is still 0.
   *
   * @param metalakeId referenced in the SQL as {@code #{metalakeId}}
   * @return the UPDATE statement text
   */
  public String softDeleteColumnsByMetalakeId(@Param("metalakeId") Long metalakeId) {
    return softDeleteWhere("metalake_id = #{metalakeId}");
  }

  /**
   * Returns an UPDATE that stamps {@code deleted_at} on rows with the given catalog id whose
   * {@code deleted_at} is still 0.
   *
   * @param catalogId referenced in the SQL as {@code #{catalogId}}
   * @return the UPDATE statement text
   */
  public String softDeleteColumnsByCatalogId(@Param("catalogId") Long catalogId) {
    return softDeleteWhere("catalog_id = #{catalogId}");
  }

  /**
   * Returns an UPDATE that stamps {@code deleted_at} on rows with the given schema id whose
   * {@code deleted_at} is still 0.
   *
   * @param schemaId referenced in the SQL as {@code #{schemaId}}
   * @return the UPDATE statement text
   */
  public String softDeleteColumnsBySchemaId(@Param("schemaId") Long schemaId) {
    return softDeleteWhere("schema_id = #{schemaId}");
  }

  /**
   * Returns a DELETE for rows whose {@code deleted_at} is greater than 0 and less than the legacy
   * timeline, with a LIMIT clause.
   *
   * @param legacyTimeline referenced in the SQL as {@code #{legacyTimeline}}
   * @param limit referenced in the SQL as {@code #{limit}}
   * @return the DELETE statement text
   */
  public String deleteColumnPOsByLegacyTimeline(
      @Param("legacyTimeline") Long legacyTimeline, @Param("limit") int limit) {
    final String table = TableColumnMapper.COLUMN_TABLE_NAME;
    return "DELETE FROM "
        + table
        + " WHERE deleted_at > 0 AND deleted_at < #{legacyTimeline} LIMIT #{limit}";
  }

  /**
   * Assembles the soft-delete UPDATE shared by the {@code softDeleteColumnsBy*} methods.
   *
   * <p>The new {@code deleted_at} value combines {@code UNIX_TIMESTAMP()} scaled by 1000 with the
   * microsecond part of {@code CURRENT_TIMESTAMP(3)} divided by 1000 (presumably an epoch
   * timestamp in milliseconds; confirm against the target database).
   *
   * @param ownerPredicate the id comparison placed first in the WHERE clause
   * @return the UPDATE statement text
   */
  private String softDeleteWhere(String ownerPredicate) {
    return "UPDATE "
        + TableColumnMapper.COLUMN_TABLE_NAME
        + " SET deleted_at = (UNIX_TIMESTAMP() * 1000.0)"
        + " + EXTRACT(MICROSECOND FROM CURRENT_TIMESTAMP(3)) / 1000"
        + " WHERE "
        + ownerPredicate
        + " AND deleted_at = 0";
  }
}
Loading

0 comments on commit 5f73e62

Please sign in to comment.