diff --git a/inlong-distribution/pom.xml b/inlong-distribution/pom.xml index 8a85518a0a5..d86259c5fa8 100644 --- a/inlong-distribution/pom.xml +++ b/inlong-distribution/pom.xml @@ -35,42 +35,6 @@ - - org.apache.maven.plugins - maven-assembly-plugin - 3.1.0 - - apache-${project.parent.artifactId}-${project.version} - - - - release - - single - - package - - - src/main/assemblies/release.xml - - - - - sort-plugin - - single - - package - - - src/main/assemblies/sort-connectors-v1.13.xml - src/main/assemblies/sort-connectors-v1.15.xml - src/main/assemblies/sort-connectors-v1.18.xml - - - - - org.codehaus.mojo exec-maven-plugin @@ -94,4 +58,176 @@ + + + + flink-all-version + + true + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.1.0 + + apache-${project.parent.artifactId}-${project.version} + + + + release + + single + + package + + + src/main/assemblies/release.xml + + + + + sort-plugin + + single + + package + + + src/main/assemblies/sort-connectors-v1.13.xml + src/main/assemblies/sort-connectors-v1.15.xml + src/main/assemblies/sort-connectors-v1.18.xml + + + + + + + + + + v1.13 + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.1.0 + + apache-${project.parent.artifactId}-${project.version} + + + + release + + single + + package + + + src/main/assemblies/release.xml + + + + + sort-plugin + + single + + package + + + src/main/assemblies/sort-connectors-v1.13.xml + + + + + + + + + + v1.15 + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.1.0 + + apache-${project.parent.artifactId}-${project.version} + + + + release + + single + + package + + + src/main/assemblies/release.xml + + + + + sort-plugin + + single + + package + + + src/main/assemblies/sort-connectors-v1.15.xml + + + + + + + + + + v1.18 + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.1.0 + + apache-${project.parent.artifactId}-${project.version} + + + + release + + single + + package + + + src/main/assemblies/release.xml + + + + + sort-plugin + + single + + package + + + src/main/assemblies/sort-connectors-v1.18.xml + + + + + + + + + diff --git a/inlong-sort/pom.xml b/inlong-sort/pom.xml index 3aee7983b98..ced807a53ba 100644 --- a/inlong-sort/pom.xml +++ b/inlong-sort/pom.xml @@ -204,6 +204,7 @@ 1.18.1 1.18 2.12 + flink-test-utils diff --git a/inlong-sort/sort-core/pom.xml b/inlong-sort/sort-core/pom.xml index a1be0b20585..fa7396ec70c 100644 --- a/inlong-sort/sort-core/pom.xml +++ b/inlong-sort/sort-core/pom.xml @@ -49,23 +49,6 @@ sort-format-base ${project.version} - - - org.apache.flink - flink-core - provided - - - org.apache.flink - flink-table-common - provided - - - org.apache.inlong - sort-flink-dependencies-${sort.flink.version} - ${project.version} - provided - @@ -100,12 +83,6 @@ ${project.version} test - - org.apache.inlong - sort-format-json-${sort.flink.version} - ${project.version} - test - org.apache.inlong sort-format-csv @@ -126,6 +103,29 @@ true + + + org.apache.flink + flink-core + provided + + + org.apache.flink + flink-table-common + provided + + + org.apache.inlong + sort-flink-dependencies-${sort.flink.version} + ${project.version} + provided + + + org.apache.inlong + sort-format-json-${sort.flink.version} + ${project.version} + test + org.apache.inlong sort-connector-tubemq @@ -245,6 +245,29 @@ v1.15 + + + org.apache.flink + flink-core + provided + + + org.apache.flink + flink-table-common + provided + + + org.apache.inlong + sort-flink-dependencies-${sort.flink.version} + ${project.version} + provided + + + org.apache.inlong + 
sort-format-json-${sort.flink.version} + ${project.version} + test + org.apache.inlong sort-connector-tubemq-v1.15 @@ -327,6 +350,49 @@ + + v1.18 + + + org.apache.flink + flink-core + ${flink.version} + provided + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + org.apache.inlong + sort-flink-dependencies-${sort.flink.version} + ${project.version} + provided + + + org.apache.inlong + sort-connector-pulsar-v1.18 + ${project.version} + test + + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${plugin.surefire.version} + + + org.apache.inlong.sort.function.* + + + + + + diff --git a/inlong-sort/sort-dist/pom.xml b/inlong-sort/sort-dist/pom.xml index a99d45af102..9bae28294c1 100644 --- a/inlong-sort/sort-dist/pom.xml +++ b/inlong-sort/sort-dist/pom.xml @@ -204,5 +204,45 @@ + + v1.18 + + + org.apache.inlong + sort-format-json-v1.18 + ${project.version} + + + org.apache.flink + flink-sql-parquet + ${flink.version} + + + org.apache.flink + flink-sql-orc + ${flink.version} + + + org.apache.flink + flink-csv + ${flink.version} + + + org.apache.flink + flink-json + ${flink.version} + + + org.apache.flink + flink-sql-avro + ${flink.version} + + + org.apache.inlong + audit-sdk + ${project.version} + + + diff --git a/inlong-sort/sort-flink/base/pom.xml b/inlong-sort/sort-flink/base/pom.xml index 9bd066065cd..5f3eef5bf85 100644 --- a/inlong-sort/sort-flink/base/pom.xml +++ b/inlong-sort/sort-flink/base/pom.xml @@ -39,19 +39,6 @@ ${project.version} - - org.apache.inlong - sort-format-json-${sort.flink.version} - ${project.version} - provided - - - - org.apache.flink - flink-connector-base - ${flink.version} - - org.apache.inlong sort-format-base @@ -69,15 +56,89 @@ ${project.version} provided - - org.apache.flink - flink-core - - - org.apache.flink - flink-table-common - - + + + v1.13 + + true + + + + org.apache.flink + flink-connector-base + ${flink.version} + + + org.apache.inlong + sort-format-json-${sort.flink.version} + ${project.version} + provided + + + org.apache.flink + flink-core + ${flink.version} + + + org.apache.flink + flink-table-common + ${flink.version} + + + + + v1.15 + + + org.apache.flink + flink-connector-base + ${flink.version} + + + org.apache.inlong + sort-format-json-${sort.flink.version} + ${project.version} + provided + + + org.apache.flink + flink-core + ${flink.version} + + + org.apache.flink + flink-table-common + ${flink.version} + + + + + v1.18 + + + org.apache.flink + flink-connector-base + ${flink.version} + + + org.apache.inlong + sort-format-json-${sort.flink.version} + ${project.version} + provided + + + org.apache.flink + flink-core + ${flink.version} + + + org.apache.flink + flink-table-common + ${flink.version} + + + + diff --git a/inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/jdbc/src/main/resources/META-INF.services/org.apache.flink.table.factories.Factory b/inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/jdbc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory similarity index 100% rename from inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/jdbc/src/main/resources/META-INF.services/org.apache.flink.table.factories.Factory rename to inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/jdbc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory diff --git a/inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/pulsar/src/main/resources/META-INF.services/org.apache.flink.table.factories.Factory 
b/inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/pulsar/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory similarity index 100% rename from inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/pulsar/src/main/resources/META-INF.services/org.apache.flink.table.factories.Factory rename to inlong-sort/sort-flink/sort-flink-v1.18/sort-connectors/pulsar/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory diff --git a/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/base/TableFormatUtils.java b/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/base/TableFormatUtils.java index 7d41821f351..5cac2633926 100644 --- a/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/base/TableFormatUtils.java +++ b/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/base/TableFormatUtils.java @@ -59,17 +59,13 @@ import org.apache.inlong.common.pojo.sort.dataflow.field.format.VarBinaryFormatInfo; import org.apache.inlong.common.pojo.sort.dataflow.field.format.VarCharFormatInfo; -import org.apache.flink.api.common.serialization.DeserializationSchema; -import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.common.typeinfo.Types; import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.TableColumn; import org.apache.flink.table.api.TableSchema; import org.apache.flink.table.api.ValidationException; import org.apache.flink.table.descriptors.DescriptorProperties; -import org.apache.flink.table.factories.DeserializationSchemaFactory; -import org.apache.flink.table.factories.SerializationSchemaFactory; -import org.apache.flink.table.factories.TableFactoryService; import org.apache.flink.table.types.DataType; import org.apache.flink.table.types.logical.ArrayType; import org.apache.flink.table.types.logical.BigIntType; @@ -102,7 +98,6 @@ import java.util.Map; import java.util.stream.IntStream; -import static org.apache.flink.table.factories.TableFormatFactoryBase.deriveSchema; import static org.apache.flink.util.Preconditions.checkState; import static org.apache.inlong.sort.formats.base.TableFormatConstants.FORMAT_DERIVE_SCHEMA; import static org.apache.inlong.sort.formats.base.TableFormatConstants.FORMAT_PROPERTY_VERSION; @@ -114,50 +109,12 @@ public class TableFormatUtils { private static final Logger LOG = LoggerFactory.getLogger(TableFormatUtils.class); - - /** - * Returns the {@link DeserializationSchema} described by the given - * properties. - * - * @param properties The properties describing the deserializer. - * @param classLoader The class loader for the deserializer. - * @param The type of the data. - * @return The {@link DeserializationSchema} described by the properties. - */ - public static DeserializationSchema getDeserializationSchema( - final Map properties, - final ClassLoader classLoader) { - @SuppressWarnings("unchecked") - final DeserializationSchemaFactory deserializationSchemaFactory = - TableFactoryService.find( - DeserializationSchemaFactory.class, - properties, - classLoader); - - return deserializationSchemaFactory.createDeserializationSchema(properties); - } - - /** - * Returns the {@link SerializationSchema} described by the given - * properties. - * - * @param properties The properties describing the serializer. - * @param classLoader The class loader for the serializer. 
- * @param The type of the data. - * @return The {@link SerializationSchema} described by the properties. - */ - public static SerializationSchema getSerializationSchema( - final Map properties, - final ClassLoader classLoader) { - @SuppressWarnings("unchecked") - final SerializationSchemaFactory serializationSchemaFactory = - TableFactoryService.find( - SerializationSchemaFactory.class, - properties, - classLoader); - - return serializationSchemaFactory.createSerializationSchema(properties); - } + private static final String SCHEMA = "schema"; + private static final String SCHEMA_PROCTIME = "proctime"; + private static final String SCHEMA_FROM = "from"; + private static final String ROWTIME_TIMESTAMPS_TYPE = "rowtime.timestamps.type"; + private static final String ROWTIME_TIMESTAMPS_TYPE_VALUE_FROM_FIELD = "from-field"; + private static final String ROWTIME_TIMESTAMPS_FROM = "rowtime.timestamps.from"; /** * Derive the format information for the given type. @@ -540,6 +497,49 @@ public static void validateSchema(DescriptorProperties descriptorProperties) { } } + public static TableSchema deriveSchema(Map properties) { + final DescriptorProperties descriptorProperties = new DescriptorProperties(); + descriptorProperties.putProperties(properties); + + final TableSchema.Builder builder = TableSchema.builder(); + + final TableSchema tableSchema = descriptorProperties.getTableSchema(SCHEMA); + for (int i = 0; i < tableSchema.getFieldCount(); i++) { + final TableColumn tableColumn = tableSchema.getTableColumns().get(i); + final String fieldName = tableColumn.getName(); + final DataType dataType = tableColumn.getType(); + if (!tableColumn.isPhysical()) { + // skip non-physical columns + continue; + } + final boolean isProctime = + descriptorProperties + .getOptionalBoolean(SCHEMA + '.' + i + '.' + SCHEMA_PROCTIME) + .orElse(false); + final String timestampKey = SCHEMA + '.' + i + '.' + ROWTIME_TIMESTAMPS_TYPE; + final boolean isRowtime = descriptorProperties.containsKey(timestampKey); + if (!isProctime && !isRowtime) { + // check for aliasing + final String aliasName = + descriptorProperties + .getOptionalString(SCHEMA + '.' + i + '.' + SCHEMA_FROM) + .orElse(fieldName); + builder.field(aliasName, dataType); + } + // only use the rowtime attribute if it references a field + else if (isRowtime + && descriptorProperties.isValue( + timestampKey, ROWTIME_TIMESTAMPS_TYPE_VALUE_FROM_FIELD)) { + final String aliasName = + descriptorProperties.getString( + SCHEMA + '.' + i + '.' + ROWTIME_TIMESTAMPS_FROM); + builder.field(aliasName, dataType); + } + } + + return builder.build(); + } + /** * Deserializes the basic field. 
*/ diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/pom.xml b/inlong-sort/sort-formats/format-row/format-json-v1.18/pom.xml new file mode 100644 index 00000000000..4e19988579b --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/pom.xml @@ -0,0 +1,101 @@ + + + + 4.0.0 + + org.apache.inlong + format-row + 1.13.0-SNAPSHOT + + + sort-format-json-v1.18 + Apache InLong - Sort Format-Json-V1.18 + + + ${project.parent.parent.parent.parent.basedir} + 1.18.1 + 31.1-jre-17.0 + 2.14.2-17.0 + + + + + org.apache.flink + flink-json + ${flink.version} + + + + org.apache.inlong + sort-format-common + ${project.version} + provided + + + + org.apache.flink + flink-shaded-jackson + ${flink.jackson.version} + provided + + + + org.apache.flink + flink-core + ${flink.version} + provided + + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + + org.apache.inlong + sort-flink-dependencies-${sort.flink.version} + ${project.version} + provided + + + org.projectlombok + lombok + + + io.debezium + debezium-core + provided + + + org.apache.inlong + sort-common + ${project.version} + + + org.apache.flink + flink-shaded-guava + ${flink.shaded.guava.version} + provided + + + diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/MysqlBinLogData.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/MysqlBinLogData.java new file mode 100644 index 00000000000..a61f86dbf4a --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/MysqlBinLogData.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json; + +import lombok.AllArgsConstructor; +import lombok.Data; +import org.apache.flink.types.Row; + +import java.io.Serializable; +import java.util.Map; + +@Data +@AllArgsConstructor +public class MysqlBinLogData implements Serializable { + + private static final long serialVersionUID = 7819918248769501308L; + + public static final String MYSQL_METADATA_DATABASE = "mysql_metadata_database"; + + public static final String MYSQL_METADATA_TABLE = "mysql_metadata_table"; + + public static final String MYSQL_METADATA_EVENT_TIME = "mysql_metadata_event_time"; + + public static final String MYSQL_METADATA_IS_DDL = "mysql_metadata_is_ddl"; + + public static final String MYSQL_METADATA_EVENT_TYPE = "mysql_metadata_event_type"; + + public static final String MYSQL_METADATA_DATA = "mysql_metadata_data"; + + private Row physicalData; + + private Map metadataMap; +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJson.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJson.java new file mode 100644 index 00000000000..c0e74ce7a05 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJson.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.protocol.ddl.operations.Operation; + +import lombok.Builder; +import lombok.Data; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude.Include; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonTypeName; + +import javax.annotation.Nullable; + +import java.util.List; +import java.util.Map; + +@Builder +@JsonTypeName("canalJson") +@JsonInclude(Include.NON_NULL) +@Data +public class CanalJson { + + @JsonProperty("data") + private List> data; + @JsonProperty("es") + private long es; + @JsonProperty("table") + private String table; + @JsonProperty("type") + private String type; + @JsonProperty("database") + private String database; + @JsonProperty("ts") + private long ts; + @JsonProperty("sql") + private String sql; + @JsonProperty("mysqlType") + private Map mysqlType; + @JsonProperty("sqlType") + private Map sqlType; + @JsonProperty("isDdl") + private boolean isDdl; + @JsonProperty("pkNames") + private List pkNames; + @JsonProperty("schema") + private String schema; + @JsonProperty("oracleType") + private Map oracleType; + @JsonProperty("operation") + private Operation operation; + @JsonProperty("incremental") + private Boolean incremental; + @JsonProperty("dataSourceName") + private String dataSourceName; + + @JsonCreator + public CanalJson(@Nullable @JsonProperty("data") List> data, + @JsonProperty("es") long es, + @JsonProperty("table") String table, + @JsonProperty("type") String type, + @JsonProperty("database") String database, + @JsonProperty("ts") long ts, + @JsonProperty("sql") String sql, + @Nullable @JsonProperty("mysqlType") Map mysqlType, + @Nullable @JsonProperty("sqlType") Map sqlType, + @JsonProperty("isDdl") boolean isDdl, + @Nullable @JsonProperty("pkNames") List pkNames, + @JsonProperty("schema") String schema, + @Nullable @JsonProperty("oracleType") Map oracleType, + @JsonProperty("operation") Operation operation, + @JsonProperty("incremental") Boolean incremental, + @JsonProperty("dataSourceName") String dataSourceName) { + this.data = data; + this.es = es; + this.table = table; + this.type = type; + this.database = database; + this.ts = ts; + this.sql = sql; + this.mysqlType = mysqlType; + this.sqlType = sqlType; + this.isDdl = isDdl; + this.pkNames = pkNames; + this.schema = schema; + this.oracleType = oracleType; + this.operation = operation; + this.incremental = incremental; + this.dataSourceName = dataSourceName; + } + +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonDecodingFormat.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonDecodingFormat.java new file mode 100644 index 00000000000..f8e4ffe50e9 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonDecodingFormat.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonDeserializationSchema.MetadataConverter; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.RowKind; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** {@link DecodingFormat} for Canal using JSON encoding. 
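+ * <p>Exposes Canal metadata (database, table, sql-type, pk-names, ingestion-timestamp, event-timestamp, is-ddl) as readable metadata columns; see {@link ReadableMetadata} below.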
*/ +public class CanalJsonDecodingFormat implements DecodingFormat> { + + // -------------------------------------------------------------------------------------------- + // Mutable attributes + // -------------------------------------------------------------------------------------------- + + private List metadataKeys; + + // -------------------------------------------------------------------------------------------- + // Canal-specific attributes + // -------------------------------------------------------------------------------------------- + + private final @Nullable String database; + + private final @Nullable String table; + + private final boolean ignoreParseErrors; + + private final TimestampFormat timestampFormat; + + public CanalJsonDecodingFormat( + String database, + String table, + boolean ignoreParseErrors, + TimestampFormat timestampFormat) { + this.database = database; + this.table = table; + this.ignoreParseErrors = ignoreParseErrors; + this.timestampFormat = timestampFormat; + this.metadataKeys = Collections.emptyList(); + } + + @Override + public DeserializationSchema createRuntimeDecoder( + DynamicTableSource.Context context, DataType physicalDataType) { + final List readableMetadata = + metadataKeys.stream() + .map( + k -> Stream.of(ReadableMetadata.values()) + .filter(rm -> rm.key.equals(k)) + .findFirst() + .orElseThrow(IllegalStateException::new)) + .collect(Collectors.toList()); + final List metadataFields = + readableMetadata.stream() + .map(m -> DataTypes.FIELD(m.key, m.dataType)) + .collect(Collectors.toList()); + final DataType producedDataType = + DataTypeUtils.appendRowFields(physicalDataType, metadataFields); + final TypeInformation producedTypeInfo = + context.createTypeInformation(producedDataType); + return CanalJsonDeserializationSchema.builder( + physicalDataType, readableMetadata, producedTypeInfo) + .setDatabase(database) + .setTable(table) + .setIgnoreParseErrors(ignoreParseErrors) + .setTimestampFormat(timestampFormat) + .build(); + } + + @Override + public Map listReadableMetadata() { + final Map metadataMap = new LinkedHashMap<>(); + Stream.of(ReadableMetadata.values()) + .forEachOrdered(m -> metadataMap.put(m.key, m.dataType)); + return metadataMap; + } + + @Override + public void applyReadableMetadata(List metadataKeys) { + this.metadataKeys = metadataKeys; + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.newBuilder() + .addContainedKind(RowKind.INSERT) + .addContainedKind(RowKind.UPDATE_BEFORE) + .addContainedKind(RowKind.UPDATE_AFTER) + .addContainedKind(RowKind.DELETE) + .build(); + } + + // -------------------------------------------------------------------------------------------- + // Metadata handling + // -------------------------------------------------------------------------------------------- + + /** List of metadata that can be read with this format. 
*/ + public enum ReadableMetadata { + + DATABASE( + "database", + DataTypes.STRING().nullable(), + DataTypes.FIELD("database", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getString(pos); + } + + @Override + public Object convert(Object in) { + return StringData.fromString(in.toString()); + } + }), + + TABLE( + "table", + DataTypes.STRING().nullable(), + DataTypes.FIELD("table", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getString(pos); + } + + @Override + public Object convert(Object in) { + return StringData.fromString(in.toString()); + } + }), + + SQL_TYPE( + "sql-type", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.INT().nullable()).nullable(), + DataTypes.FIELD( + "sqlType", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.INT().nullable())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getMap(pos); + } + + @Override + public Object convert(Object in) { + return new GenericMapData((Map) in); + } + }), + + PK_NAMES( + "pk-names", + DataTypes.ARRAY(DataTypes.STRING()).nullable(), + DataTypes.FIELD("pkNames", DataTypes.ARRAY(DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getArray(pos); + } + + @Override + public Object convert(Object in) { + return new GenericArrayData((Object[]) in); + } + }), + + INGESTION_TIMESTAMP( + "ingestion-timestamp", + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3).nullable(), + DataTypes.FIELD("ts", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return TimestampData.fromEpochMillis(row.getLong(pos)); + } + + @Override + public Object convert(Object in) { + return in; + } + }), + + EVENT_TIMESTAMP( + "event-timestamp", + DataTypes.BIGINT().nullable(), + DataTypes.FIELD("es", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getLong(pos); + } + + @Override + public Object convert(Object in) { + return in; + } + }), + + IS_DDL( + "is-ddl", + DataTypes.BOOLEAN().nullable(), + DataTypes.FIELD("isDdl", DataTypes.BOOLEAN()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getBoolean(pos); + } + + @Override + public Object convert(Object in) { + return in; + } + }); + + final String key; + + final DataType dataType; + + final DataTypes.Field requiredJsonField; + + final MetadataConverter converter; + + ReadableMetadata( + String key, + DataType dataType, + DataTypes.Field requiredJsonField, + MetadataConverter converter) { + this.key = key; + this.dataType = dataType; + this.requiredJsonField = requiredJsonField; + this.converter = converter; + } + + public String getKey() { + return key; + } + } +} diff --git 
a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonDeserializationSchema.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonDeserializationSchema.java new file mode 100644 index 00000000000..bd8ffd469a3 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonDeserializationSchema.java @@ -0,0 +1,419 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonDecodingFormat.ReadableMetadata; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonRowDataDeserializationSchema; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.Collector; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static java.lang.String.format; +import static org.apache.inlong.sort.formats.json.canal.CanalUtils.getMysqlMetadataKey; + +/** + * Deserialization schema from Canal JSON to Flink Table/SQL internal data structure {@link + * RowData}. The deserialization schema knows Canal's schema definition and can extract the database + * data and convert into {@link RowData} with {@link RowKind}. + * + *
<p>Deserializes a byte[] message as a JSON object and reads the specified fields. + * + * <p>Failures during deserialization are forwarded as wrapped IOExceptions.
+ * + * @see Alibaba Canal + */ +public final class CanalJsonDeserializationSchema implements DeserializationSchema { + + private static final long serialVersionUID = 1L; + + private static final String FIELD_OLD = "old"; + private static final String OP_INSERT = "INSERT"; + private static final String OP_UPDATE = "UPDATE"; + private static final String OP_DELETE = "DELETE"; + private static final String OP_CREATE = "CREATE"; + + /** The deserializer to deserialize Canal JSON data. */ + private final JsonRowDataDeserializationSchema jsonDeserializer; + + /** Metadata to be extracted for every record. */ + private final MetadataConverter[] metadataConverters; + + private final List requestedMetadata; + + /** {@link TypeInformation} of the produced {@link RowData} (physical + meta data). */ + private final TypeInformation producedTypeInfo; + + /** Only read changelogs from the specific database. */ + private final @Nullable String database; + + /** Only read changelogs from the specific table. */ + private final @Nullable String table; + + /** Flag indicating whether to ignore invalid fields/rows (default: throw an exception). */ + private final boolean ignoreParseErrors; + + /** Names of fields. */ + private final List fieldNames; + + /** Number of fields. */ + private final int fieldCount; + + /** Pattern of the specific database. */ + private final Pattern databasePattern; + + /** Pattern of the specific table. */ + private final Pattern tablePattern; + + private CanalJsonDeserializationSchema( + DataType physicalDataType, + List requestedMetadata, + TypeInformation producedTypeInfo, + @Nullable String database, + @Nullable String table, + boolean ignoreParseErrors, + TimestampFormat timestampFormat) { + final RowType jsonRowType = createJsonRowType(physicalDataType, requestedMetadata); + this.jsonDeserializer = + new JsonRowDataDeserializationSchema( + jsonRowType, + // the result type is never used, so it's fine to pass in the produced type + // info + producedTypeInfo, + false, // ignoreParseErrors already contains the functionality of + // failOnMissingField + ignoreParseErrors, + timestampFormat); + this.metadataConverters = createMetadataConverters(jsonRowType, requestedMetadata); + this.requestedMetadata = requestedMetadata; + this.producedTypeInfo = producedTypeInfo; + this.database = database; + this.table = table; + this.ignoreParseErrors = ignoreParseErrors; + final RowType physicalRowType = ((RowType) physicalDataType.getLogicalType()); + this.fieldNames = physicalRowType.getFieldNames(); + this.fieldCount = physicalRowType.getFieldCount(); + this.databasePattern = database == null ? null : Pattern.compile(database); + this.tablePattern = table == null ? null : Pattern.compile(table); + } + + // ------------------------------------------------------------------------------------------ + // Builder + // ------------------------------------------------------------------------------------------ + + /** Creates A builder for building a {@link CanalJsonDeserializationSchema}. */ + public static Builder builder( + DataType physicalDataType, + List requestedMetadata, + TypeInformation producedTypeInfo) { + return new Builder(physicalDataType, requestedMetadata, producedTypeInfo); + } + + /** A builder for creating a {@link CanalJsonDeserializationSchema}. 
*/ + @Internal + public static final class Builder { + + private final DataType physicalDataType; + private final List requestedMetadata; + private final TypeInformation producedTypeInfo; + private String database = null; + private String table = null; + private boolean ignoreParseErrors = false; + private TimestampFormat timestampFormat = TimestampFormat.SQL; + + private Builder( + DataType physicalDataType, + List requestedMetadata, + TypeInformation producedTypeInfo) { + this.physicalDataType = physicalDataType; + this.requestedMetadata = requestedMetadata; + this.producedTypeInfo = producedTypeInfo; + } + + public Builder setDatabase(String database) { + this.database = database; + return this; + } + + public Builder setTable(String table) { + this.table = table; + return this; + } + + public Builder setIgnoreParseErrors(boolean ignoreParseErrors) { + this.ignoreParseErrors = ignoreParseErrors; + return this; + } + + public Builder setTimestampFormat(TimestampFormat timestampFormat) { + this.timestampFormat = timestampFormat; + return this; + } + + public CanalJsonDeserializationSchema build() { + return new CanalJsonDeserializationSchema( + physicalDataType, + requestedMetadata, + producedTypeInfo, + database, + table, + ignoreParseErrors, + timestampFormat); + } + } + + // ------------------------------------------------------------------------------------------ + + @Override + public RowData deserialize(byte[] message) throws IOException { + throw new RuntimeException( + "Please invoke DeserializationSchema#deserialize(byte[], Collector) instead."); + } + + @Override + public void deserialize(@Nullable byte[] message, Collector out) throws IOException { + if (message == null || message.length == 0) { + return; + } + try { + final JsonNode root = jsonDeserializer.deserializeToJsonNode(message); + if (database != null) { + if (!databasePattern + .matcher(root.get(ReadableMetadata.DATABASE.key).asText()) + .matches()) { + return; + } + } + if (table != null) { + if (!tablePattern + .matcher(root.get(ReadableMetadata.TABLE.key).asText()) + .matches()) { + return; + } + } + final GenericRowData row = (GenericRowData) jsonDeserializer.convertToRowData(root); + String type = row.getString(2).toString(); // "type" field + if (OP_INSERT.equals(type)) { + // "data" field is an array of row, contains inserted rows + ArrayData data = row.getArray(0); + for (int i = 0; i < data.size(); i++) { + GenericRowData insert = (GenericRowData) data.getRow(i, fieldCount); + insert.setRowKind(RowKind.INSERT); + emitRow(row, insert, out); + } + } else if (OP_UPDATE.equals(type)) { + // "data" field is an array of row, contains new rows + ArrayData data = row.getArray(0); + // "old" field is an array of row, contains old values + ArrayData old = row.getArray(1); + for (int i = 0; i < data.size(); i++) { + // the underlying JSON deserialization schema always produce GenericRowData. 
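+ // For example, given a hypothetical message {"data":[{"id":1,"name":"b"}],"old":[{"name":"a"}],"type":"UPDATE",...},
+ // "old" carries only the changed field "name", so the unchanged "id" is copied from the new row into the
+ // UPDATE_BEFORE row by the merge loop below.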
+ GenericRowData after = (GenericRowData) data.getRow(i, fieldCount); + GenericRowData before = (GenericRowData) old.getRow(i, fieldCount); + final JsonNode oldField = root.get(FIELD_OLD); + for (int f = 0; f < fieldCount; f++) { + if (before.isNullAt(f) && oldField.findValue(fieldNames.get(f)) == null) { + // fields in "old" (before) means the fields are changed + // fields not in "old" (before) means the fields are not changed + // so we just copy the not changed fields into before + before.setField(f, after.getField(f)); + } + } + before.setRowKind(RowKind.UPDATE_BEFORE); + after.setRowKind(RowKind.UPDATE_AFTER); + emitRow(row, before, out); + emitRow(row, after, out); + } + } else if (OP_DELETE.equals(type)) { + // "data" field is an array of row, contains deleted rows + ArrayData data = row.getArray(0); + for (int i = 0; i < data.size(); i++) { + GenericRowData insert = (GenericRowData) data.getRow(i, fieldCount); + insert.setRowKind(RowKind.DELETE); + emitRow(row, insert, out); + } + } else if (OP_CREATE.equals(type)) { + // "data" field is null and "type" is "CREATE" which means + // this is a DDL change event, and we should skip it. + return; + } else { + if (!ignoreParseErrors) { + throw new IOException( + format( + "Unknown \"type\" value \"%s\". The Canal JSON message is '%s'", + type, new String(message))); + } + } + } catch (Throwable t) { + // a big try catch to protect the processing. + if (!ignoreParseErrors) { + throw new IOException( + format("Corrupt Canal JSON message '%s'.", new String(message)), t); + } + } + } + + private void emitRow(GenericRowData rootRow, GenericRowData physicalRow, Collector out) { + final int physicalArity = physicalRow.getArity(); + final int metadataArity = metadataConverters.length; + final GenericRowData producedRow = new GenericRowData(physicalRow.getRowKind(), physicalArity + 1); + + for (int physicalPos = 0; physicalPos < physicalArity; physicalPos++) { + producedRow.setField(physicalPos + 1, physicalRow.getField(physicalPos)); + } + + // Put metadata in the first field of the emitted RowData + Map metadataMap = new HashMap<>(); + + for (int metadataPos = 0; metadataPos < metadataArity; metadataPos++) { + metadataMap.put( + StringData.fromString(getMysqlMetadataKey(requestedMetadata.get(metadataPos))), + StringData.fromString(metadataConverters[metadataPos].convert(rootRow).toString())); + } + producedRow.setField(0, new GenericMapData(metadataMap)); + + out.collect(producedRow); + } + + @Override + public boolean isEndOfStream(RowData nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + return producedTypeInfo; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CanalJsonDeserializationSchema that = (CanalJsonDeserializationSchema) o; + return Objects.equals(jsonDeserializer, that.jsonDeserializer) + && Objects.equals(producedTypeInfo, that.producedTypeInfo) + && Objects.equals(database, that.database) + && Objects.equals(table, that.table) + && ignoreParseErrors == that.ignoreParseErrors + && fieldCount == that.fieldCount; + } + + @Override + public int hashCode() { + return Objects.hash( + jsonDeserializer, + producedTypeInfo, + database, + table, + ignoreParseErrors, + fieldCount); + } + + // -------------------------------------------------------------------------------------------- + + private static RowType createJsonRowType( + DataType physicalDataType, List readableMetadata) { + 
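+ // The resulting type mirrors the Canal envelope, roughly (sketch):
+ // ROW<data ARRAY<physical>, old ARRAY<physical>, type STRING, database STRING, table STRING, [requested metadata fields]>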
// Canal JSON contains other information, e.g. "ts", "sql", but we don't need them + DataType root = + DataTypes.ROW( + DataTypes.FIELD("data", DataTypes.ARRAY(physicalDataType)), + DataTypes.FIELD("old", DataTypes.ARRAY(physicalDataType)), + DataTypes.FIELD("type", DataTypes.STRING()), + ReadableMetadata.DATABASE.requiredJsonField, + ReadableMetadata.TABLE.requiredJsonField); + // append fields that are required for reading metadata in the root + final List rootMetadataFields = + readableMetadata.stream() + .filter(m -> m != ReadableMetadata.DATABASE && m != ReadableMetadata.TABLE) + .map(m -> m.requiredJsonField) + .distinct() + .collect(Collectors.toList()); + return (RowType) DataTypeUtils.appendRowFields(root, rootMetadataFields).getLogicalType(); + } + + private static MetadataConverter[] createMetadataConverters( + RowType jsonRowType, List requestedMetadata) { + return requestedMetadata.stream() + .map(m -> convert(jsonRowType, m)) + .toArray(MetadataConverter[]::new); + } + + private static MetadataConverter convert(RowType jsonRowType, ReadableMetadata metadata) { + final int pos = jsonRowType.getFieldNames().indexOf(metadata.requiredJsonField.getName()); + return new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData root, int unused) { + return metadata.converter.convert(root, pos); + } + + @Override + public Object convert(Object in) { + return metadata.converter.convert(in); + } + }; + } + + // -------------------------------------------------------------------------------------------- + + /** + * Converter that extracts a metadata field from the row that comes out of the JSON schema and + * converts it to the desired data type. + */ + interface MetadataConverter extends Serializable { + + // Method for top-level access. + default Object convert(GenericRowData row) { + return convert(row, -1); + } + + Object convert(GenericRowData row, int pos); + + Object convert(Object in); + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedDecodingFormat.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedDecodingFormat.java new file mode 100644 index 00000000000..c435845a488 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedDecodingFormat.java @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedDeserializationSchema.MetadataConverter; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.RowKind; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * {@link DecodingFormat} for Canal using JSON encoding. + * different from flink:1.13.5. This support more metadata. + */ +public class CanalJsonEnhancedDecodingFormat implements DecodingFormat> { + + // -------------------------------------------------------------------------------------------- + // Canal-specific attributes + // -------------------------------------------------------------------------------------------- + @Nullable + private final String database; + @Nullable + private final String table; + private final boolean ignoreParseErrors; + private final TimestampFormat timestampFormat; + + // -------------------------------------------------------------------------------------------- + // Mutable attributes + // -------------------------------------------------------------------------------------------- + private List metadataKeys; + + public CanalJsonEnhancedDecodingFormat( + String database, + String table, + boolean ignoreParseErrors, + TimestampFormat timestampFormat) { + this.database = database; + this.table = table; + this.ignoreParseErrors = ignoreParseErrors; + this.timestampFormat = timestampFormat; + this.metadataKeys = Collections.emptyList(); + } + + @Override + public DeserializationSchema createRuntimeDecoder( + DynamicTableSource.Context context, DataType physicalDataType) { + final List readableMetadata = metadataKeys.stream() + .map(k -> Stream.of(ReadableMetadata.values()) + .filter(rm -> rm.key.equals(k)) + .findFirst() + .orElseThrow(IllegalStateException::new)) + .collect(Collectors.toList()); + final List metadataFields = readableMetadata.stream() + .map(m -> DataTypes.FIELD(m.key, m.dataType)) + .collect(Collectors.toList()); + final DataType producedDataType = DataTypeUtils.appendRowFields(physicalDataType, metadataFields); + final TypeInformation producedTypeInfo = context.createTypeInformation(producedDataType); + return CanalJsonEnhancedDeserializationSchema.builder(physicalDataType, readableMetadata, producedTypeInfo) + .setDatabase(database) + .setTable(table) + .setIgnoreParseErrors(ignoreParseErrors) + .setTimestampFormat(timestampFormat) + .build(); + } + + @Override + public Map listReadableMetadata() { + final Map metadataMap = new LinkedHashMap<>(); + Stream.of(ReadableMetadata.values()) + .forEachOrdered(m -> metadataMap.put(m.key, m.dataType)); + return metadataMap; + } + + @Override + public void applyReadableMetadata(List metadataKeys) { + 
this.metadataKeys = metadataKeys; + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.newBuilder() + .addContainedKind(RowKind.INSERT) + .addContainedKind(RowKind.UPDATE_BEFORE) + .addContainedKind(RowKind.UPDATE_AFTER) + .addContainedKind(RowKind.DELETE) + .build(); + } + + // -------------------------------------------------------------------------------------------- + // Metadata handling + // -------------------------------------------------------------------------------------------- + + /** + * List of metadata that can be read with this format. + */ + public enum ReadableMetadata { + + DATABASE( + "database", + DataTypes.STRING().nullable(), + DataTypes.FIELD("database", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + + TABLE( + "table", + DataTypes.STRING().nullable(), + DataTypes.FIELD("table", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + + SQL_TYPE( + "sql-type", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.INT().nullable()).nullable(), + DataTypes.FIELD( + "sqlType", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.INT().nullable())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getMap(pos); + } + }), + + PK_NAMES( + "pk-names", + DataTypes.ARRAY(DataTypes.STRING()).nullable(), + DataTypes.FIELD("pkNames", DataTypes.ARRAY(DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getArray(pos); + } + }), + + INGESTION_TIMESTAMP( + "ingestion-timestamp", + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3).nullable(), + DataTypes.FIELD("ts", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return TimestampData.fromEpochMillis(row.getLong(pos)); + } + }), + + EVENT_TIMESTAMP( + "event-timestamp", + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3).nullable(), + DataTypes.FIELD("es", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return TimestampData.fromEpochMillis(row.getLong(pos)); + } + }), + // additional metadata + /** + * It is deprecated, please use {@link this#TYPE} instead + */ + @Deprecated + OP_TYPE( + "op-type", + DataTypes.STRING().nullable(), + DataTypes.FIELD("opType", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + TYPE( + "type", + DataTypes.STRING().nullable(), + DataTypes.FIELD("type", DataTypes.STRING()), + new MetadataConverter() { + + 
private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + IS_DDL( + "is-ddl", + DataTypes.BOOLEAN().nullable(), + DataTypes.FIELD("isDdl", DataTypes.BOOLEAN()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getBoolean(pos); + } + }), + + MYSQL_TYPE( + "mysql-type", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.STRING().nullable()).nullable(), + DataTypes.FIELD("mysqlType", DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getMap(pos); + } + }), + BATCH_ID( + "batch-id", + DataTypes.BIGINT().nullable(), + DataTypes.FIELD("batchId", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getLong(pos); + } + }), + UPDATE_BEFORE( + "update-before", + DataTypes.ARRAY( + DataTypes.MAP( + DataTypes.STRING().nullable(), + DataTypes.STRING().nullable()) + .nullable()) + .nullable(), + DataTypes.FIELD("updateBefore", DataTypes.ARRAY( + DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING()))), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getArray(pos); + } + }); + + final String key; + + final DataType dataType; + + final DataTypes.Field requiredJsonField; + + final MetadataConverter converter; + + ReadableMetadata( + String key, + DataType dataType, + DataTypes.Field requiredJsonField, + MetadataConverter converter) { + this.key = key; + this.dataType = dataType; + this.requiredJsonField = requiredJsonField; + this.converter = converter; + } + + public String getKey() { + return key; + } + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedDeserializationSchema.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedDeserializationSchema.java new file mode 100644 index 00000000000..a4c326ef793 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedDeserializationSchema.java @@ -0,0 +1,437 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedDecodingFormat.ReadableMetadata; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonRowDataDeserializationSchema; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.Collector; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.io.Serializable; +import java.util.List; +import java.util.Objects; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static java.lang.String.format; + +/** + * Deserialization schema from Canal JSON to Flink Table/SQL internal data structure {@link + * RowData}. The deserialization schema knows Canal's schema definition and can extract the database + * data and convert into {@link RowData} with {@link RowKind}. + * + *

<p>Deserializes a byte[] message as a JSON object and reads the specified fields.
+ *
+ * <p>Failures during deserialization are forwarded as wrapped IOExceptions.
+ * + * @see Alibaba Canal + */ +public final class CanalJsonEnhancedDeserializationSchema implements DeserializationSchema { + + private static final long serialVersionUID = 1L; + + private static final String FIELD_OLD = "old"; + private static final String OP_INSERT = "INSERT"; + private static final String OP_UPDATE = "UPDATE"; + private static final String OP_DELETE = "DELETE"; + private static final String OP_CREATE = "CREATE"; + + /** + * The deserializer to deserialize Canal JSON data. + */ + private final JsonRowDataDeserializationSchema jsonDeserializer; + + /** + * Flag that indicates that an additional projection is required for metadata. + */ + private final boolean hasMetadata; + + /** + * Metadata to be extracted for every record. + */ + private final MetadataConverter[] metadataConverters; + + /** + * {@link TypeInformation} of the produced {@link RowData} (physical + meta data). + */ + private final TypeInformation producedTypeInfo; + + /** + * Only read changelogs from the specific database. + */ + private final @Nullable String database; + + /** + * Only read changelogs from the specific table. + */ + private final @Nullable String table; + + /** + * Flag indicating whether to ignore invalid fields/rows (default: throw an exception). + */ + private final boolean ignoreParseErrors; + + /** + * Names of fields. + */ + private final List fieldNames; + + /** + * Number of fields. + */ + private final int fieldCount; + + /** + * Pattern of the specific database. + */ + private final Pattern databasePattern; + + /** + * Pattern of the specific table. + */ + private final Pattern tablePattern; + + private CanalJsonEnhancedDeserializationSchema( + DataType physicalDataType, + List requestedMetadata, + TypeInformation producedTypeInfo, + @Nullable String database, + @Nullable String table, + boolean ignoreParseErrors, + TimestampFormat timestampFormat) { + final RowType jsonRowType = createJsonRowType(physicalDataType, requestedMetadata); + this.jsonDeserializer = + new JsonRowDataDeserializationSchema( + jsonRowType, + // the result type is never used, so it's fine to pass in the produced type + // info + producedTypeInfo, + false, // ignoreParseErrors already contains the functionality of + // failOnMissingField + ignoreParseErrors, + timestampFormat); + this.hasMetadata = requestedMetadata.size() > 0; + this.metadataConverters = createMetadataConverters(jsonRowType, requestedMetadata); + this.producedTypeInfo = producedTypeInfo; + this.database = database; + this.table = table; + this.ignoreParseErrors = ignoreParseErrors; + final RowType physicalRowType = ((RowType) physicalDataType.getLogicalType()); + this.fieldNames = physicalRowType.getFieldNames(); + this.fieldCount = physicalRowType.getFieldCount(); + this.databasePattern = database == null ? null : Pattern.compile(database); + this.tablePattern = table == null ? null : Pattern.compile(table); + } + + // ------------------------------------------------------------------------------------------ + // Builder + // ------------------------------------------------------------------------------------------ + + /** + * Creates A builder for building a {@link CanalJsonEnhancedDeserializationSchema}. 
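+ *
+ * <p>A minimal usage sketch; the physical type, metadata list, and the regex filters below are
+ * illustrative, not prescriptive:
+ * <pre>{@code
+ * CanalJsonEnhancedDeserializationSchema schema =
+ *         CanalJsonEnhancedDeserializationSchema
+ *                 .builder(physicalDataType, requestedMetadata, producedTypeInfo)
+ *                 .setDatabase("inventory")     // optional regex filter on the "database" field
+ *                 .setTable("orders_.*")        // optional regex filter on the "table" field
+ *                 .setIgnoreParseErrors(true)
+ *                 .setTimestampFormat(TimestampFormat.SQL)
+ *                 .build();
+ * }</pre>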
+ */ + public static Builder builder( + DataType physicalDataType, + List requestedMetadata, + TypeInformation producedTypeInfo) { + return new Builder(physicalDataType, requestedMetadata, producedTypeInfo); + } + + private static RowType createJsonRowType( + DataType physicalDataType, List readableMetadata) { + // Canal JSON contains other information, e.g. "ts", "sql", but we don't need them + DataType root = + DataTypes.ROW( + DataTypes.FIELD("data", DataTypes.ARRAY(physicalDataType)), + DataTypes.FIELD("old", DataTypes.ARRAY(physicalDataType)), + ReadableMetadata.TYPE.requiredJsonField, + ReadableMetadata.DATABASE.requiredJsonField, + ReadableMetadata.TABLE.requiredJsonField); + // append fields that are required for reading metadata in the root + final List rootMetadataFields = + readableMetadata.stream() + .filter(m -> m != ReadableMetadata.DATABASE + && m != ReadableMetadata.TABLE + && m != ReadableMetadata.TYPE) + .map(m -> m.requiredJsonField) + .distinct() + .collect(Collectors.toList()); + return (RowType) DataTypeUtils.appendRowFields(root, rootMetadataFields).getLogicalType(); + } + + // ------------------------------------------------------------------------------------------ + + private static MetadataConverter[] createMetadataConverters( + RowType jsonRowType, List requestedMetadata) { + return requestedMetadata.stream() + .map(m -> convert(jsonRowType, m)) + .toArray(MetadataConverter[]::new); + } + + private static MetadataConverter convert(RowType jsonRowType, ReadableMetadata metadata) { + final int pos = jsonRowType.getFieldNames().indexOf(metadata.requiredJsonField.getName()); + return new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData root, int unused) { + return metadata.converter.convert(root, pos); + } + }; + } + + @Override + public RowData deserialize(byte[] message) throws IOException { + throw new RuntimeException( + "Please invoke DeserializationSchema#deserialize(byte[], Collector) instead."); + } + + @Override + public void deserialize(@Nullable byte[] message, Collector out) throws IOException { + if (message == null || message.length == 0) { + return; + } + try { + final JsonNode root = jsonDeserializer.deserializeToJsonNode(message); + if (database != null) { + if (!databasePattern + .matcher(root.get(ReadableMetadata.DATABASE.key).asText()) + .matches()) { + return; + } + } + if (table != null) { + if (!tablePattern + .matcher(root.get(ReadableMetadata.TABLE.key).asText()) + .matches()) { + return; + } + } + final GenericRowData row = (GenericRowData) jsonDeserializer.convertToRowData(root); + String type = row.getString(2).toString(); // "type" field + if (OP_INSERT.equals(type)) { + // "data" field is an array of row, contains inserted rows + ArrayData data = row.getArray(0); + for (int i = 0; i < data.size(); i++) { + GenericRowData insert = (GenericRowData) data.getRow(i, fieldCount); + insert.setRowKind(RowKind.INSERT); + emitRow(row, insert, out); + } + } else if (OP_UPDATE.equals(type)) { + // "data" field is an array of row, contains new rows + ArrayData data = row.getArray(0); + // "old" field is an array of row, contains old values + ArrayData old = row.getArray(1); + for (int i = 0; i < data.size(); i++) { + // the underlying JSON deserialization schema always produce GenericRowData. 
+ GenericRowData after = (GenericRowData) data.getRow(i, fieldCount); + GenericRowData before = (GenericRowData) old.getRow(i, fieldCount); + final JsonNode oldField = root.get(FIELD_OLD); + for (int f = 0; f < fieldCount; f++) { + if (before.isNullAt(f) && oldField.findValue(fieldNames.get(f)) == null) { + // fields in "old" (before) means the fields are changed + // fields not in "old" (before) means the fields are not changed + // so we just copy the not changed fields into before + before.setField(f, after.getField(f)); + } + } + before.setRowKind(RowKind.UPDATE_BEFORE); + after.setRowKind(RowKind.UPDATE_AFTER); + emitRow(row, before, out); + emitRow(row, after, out); + } + } else if (OP_DELETE.equals(type)) { + // "data" field is an array of row, contains deleted rows + ArrayData data = row.getArray(0); + for (int i = 0; i < data.size(); i++) { + GenericRowData insert = (GenericRowData) data.getRow(i, fieldCount); + insert.setRowKind(RowKind.DELETE); + emitRow(row, insert, out); + } + } else if (OP_CREATE.equals(type)) { + // "data" field is null and "type" is "CREATE" which means + // this is a DDL change event, and we should skip it. + return; + } else { + if (!ignoreParseErrors) { + throw new IOException( + format( + "Unknown \"type\" value \"%s\". The Canal JSON message is '%s'", + type, new String(message))); + } + } + } catch (Throwable t) { + // a big try catch to protect the processing. + if (!ignoreParseErrors) { + throw new IOException( + format("Corrupt Canal JSON message '%s'.", new String(message)), t); + } + } + } + + private void emitRow( + GenericRowData rootRow, GenericRowData physicalRow, Collector out) { + // shortcut in case no output projection is required + if (!hasMetadata) { + out.collect(physicalRow); + return; + } + final int physicalArity = physicalRow.getArity(); + final int metadataArity = metadataConverters.length; + final GenericRowData producedRow = + new GenericRowData(physicalRow.getRowKind(), physicalArity + metadataArity); + for (int physicalPos = 0; physicalPos < physicalArity; physicalPos++) { + producedRow.setField(physicalPos, physicalRow.getField(physicalPos)); + } + for (int metadataPos = 0; metadataPos < metadataArity; metadataPos++) { + producedRow.setField( + physicalArity + metadataPos, metadataConverters[metadataPos].convert(rootRow)); + } + out.collect(producedRow); + } + + @Override + public boolean isEndOfStream(RowData nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + return producedTypeInfo; + } + + // -------------------------------------------------------------------------------------------- + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CanalJsonEnhancedDeserializationSchema that = (CanalJsonEnhancedDeserializationSchema) o; + return Objects.equals(jsonDeserializer, that.jsonDeserializer) + && hasMetadata == that.hasMetadata + && Objects.equals(producedTypeInfo, that.producedTypeInfo) + && Objects.equals(database, that.database) + && Objects.equals(table, that.table) + && ignoreParseErrors == that.ignoreParseErrors + && fieldCount == that.fieldCount; + } + + @Override + public int hashCode() { + return Objects.hash( + jsonDeserializer, + hasMetadata, + producedTypeInfo, + database, + table, + ignoreParseErrors, + fieldCount); + } + + /** + * Converter that extracts a metadata field from the row that comes out of the JSON schema and + * converts it to the desired data type. 
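+ *
+ * <p>For instance, the string-typed constants in {@code ReadableMetadata} return
+ * {@code row.getString(pos)} after a null check, while the converters built in
+ * {@code createMetadataConverters} capture their own field position, so the top-level
+ * {@code convert(GenericRowData)} variant can ignore the position argument.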
+ */ + interface MetadataConverter extends Serializable { + + // Method for top-level access. + default Object convert(GenericRowData row) { + return convert(row, -1); + } + + Object convert(GenericRowData row, int pos); + } + + // -------------------------------------------------------------------------------------------- + + /** + * A builder for creating a {@link CanalJsonEnhancedDeserializationSchema}. + */ + @Internal + public static final class Builder { + + private final DataType physicalDataType; + private final List requestedMetadata; + private final TypeInformation producedTypeInfo; + private String database = null; + private String table = null; + private boolean ignoreParseErrors = false; + private TimestampFormat timestampFormat = TimestampFormat.SQL; + + private Builder( + DataType physicalDataType, + List requestedMetadata, + TypeInformation producedTypeInfo) { + this.physicalDataType = physicalDataType; + this.requestedMetadata = requestedMetadata; + this.producedTypeInfo = producedTypeInfo; + } + + public Builder setDatabase(String database) { + this.database = database; + return this; + } + + public Builder setTable(String table) { + this.table = table; + return this; + } + + public Builder setIgnoreParseErrors(boolean ignoreParseErrors) { + this.ignoreParseErrors = ignoreParseErrors; + return this; + } + + public Builder setTimestampFormat(TimestampFormat timestampFormat) { + this.timestampFormat = timestampFormat; + return this; + } + + public CanalJsonEnhancedDeserializationSchema build() { + return new CanalJsonEnhancedDeserializationSchema( + physicalDataType, + requestedMetadata, + producedTypeInfo, + database, + table, + ignoreParseErrors, + timestampFormat); + } + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedEncodingFormat.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedEncodingFormat.java new file mode 100644 index 00000000000..5dc92da6252 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedEncodingFormat.java @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedSerializationSchema.MetadataConverter; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonFormatOptions; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink.Context; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.types.RowKind; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * {@link EncodingFormat} for Canal using JSON encoding. + * + * different from flink:1.13.5. This can apply metadata, sink metadata into canal format + */ +public class CanalJsonEnhancedEncodingFormat implements EncodingFormat> { + + private final TimestampFormat timestampFormat; + private final JsonFormatOptions.MapNullKeyMode mapNullKeyMode; + private final String mapNullKeyLiteral; + private List metadataKeys; + private boolean encodeDecimalAsPlainNumber; + + public CanalJsonEnhancedEncodingFormat( + TimestampFormat timestampFormat, + JsonFormatOptions.MapNullKeyMode mapNullKeyMode, + String mapNullKeyLiteral, + boolean encodeDecimalAsPlainNumber) { + this.timestampFormat = timestampFormat; + this.mapNullKeyMode = mapNullKeyMode; + this.mapNullKeyLiteral = mapNullKeyLiteral; + this.encodeDecimalAsPlainNumber = encodeDecimalAsPlainNumber; + this.metadataKeys = Collections.emptyList(); + } + + @Override + public SerializationSchema createRuntimeEncoder(Context context, DataType physicalDataType) { + final List writeableMetadata = + metadataKeys.stream() + .map( + k -> Stream.of(WriteableMetadata.values()) + .filter(rm -> rm.key.equals(k)) + .findFirst() + .orElseThrow(IllegalStateException::new)) + .collect(Collectors.toList()); + return new CanalJsonEnhancedSerializationSchema( + physicalDataType, + writeableMetadata, + timestampFormat, + mapNullKeyMode, + mapNullKeyLiteral, + encodeDecimalAsPlainNumber); + } + + @Override + public Map listWritableMetadata() { + return Arrays.stream(WriteableMetadata.values()) + .collect( + Collectors.toMap(m -> m.key, m -> m.dataType)); + } + + @Override + public void applyWritableMetadata(List metadataKeys) { + this.metadataKeys = metadataKeys; + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.newBuilder() + .addContainedKind(RowKind.INSERT) + .addContainedKind(RowKind.UPDATE_BEFORE) + .addContainedKind(RowKind.UPDATE_AFTER) + .addContainedKind(RowKind.DELETE) + .build(); + } + + // -------------------------------------------------------------------------------------------- + // Metadata handling + // -------------------------------------------------------------------------------------------- + + /** + * List of metadata that can write into this format. 
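+ * The keys mirror {@code CanalJsonEnhancedDecodingFormat.ReadableMetadata}, so metadata read
+ * with this format can be written back out symmetrically.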
+ * canal json inner data type + */ + enum WriteableMetadata { + + DATABASE( + "database", + DataTypes.STRING().nullable(), + DataTypes.FIELD("database", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + TABLE( + "table", + DataTypes.STRING().nullable(), + DataTypes.FIELD("table", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + SQL_TYPE( + "sql-type", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.INT().nullable()).nullable(), + DataTypes.FIELD( + "sqlType", + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.INT().nullable())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getMap(pos); + } + }), + PK_NAMES( + "pk-names", + DataTypes.ARRAY(DataTypes.STRING()).nullable(), + DataTypes.FIELD("pkNames", DataTypes.ARRAY(DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getArray(pos); + } + }), + INGESTION_TIMESTAMP( + "ingestion-timestamp", + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3).nullable(), + DataTypes.FIELD("ts", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getTimestamp(pos, 3).getMillisecond(); + } + }), + EVENT_TIMESTAMP( + "event-timestamp", + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3).nullable(), + DataTypes.FIELD("es", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getTimestamp(pos, 3).getMillisecond(); + } + }), + // additional metadata + TYPE( + "type", + DataTypes.STRING().nullable(), + DataTypes.FIELD("type", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + /** + * It is deprecated, please use {@link this#TYPE} instead + */ + @Deprecated + OP_TYPE( + "op-type", + DataTypes.STRING().nullable(), + DataTypes.FIELD("opType", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getString(pos); + } + }), + IS_DDL( + "is-ddl", + DataTypes.BOOLEAN().nullable(), + DataTypes.FIELD("isDdl", DataTypes.BOOLEAN()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getBoolean(pos); + } + }), + + MYSQL_TYPE( + "mysql-type", + DataTypes.MAP(DataTypes.STRING().nullable(), 
DataTypes.STRING().nullable()).nullable(), + DataTypes.FIELD("mysqlType", DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getMap(pos); + } + }), + BATCH_ID( + "batch-id", + DataTypes.BIGINT().nullable(), + DataTypes.FIELD("batchId", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getLong(pos); + } + }), + UPDATE_BEFORE( + "update-before", + DataTypes.ARRAY( + DataTypes.MAP( + DataTypes.STRING().nullable(), DataTypes.STRING().nullable()).nullable()) + .nullable(), + DataTypes.FIELD("updateBefore", DataTypes.ARRAY( + DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING()))), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(RowData row, int pos) { + if (row.isNullAt(pos)) { + return null; + } + return row.getArray(pos); + } + }); + + final String key; + + final DataType dataType; + + final DataTypes.Field requiredJsonField; + + final MetadataConverter converter; + + WriteableMetadata( + String key, + DataType dataType, + DataTypes.Field requiredJsonField, + MetadataConverter converter) { + this.key = key; + this.dataType = dataType; + this.requiredJsonField = requiredJsonField; + this.converter = converter; + } + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedFormatFactory.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedFormatFactory.java new file mode 100644 index 00000000000..09d5f75651d --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedFormatFactory.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sort.formats.json.canal;
+
+import org.apache.flink.api.common.serialization.DeserializationSchema;
+import org.apache.flink.api.common.serialization.SerializationSchema;
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.formats.common.TimestampFormat;
+import org.apache.flink.formats.json.JsonFormatOptions;
+import org.apache.flink.formats.json.JsonFormatOptionsUtil;
+import org.apache.flink.table.connector.format.DecodingFormat;
+import org.apache.flink.table.connector.format.EncodingFormat;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.factories.DeserializationFormatFactory;
+import org.apache.flink.table.factories.DynamicTableFactory;
+import org.apache.flink.table.factories.FactoryUtil;
+import org.apache.flink.table.factories.SerializationFormatFactory;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.apache.flink.formats.json.JsonFormatOptions.ENCODE_DECIMAL_AS_PLAIN_NUMBER;
+import static org.apache.flink.formats.json.canal.CanalJsonFormatOptions.DATABASE_INCLUDE;
+import static org.apache.flink.formats.json.canal.CanalJsonFormatOptions.IGNORE_PARSE_ERRORS;
+import static org.apache.flink.formats.json.canal.CanalJsonFormatOptions.JSON_MAP_NULL_KEY_LITERAL;
+import static org.apache.flink.formats.json.canal.CanalJsonFormatOptions.JSON_MAP_NULL_KEY_MODE;
+import static org.apache.flink.formats.json.canal.CanalJsonFormatOptions.TABLE_INCLUDE;
+import static org.apache.flink.formats.json.canal.CanalJsonFormatOptions.TIMESTAMP_FORMAT;
+
+/**
+ * Format factory for providing configured instances of Canal JSON to RowData {@link
+ * DeserializationSchema}.
+ * Different from Flink 1.13.5: this factory can also sink metadata.
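+ *
+ * <p>The factory registers under the identifier {@code canal-json-inlong} (see {@code IDENTIFIER}
+ * below), so a table picks it up with {@code 'format' = 'canal-json-inlong'}; option keys such as
+ * {@code canal-json-inlong.ignore-parse-errors} then follow the usual Flink format-option prefixing.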
+ */
+public class CanalJsonEnhancedFormatFactory
+        implements
+            DeserializationFormatFactory,
+            SerializationFormatFactory {
+
+    public static final String IDENTIFIER = "canal-json-inlong";
+
+    @Override
+    public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
+            DynamicTableFactory.Context context, ReadableConfig formatOptions) {
+        FactoryUtil.validateFactoryOptions(this, formatOptions);
+        JsonFormatOptionsUtil.validateDecodingFormatOptions(formatOptions);
+
+        final String database = formatOptions.getOptional(DATABASE_INCLUDE).orElse(null);
+        final String table = formatOptions.getOptional(TABLE_INCLUDE).orElse(null);
+        final boolean ignoreParseErrors = formatOptions.get(IGNORE_PARSE_ERRORS);
+        final TimestampFormat timestampFormat = JsonFormatOptionsUtil.getTimestampFormat(formatOptions);
+
+        return new CanalJsonEnhancedDecodingFormat(database, table, ignoreParseErrors, timestampFormat);
+    }
+
+    @Override
+    public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
+            DynamicTableFactory.Context context, ReadableConfig formatOptions) {
+
+        FactoryUtil.validateFactoryOptions(this, formatOptions);
+        JsonFormatOptionsUtil.validateEncodingFormatOptions(formatOptions);
+
+        TimestampFormat timestampFormat = JsonFormatOptionsUtil.getTimestampFormat(formatOptions);
+        JsonFormatOptions.MapNullKeyMode mapNullKeyMode = JsonFormatOptionsUtil.getMapNullKeyMode(formatOptions);
+        String mapNullKeyLiteral = formatOptions.get(JSON_MAP_NULL_KEY_LITERAL);
+
+        final boolean encodeDecimalAsPlainNumber =
+                formatOptions.get(ENCODE_DECIMAL_AS_PLAIN_NUMBER);
+
+        return new CanalJsonEnhancedEncodingFormat(timestampFormat, mapNullKeyMode,
+                mapNullKeyLiteral, encodeDecimalAsPlainNumber);
+    }
+
+    @Override
+    public String factoryIdentifier() {
+        return IDENTIFIER;
+    }
+
+    @Override
+    public Set<ConfigOption<?>> requiredOptions() {
+        return Collections.emptySet();
+    }
+
+    @Override
+    public Set<ConfigOption<?>> optionalOptions() {
+        Set<ConfigOption<?>> options = new HashSet<>();
+        options.add(IGNORE_PARSE_ERRORS);
+        options.add(TIMESTAMP_FORMAT);
+        options.add(DATABASE_INCLUDE);
+        options.add(TABLE_INCLUDE);
+        options.add(JSON_MAP_NULL_KEY_MODE);
+        options.add(JSON_MAP_NULL_KEY_LITERAL);
+        options.add(ENCODE_DECIMAL_AS_PLAIN_NUMBER);
+        return options;
+    }
+}
\ No newline at end of file
diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedSerializationSchema.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedSerializationSchema.java
new file mode 100644
index 00000000000..a70c7bebf01
--- /dev/null
+++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedSerializationSchema.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedEncodingFormat.WriteableMetadata; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonFormatOptions; +import org.apache.flink.formats.json.JsonRowDataSerializationSchema; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.RowKind; + +import javax.annotation.Nullable; + +import java.io.Serializable; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * Serialization schema that serializes an object of Flink Table/SQL internal data structure {@link + * RowData} into a Canal JSON bytes. + * Different from flink:1.13.5.This can write metadata. + * + * @see Alibaba Canal + */ +public class CanalJsonEnhancedSerializationSchema implements SerializationSchema { + + private static final long serialVersionUID = 1L; + + private static final StringData OP_INSERT = StringData.fromString("INSERT"); + private static final StringData OP_DELETE = StringData.fromString("DELETE"); + /** + * The serializer to serialize Canal JSON data. + */ + private final JsonRowDataSerializationSchema jsonSerializer; + private final RowData.FieldGetter[] physicalFieldGetter; + private final RowData.FieldGetter[] wirteableMetadataFieldGetter; + /** + * row schema that json serializer can parse output row to json format + */ + private final RowType jsonRowType; + /** + * The index in writeableMetadata of {@link WriteableMetadata#TYPE} + */ + private final int typeIndex; + private transient GenericRowData reuse; + + /** + * Constructor of CanalJsonEnhancedSerializationSchema. 
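+ *
+ * @param physicalDataType physical row type of the sink table, without metadata columns
+ * @param writeableMetadata metadata to serialize after the physical fields
+ * @param timestampFormat format used for timestamp fields in the emitted JSON
+ * @param mapNullKeyMode strategy applied to map entries whose key is null
+ * @param mapNullKeyLiteral literal written for null map keys when the mode is LITERAL
+ * @param encodeDecimalAsPlainNumber whether decimals are emitted as plain numbers rather than scientific notation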
+ */ + public CanalJsonEnhancedSerializationSchema( + DataType physicalDataType, + List writeableMetadata, + TimestampFormat timestampFormat, + JsonFormatOptions.MapNullKeyMode mapNullKeyMode, + String mapNullKeyLiteral, + boolean encodeDecimalAsPlainNumber) { + final List physicalChildren = physicalDataType.getLogicalType().getChildren(); + this.jsonRowType = createJsonRowType(physicalDataType, writeableMetadata); + typeIndex = writeableMetadata.indexOf(WriteableMetadata.TYPE); + this.physicalFieldGetter = IntStream.range(0, physicalChildren.size()) + .mapToObj(targetField -> RowData.createFieldGetter(physicalChildren.get(targetField), targetField)) + .toArray(RowData.FieldGetter[]::new); + this.wirteableMetadataFieldGetter = + IntStream.range(physicalChildren.size(), physicalChildren.size() + writeableMetadata.size()) + .mapToObj(targetField -> new RowData.FieldGetter() { + + @Nullable + @Override + public Object getFieldOrNull(RowData row) { + WriteableMetadata curWriteableMetadata = writeableMetadata + .get(targetField - physicalChildren.size()); + return curWriteableMetadata.converter.convert(row, targetField); + } + }).toArray(RowData.FieldGetter[]::new); + + this.jsonSerializer = + new JsonRowDataSerializationSchema( + jsonRowType, + timestampFormat, + mapNullKeyMode, + mapNullKeyLiteral, + encodeDecimalAsPlainNumber); + } + + private static RowType createJsonRowType(DataType physicalDataType, List writeableMetadata) { + // Canal JSON contains other information, e.g. "database", "ts" + // but we don't need them + // and we don't need "old" , because can not support UPDATE_BEFORE,UPDATE_AFTER + DataType root = + DataTypes.ROW( + DataTypes.FIELD("data", DataTypes.ARRAY(physicalDataType)), + WriteableMetadata.TYPE.requiredJsonField); + // append fields that are required for reading metadata in the root + final List metadataFields = + writeableMetadata.stream().filter(m -> m != WriteableMetadata.TYPE) + .map(m -> m.requiredJsonField) + .distinct() + .collect(Collectors.toList()); + return (RowType) DataTypeUtils.appendRowFields(root, metadataFields).getLogicalType(); + } + + /** + * Init for this serialization + * In this method, it initializes {@link this#reuse}, the size of the {@link this#reuse} will be + * length of physicalFields add the length of metadata fields.Here we put the physical field into a array whose key + * is 'data', and put it in the zeroth element of the {@link this#reuse}, and put the {@link WriteableMetadata#TYPE} + * in the first element of the {@link this#reuse},so when the metadata field does not contain + * {@link WriteableMetadata#TYPE}, it's size is two + the number of metadata fields, when included, it's size is + * one + the number of metadata fields + * + * @param context The context used for initialization + */ + @Override + public void open(InitializationContext context) { + int size = 2 + wirteableMetadataFieldGetter.length; + if (typeIndex != -1) { + size--; + } + reuse = new GenericRowData(size); + } + + /** + * Serialize the row with ignore the {@link WriteableMetadata#TYPE} + */ + @Override + public byte[] serialize(RowData row) { + try { + // physical data injection + GenericRowData physicalData = new GenericRowData(physicalFieldGetter.length); + IntStream.range(0, physicalFieldGetter.length) + .forEach(targetField -> physicalData.setField(targetField, + physicalFieldGetter[targetField].getFieldOrNull(row))); + ArrayData arrayData = new GenericArrayData(new RowData[]{physicalData}); + reuse.setField(0, arrayData); + + // mete data injection + 
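+            // the row kind always maps to the Canal "type" field at position 1 of the reused row;
+            // the remaining requested metadata fields are laid out after it, skipping over
+            // WriteableMetadata.TYPE (typeIndex) when it was requested so it is not written twice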
StringData opType = rowKind2String(row.getRowKind()); + reuse.setField(1, opType); + if (typeIndex != -1) { + IntStream.range(0, wirteableMetadataFieldGetter.length) + .forEach(metaIndex -> { + if (metaIndex < typeIndex) { + reuse.setField(metaIndex + 2, + wirteableMetadataFieldGetter[metaIndex].getFieldOrNull(row)); + } else if (metaIndex > typeIndex) { + reuse.setField(metaIndex + 1, + wirteableMetadataFieldGetter[metaIndex].getFieldOrNull(row)); + } + }); + } else { + IntStream.range(0, wirteableMetadataFieldGetter.length) + .forEach(metaIndex -> reuse + .setField(metaIndex + 2, wirteableMetadataFieldGetter[metaIndex].getFieldOrNull(row))); + } + return jsonSerializer.serialize(reuse); + } catch (Throwable t) { + throw new RuntimeException("Could not serialize row '" + row + "'.", t); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CanalJsonEnhancedSerializationSchema that = (CanalJsonEnhancedSerializationSchema) o; + return Objects.equals(jsonSerializer, that.jsonSerializer); + } + + @Override + public int hashCode() { + return Objects.hash(jsonSerializer); + } + + private StringData rowKind2String(RowKind rowKind) { + switch (rowKind) { + case INSERT: + case UPDATE_AFTER: + return OP_INSERT; + case UPDATE_BEFORE: + case DELETE: + return OP_DELETE; + default: + throw new UnsupportedOperationException( + "Unsupported operation '" + rowKind + "' for row kind."); + } + } + + // -------------------------------------------------------------------------------------------- + + /** + * Converter that load a metadata field from the row that comes out of the input RowData. + * Finally all metadata field will splice into a GenericRowData, then json Serializer serialize it into json string. + */ + interface MetadataConverter extends Serializable { + + Object convert(RowData inputRow, int pos); + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonSerializationSchema.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonSerializationSchema.java new file mode 100644 index 00000000000..0cb92e77cf9 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalJsonSerializationSchema.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.MysqlBinLogData; +import org.apache.inlong.sort.formats.json.canal.CanalJsonDecodingFormat.ReadableMetadata; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.formats.json.JsonRowSerializationSchema; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.DataTypes.Field; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.types.TypeInfoDataTypeConverter; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.Row; +import org.apache.flink.types.RowKind; + +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType; + +/** + * Copied from apache flink project with a litter change. + * + * Serialization schema that serializes an object of Flink Table/SQL internal data structure {@link + * RowData} into a Canal JSON bytes. + * + * @see Alibaba Canal + */ +public class CanalJsonSerializationSchema implements SerializationSchema { + + private static final long serialVersionUID = 1L; + + private static final String OP_INSERT = "INSERT"; + private static final String OP_DELETE = "DELETE"; + + private transient Row reuse; + + private final JsonRowSerializationSchema jsonSerializer; + + private final Map fieldIndexToMetadata; + + private final boolean isMigrateAll; + + private final ObjectMapper objectMapper; + + public CanalJsonSerializationSchema( + RowType physicalRowType, + Map fieldIndexToMetadata, + boolean isMigrateAll) { + this.isMigrateAll = isMigrateAll; + + if (isMigrateAll) { + this.objectMapper = new ObjectMapper(); + } else { + this.objectMapper = null; + } + + RowTypeInfo rowTypeInfo = createJsonRowType(fromLogicalToDataType(physicalRowType), + fieldIndexToMetadata.values(), isMigrateAll); + jsonSerializer = JsonRowSerializationSchema.builder().withTypeInfo(rowTypeInfo).build(); + + this.fieldIndexToMetadata = fieldIndexToMetadata; + } + + @Override + public void open(InitializationContext context) { + reuse = new Row(2 + fieldIndexToMetadata.size()); + } + + @Override + public byte[] serialize(Row row) { + try { + MysqlBinLogData mysqlBinLogData = getMysqlBinLongData(row); + + Object[] arrayData = new Object[1]; + if (isMigrateAll) { + String mapStr = mysqlBinLogData.getPhysicalData().getFieldAs(0); + arrayData[0] = convertStringToMap(mapStr); + } else { + arrayData[0] = mysqlBinLogData.getPhysicalData(); + } + reuse.setField(0, arrayData); + reuse.setField(1, rowKind2String(row.getKind())); + + // Set metadata + Map metadataMap = mysqlBinLogData.getMetadataMap(); + int index = 2; + for (ReadableMetadata readableMetadata : fieldIndexToMetadata.values()) { + reuse.setField(index, metadataMap.get(readableMetadata.key)); + index++; + } + + return jsonSerializer.serialize(reuse); + } catch (Throwable t) { + throw new RuntimeException("Could not serialize row '" + row + "'.", t); + } + } + + public 
static String rowKind2String(RowKind rowKind) { + switch (rowKind) { + case INSERT: + case UPDATE_AFTER: + return OP_INSERT; + case UPDATE_BEFORE: + case DELETE: + return OP_DELETE; + default: + throw new UnsupportedOperationException( + "Unsupported operation '" + rowKind + "' for row kind."); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CanalJsonSerializationSchema that = (CanalJsonSerializationSchema) o; + return Objects.equals(jsonSerializer, that.jsonSerializer); + } + + @Override + public int hashCode() { + return Objects.hash(jsonSerializer); + } + + private static RowTypeInfo createJsonRowType( + DataType dataSchema, + Collection metadataSet, + boolean isMigrateAll) { + DataType root = DataTypes.ROW( + DataTypes.FIELD("data", DataTypes.ARRAY( + isMigrateAll ? DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING()) : dataSchema)), + DataTypes.FIELD("type", DataTypes.STRING())); + + final List metadataFields = + metadataSet.stream() + .map(m -> m.requiredJsonField) + .distinct() + .collect(Collectors.toList()); + + return (RowTypeInfo) TypeInfoDataTypeConverter.fromDataTypeToTypeInfo( + DataTypeUtils.appendRowFields(root, metadataFields)); + } + + private MysqlBinLogData getMysqlBinLongData(Row consumedRow) { + int consumedRowArity = consumedRow.getArity(); + Set metadataIndices = fieldIndexToMetadata.keySet(); + + Row physicalRow = new Row(consumedRowArity - metadataIndices.size()); + Map metadataMap = new HashMap<>(); + int physicalRowDataIndex = 0; + for (int i = 0; i < consumedRowArity; i++) { + if (!metadataIndices.contains(i)) { + physicalRow.setField(physicalRowDataIndex, consumedRow.getField(i)); + physicalRowDataIndex++; + } else { + metadataMap.put(fieldIndexToMetadata.get(i).key, consumedRow.getField(i)); + } + } + + physicalRow.setKind(consumedRow.getKind()); + + return new MysqlBinLogData(physicalRow, metadataMap); + } + + private Map convertStringToMap(String input) throws JsonProcessingException { + return objectMapper.readValue(input, Map.class); + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalUtils.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalUtils.java new file mode 100644 index 00000000000..1a3f3d4a52a --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/canal/CanalUtils.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.MysqlBinLogData; +import org.apache.inlong.sort.formats.json.canal.CanalJsonDecodingFormat.ReadableMetadata; + +public class CanalUtils { + + public static String getMysqlMetadataKey(ReadableMetadata readableMetadata) { + switch (readableMetadata) { + case DATABASE: + return MysqlBinLogData.MYSQL_METADATA_DATABASE; + case TABLE: + return MysqlBinLogData.MYSQL_METADATA_TABLE; + case IS_DDL: + return MysqlBinLogData.MYSQL_METADATA_IS_DDL; + case EVENT_TIMESTAMP: + return MysqlBinLogData.MYSQL_METADATA_EVENT_TIME; + default: + throw new IllegalArgumentException("Not supported yet"); + } + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJson.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJson.java new file mode 100644 index 00000000000..d479db4202e --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJson.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.debezium; + +import org.apache.inlong.sort.protocol.ddl.operations.Operation; + +import io.debezium.relational.history.TableChanges; +import io.debezium.relational.history.TableChanges.TableChange; +import lombok.Builder; +import lombok.Data; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude.Include; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonTypeName; + +import java.util.List; +import java.util.Map; + +@Builder +@JsonTypeName("canalJson") +@JsonInclude(Include.NON_NULL) +@Data +public class DebeziumJson { + + @JsonProperty("before") + private Map before; + @JsonProperty("after") + private Map after; + @JsonProperty("source") + private Source source; + @JsonProperty("tableChange") + private TableChanges.TableChange tableChange; + @JsonProperty("tsMs") + private long tsMs; + @JsonProperty("op") + private String op; + @JsonProperty("incremental") + private boolean incremental; + @JsonProperty("ddl") + private String ddl; + @JsonProperty("operation") + private Operation operation; + @JsonProperty("dataSourceName") + private String dataSourceName; + + public DebeziumJson(@JsonProperty("before") Map before, + @JsonProperty("after") Map after, + @JsonProperty("source") Source source, + @JsonProperty("tableChange") TableChange tableChange, + @JsonProperty("tsMs") long tsMs, @JsonProperty("op") String op, + @JsonProperty("incremental") boolean incremental, + @JsonProperty("ddl") String ddl, + @JsonProperty("operation") Operation operation, + @JsonProperty("dataSourceName") String dataSourceName) { + this.before = before; + this.after = after; + this.source = source; + this.tableChange = tableChange; + this.tsMs = tsMs; + this.op = op; + this.incremental = incremental; + this.ddl = ddl; + this.operation = operation; + this.dataSourceName = dataSourceName; + } + + @Builder + @Data + public static class Source { + + private String name; + private String db; + private String table; + private List pkNames; + private Map sqlType; + private Map mysqlType; + } + +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJsonDecodingFormat.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJsonDecodingFormat.java new file mode 100644 index 00000000000..ade3eac634a --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJsonDecodingFormat.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.formats.json.debezium; + +import org.apache.inlong.sort.formats.json.debezium.DebeziumJsonDeserializationSchema.MetadataConverter; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.types.RowKind; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Copied from apache flink project with a litter change. + * + * {@link DecodingFormat} for Debezium using JSON encoding. + **/ +public class DebeziumJsonDecodingFormat implements DecodingFormat> { + + // -------------------------------------------------------------------------------------------- + // Mutable attributes + // -------------------------------------------------------------------------------------------- + + private List metadataKeys; + + // -------------------------------------------------------------------------------------------- + // Debezium-specific attributes + // -------------------------------------------------------------------------------------------- + + private final boolean schemaInclude; + + private final boolean updateBeforeInclude; + + private final boolean ignoreParseErrors; + + private final TimestampFormat timestampFormat; + + private final boolean isMigrateAll; + + public DebeziumJsonDecodingFormat( + boolean schemaInclude, + boolean updateBeforeInclude, + boolean ignoreParseErrors, + TimestampFormat timestampFormat, + boolean isMigrateAll) { + this.schemaInclude = schemaInclude; + this.updateBeforeInclude = updateBeforeInclude; + this.ignoreParseErrors = ignoreParseErrors; + this.timestampFormat = timestampFormat; + this.metadataKeys = Collections.emptyList(); + this.isMigrateAll = isMigrateAll; + } + + @Override + public DeserializationSchema createRuntimeDecoder( + DynamicTableSource.Context context, DataType physicalDataType) { + + final List readableMetadata = + metadataKeys.stream() + .map( + k -> Stream.of(ReadableMetadata.values()) + .filter(rm -> rm.key.equals(k)) + .findFirst() + .orElseThrow(IllegalStateException::new)) + .collect(Collectors.toList()); + + final List metadataFields = + readableMetadata.stream() + .map(m -> DataTypes.FIELD(m.key, m.dataType)) + .collect(Collectors.toList()); + + final DataType producedDataType = + DataTypeUtils.appendRowFields(physicalDataType, metadataFields); + + final TypeInformation producedTypeInfo = + context.createTypeInformation(producedDataType); + + return new DebeziumJsonDeserializationSchema( + physicalDataType, + readableMetadata, + producedTypeInfo, + schemaInclude, + updateBeforeInclude, + ignoreParseErrors, + timestampFormat, + 
isMigrateAll); + } + + @Override + public Map listReadableMetadata() { + final Map metadataMap = new LinkedHashMap<>(); + Stream.of(ReadableMetadata.values()) + .forEachOrdered(m -> metadataMap.put(m.key, m.dataType)); + return metadataMap; + } + + @Override + public void applyReadableMetadata(List metadataKeys) { + this.metadataKeys = metadataKeys; + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.newBuilder() + .addContainedKind(RowKind.INSERT) + .addContainedKind(RowKind.UPDATE_BEFORE) + .addContainedKind(RowKind.UPDATE_AFTER) + .addContainedKind(RowKind.DELETE) + .build(); + } + + // -------------------------------------------------------------------------------------------- + // Metadata handling + // -------------------------------------------------------------------------------------------- + + /** + * List of metadata that can be read with this format. + */ + public enum ReadableMetadata { + + SCHEMA( + "schema", + DataTypes.STRING().nullable(), + false, + DataTypes.FIELD("schema", DataTypes.STRING()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getString(pos); + } + }), + + INGESTION_TIMESTAMP( + "ingestion-timestamp", + DataTypes.BIGINT().nullable(), + true, + DataTypes.FIELD("ts_ms", DataTypes.BIGINT()), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getLong(pos); + } + }), + + SOURCE_TIMESTAMP( + "source.timestamp", + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(3).nullable(), + true, + DataTypes.FIELD("source", DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + final StringData timestamp = + (StringData) readProperty(row, pos, KEY_SOURCE_TIMESTAMP); + if (timestamp == null) { + return null; + } + return TimestampData.fromEpochMillis(Long.parseLong(timestamp.toString())); + } + }), + + SOURCE_DATABASE( + "source.database", + DataTypes.STRING().nullable(), + true, + DataTypes.FIELD("source", DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return readProperty(row, pos, KEY_SOURCE_DATABASE); + } + }), + + SOURCE_SCHEMA( + "source.schema", + DataTypes.STRING().nullable(), + true, + DataTypes.FIELD("source", DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return readProperty(row, pos, KEY_SOURCE_SCHEMA); + } + }), + + SOURCE_TABLE( + "source.table", + DataTypes.STRING().nullable(), + true, + DataTypes.FIELD("source", DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return readProperty(row, pos, KEY_SOURCE_TABLE); + } + }), + + SOURCE_PROPERTIES( + "source.properties", + // key and value of the map are nullable to make handling easier in queries + DataTypes.MAP(DataTypes.STRING().nullable(), DataTypes.STRING().nullable()) + .nullable(), + true, + DataTypes.FIELD("source", 
DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData row, int pos) { + return row.getMap(pos); + } + }); + + final String key; + + final DataType dataType; + + final boolean isJsonPayload; + + final DataTypes.Field requiredJsonField; + + final MetadataConverter converter; + + ReadableMetadata( + String key, + DataType dataType, + boolean isJsonPayload, + DataTypes.Field requiredJsonField, + MetadataConverter converter) { + this.key = key; + this.dataType = dataType; + this.isJsonPayload = isJsonPayload; + this.requiredJsonField = requiredJsonField; + this.converter = converter; + } + + public String getKey() { + return key; + } + } + + private static final StringData KEY_SOURCE_TIMESTAMP = StringData.fromString("ts_ms"); + + private static final StringData KEY_SOURCE_DATABASE = StringData.fromString("db"); + + private static final StringData KEY_SOURCE_SCHEMA = StringData.fromString("schema"); + + private static final StringData KEY_SOURCE_TABLE = StringData.fromString("table"); + + private static Object readProperty(GenericRowData row, int pos, StringData key) { + final GenericMapData map = (GenericMapData) row.getMap(pos); + if (map == null) { + return null; + } + return map.get(key); + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJsonDeserializationSchema.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJsonDeserializationSchema.java new file mode 100644 index 00000000000..a53d6c99654 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumJsonDeserializationSchema.java @@ -0,0 +1,397 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sort.formats.json.debezium;
+
+import org.apache.inlong.sort.formats.json.MysqlBinLogData;
+import org.apache.inlong.sort.formats.json.debezium.DebeziumJsonDecodingFormat.ReadableMetadata;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.api.common.serialization.DeserializationSchema;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.formats.common.TimestampFormat;
+import org.apache.flink.formats.json.JsonRowDataDeserializationSchema;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.data.GenericMapData;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.table.types.utils.DataTypeUtils;
+import org.apache.flink.types.RowKind;
+import org.apache.flink.util.Collector;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import static java.lang.String.format;
+import static org.apache.inlong.sort.formats.json.debezium.DebeziumUtils.getMysqlMetadataKey;
+
+/**
+ * Copied from the Apache Flink project with a little change.
+ *
+ * Deserialization schema from Debezium JSON to Flink Table/SQL internal data structure {@link
+ * RowData}. The deserialization schema knows Debezium's schema definition and can extract the
+ * database data and convert into {@link RowData} with {@link RowKind}.
+ *
+ * <p>Deserializes a byte[] message as a JSON object and reads the specified fields.
+ *
+ * <p>Failures during deserialization are forwarded as wrapped IOExceptions.
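+ *
+ * <p>For reference, an update event in the envelope this schema consumes looks roughly like
+ * the following (field values are illustrative only):
+ *
+ * <pre>{@code
+ * {"before": {"id": 1, "name": "a"}, "after": {"id": 1, "name": "b"}, "op": "u", "ts_ms": 1651208731718}
+ * }</pre>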

+ *
+ * @see <a href="https://debezium.io/">Debezium</a>
+ */
+@Internal
+public final class DebeziumJsonDeserializationSchema implements DeserializationSchema<RowData> {
+
+    private static final long serialVersionUID = 1L;
+
+    private static final String OP_READ = "r"; // snapshot read
+    private static final String OP_CREATE = "c"; // insert
+    private static final String OP_UPDATE = "u"; // update
+    private static final String OP_DELETE = "d"; // delete
+
+    private static final int BEFORE_POS = 0;
+    private static final int AFTER_POS = 1;
+    private static final int OP_POS = 2;
+
+    private static final String REPLICA_IDENTITY_EXCEPTION =
+            "The \"before\" field of %s message is null, "
+                    + "if you are using Debezium Postgres Connector, "
+                    + "please check the Postgres table has been set REPLICA IDENTITY to FULL level.";
+
+    /** The deserializer to deserialize Debezium JSON data. */
+    private final JsonRowDataDeserializationSchema jsonDeserializer;
+
+    /** Flag that indicates that an additional projection is required for metadata. */
+    private final boolean hasMetadata;
+
+    /** Metadata to be extracted for every record. */
+    private final MetadataConverter[] metadataConverters;
+
+    private final List<ReadableMetadata> requestedMetadata;
+
+    /** {@link TypeInformation} of the produced {@link RowData} (physical + metadata). */
+    private final TypeInformation<RowData> producedTypeInfo;
+
+    /**
+     * Flag indicating whether the Debezium JSON data contains schema part or not. When Debezium
+     * Kafka Connect enables "value.converter.schemas.enable", the JSON will contain "schema"
+     * information, but we just ignore "schema" and extract data from "payload".
+     */
+    private final boolean schemaInclude;
+
+    /**
+     * Flag indicating whether to emit the update-before row.
+     */
+    private final boolean updateBeforeInclude;
+
+    /** Flag indicating whether to ignore invalid fields/rows (default: throw an exception). */
+    private final boolean ignoreParseErrors;
+
+    private final boolean isMigrateAll;
+
+    /**
+     * Constructor of DebeziumJsonDeserializationSchema.
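+     *
+     * <p>A typical wiring looks like the sketch below (the physical row type and argument
+     * values are illustrative, not prescriptive):
+     *
+     * <pre>{@code
+     * DataType physical = DataTypes.ROW(
+     *         DataTypes.FIELD("id", DataTypes.BIGINT()),
+     *         DataTypes.FIELD("name", DataTypes.STRING()));
+     * new DebeziumJsonDeserializationSchema(
+     *         physical,
+     *         Collections.emptyList(),                        // no metadata requested
+     *         InternalTypeInfo.of(physical.getLogicalType()),
+     *         false,                                          // schemaInclude
+     *         true,                                           // updateBeforeInclude
+     *         false,                                          // ignoreParseErrors
+     *         TimestampFormat.SQL,
+     *         false);                                         // isMigrateAll
+     * }</pre>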
+     */
+    public DebeziumJsonDeserializationSchema(
+            DataType physicalDataType,
+            List<ReadableMetadata> requestedMetadata,
+            TypeInformation<RowData> producedTypeInfo,
+            boolean schemaInclude,
+            boolean updateBeforeInclude,
+            boolean ignoreParseErrors,
+            TimestampFormat timestampFormat,
+            boolean isMigrateAll) {
+        this.isMigrateAll = isMigrateAll;
+        final RowType jsonRowType =
+                createJsonRowType(physicalDataType, requestedMetadata, schemaInclude, isMigrateAll);
+        this.jsonDeserializer =
+                new JsonRowDataDeserializationSchema(
+                        jsonRowType,
+                        // the result type is never used, so it's fine to pass in the produced type
+                        // info
+                        producedTypeInfo,
+                        false, // ignoreParseErrors already contains the functionality of
+                               // failOnMissingField
+                        ignoreParseErrors,
+                        timestampFormat);
+        this.hasMetadata = requestedMetadata.size() > 0;
+        this.metadataConverters =
+                createMetadataConverters(jsonRowType, requestedMetadata, schemaInclude);
+        this.requestedMetadata = requestedMetadata;
+        this.producedTypeInfo = producedTypeInfo;
+        this.schemaInclude = schemaInclude;
+        this.updateBeforeInclude = updateBeforeInclude;
+        this.ignoreParseErrors = ignoreParseErrors;
+    }
+
+    @Override
+    public RowData deserialize(byte[] message) {
+        throw new RuntimeException(
+                "Please invoke DeserializationSchema#deserialize(byte[], Collector) instead.");
+    }
+
+    @Override
+    public void deserialize(byte[] message, Collector<RowData> out) throws IOException {
+        if (message == null || message.length == 0) {
+            // skip tombstone messages
+            return;
+        }
+        try {
+            GenericRowData row = (GenericRowData) jsonDeserializer.deserialize(message);
+            GenericRowData payload;
+            if (schemaInclude) {
+                payload = (GenericRowData) row.getField(0);
+            } else {
+                payload = row;
+            }
+
+            GenericRowData before;
+            GenericRowData after;
+            if (isMigrateAll) {
+                before = GenericRowData.of(payload.getField(BEFORE_POS));
+                after = GenericRowData.of(payload.getField(AFTER_POS));
+            } else {
+                before = (GenericRowData) payload.getField(BEFORE_POS);
+                after = (GenericRowData) payload.getField(AFTER_POS);
+            }
+
+            String op = payload.getField(OP_POS).toString();
+            if (OP_CREATE.equals(op) || OP_READ.equals(op)) {
+                after.setRowKind(RowKind.INSERT);
+                emitRow(row, after, out);
+            } else if (OP_UPDATE.equals(op)) {
+                if (before == null) {
+                    throw new IllegalStateException(
+                            String.format(REPLICA_IDENTITY_EXCEPTION, "UPDATE"));
+                }
+                before.setRowKind(RowKind.UPDATE_BEFORE);
+                after.setRowKind(RowKind.UPDATE_AFTER);
+                if (updateBeforeInclude) {
+                    emitRow(row, before, out);
+                }
+                emitRow(row, after, out);
+            } else if (OP_DELETE.equals(op)) {
+                if (before == null) {
+                    throw new IllegalStateException(
+                            String.format(REPLICA_IDENTITY_EXCEPTION, "DELETE"));
+                }
+                before.setRowKind(RowKind.DELETE);
+                emitRow(row, before, out);
+            } else {
+                if (!ignoreParseErrors) {
+                    throw new IOException(
+                            format(
+                                    "Unknown \"op\" value \"%s\". The Debezium JSON message is '%s'",
+                                    op, new String(message)));
+                }
+            }
+        } catch (Throwable t) {
+            // a big try-catch to protect the processing.
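+            // With ignoreParseErrors enabled the corrupt record is dropped silently;
+            // otherwise the raw message is attached to the wrapped exception below.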
+ if (!ignoreParseErrors) { + throw new IOException( + format("Corrupt Debezium JSON message '%s'.", new String(message)), t); + } + } + } + + private void emitRow(GenericRowData rootRow, GenericRowData physicalRow, Collector out) { + int physicalArity = physicalRow.getArity(); + if (isMigrateAll) { + physicalArity = 0; + } + final int metadataArity = metadataConverters.length; + + final GenericRowData producedRow = new GenericRowData(physicalRow.getRowKind(), physicalArity + 1); + + for (int physicalPos = 0; physicalPos < physicalArity; physicalPos++) { + producedRow.setField(physicalPos + 1, physicalRow.getField(physicalPos)); + } + + // Put metadata in the first field of the emitted RowData + Map metadataMap = new HashMap<>(); + metadataMap.put( + StringData.fromString(MysqlBinLogData.MYSQL_METADATA_IS_DDL), + StringData.fromString("false")); + + for (int metadataPos = 0; metadataPos < metadataArity; metadataPos++) { + metadataMap.put( + StringData.fromString(getMysqlMetadataKey(requestedMetadata.get(metadataPos))), + StringData.fromString(metadataConverters[metadataPos].convert(rootRow).toString())); + } + + if (isMigrateAll) { + metadataMap.put( + StringData.fromString(MysqlBinLogData.MYSQL_METADATA_DATA), + (StringData) physicalRow.getField(0)); + } + + producedRow.setField(0, new GenericMapData(metadataMap)); + + out.collect(producedRow); + } + + @Override + public boolean isEndOfStream(RowData nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + return producedTypeInfo; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DebeziumJsonDeserializationSchema that = (DebeziumJsonDeserializationSchema) o; + return Objects.equals(jsonDeserializer, that.jsonDeserializer) + && hasMetadata == that.hasMetadata + && Objects.equals(producedTypeInfo, that.producedTypeInfo) + && schemaInclude == that.schemaInclude + && ignoreParseErrors == that.ignoreParseErrors; + } + + @Override + public int hashCode() { + return Objects.hash( + jsonDeserializer, hasMetadata, producedTypeInfo, schemaInclude, ignoreParseErrors); + } + + // -------------------------------------------------------------------------------------------- + + private static RowType createJsonRowType( + DataType physicalDataType, + List readableMetadata, + boolean schemaInclude, + boolean isMigrateAll) { + + DataType dataTypeForDataFields = physicalDataType; + if (isMigrateAll) { + dataTypeForDataFields = DataTypes.STRING(); + } + + DataType payload = DataTypes.ROW( + DataTypes.FIELD("before", dataTypeForDataFields), + DataTypes.FIELD("after", dataTypeForDataFields), + DataTypes.FIELD("op", DataTypes.STRING())); + + // append fields that are required for reading metadata in the payload + final List payloadMetadataFields = + readableMetadata.stream() + .filter(m -> m.isJsonPayload) + .map(m -> m.requiredJsonField) + .distinct() + .collect(Collectors.toList()); + payload = DataTypeUtils.appendRowFields(payload, payloadMetadataFields); + + DataType root = payload; + if (schemaInclude) { + // when Debezium Kafka connect enables "value.converter.schemas.enable", + // the JSON will contain "schema" information and we need to extract data from + // "payload". 
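+            // e.g. {"schema": {...}, "payload": {"before": {...}, "after": {...}, "op": "u"}}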
+ root = DataTypes.ROW(DataTypes.FIELD("payload", payload)); + } + + // append fields that are required for reading metadata in the root + final List rootMetadataFields = + readableMetadata.stream() + .filter(m -> !m.isJsonPayload) + .map(m -> m.requiredJsonField) + .distinct() + .collect(Collectors.toList()); + root = DataTypeUtils.appendRowFields(root, rootMetadataFields); + + return (RowType) root.getLogicalType(); + } + + private static MetadataConverter[] createMetadataConverters( + RowType jsonRowType, List requestedMetadata, boolean schemaInclude) { + return requestedMetadata.stream() + .map( + m -> { + if (m.isJsonPayload) { + return convertInPayload(jsonRowType, m, schemaInclude); + } else { + return convertInRoot(jsonRowType, m); + } + }) + .toArray(MetadataConverter[]::new); + } + + private static MetadataConverter convertInRoot(RowType jsonRowType, ReadableMetadata metadata) { + final int pos = findFieldPos(metadata, jsonRowType); + return new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData root, int unused) { + return metadata.converter.convert(root, pos); + } + }; + } + + private static MetadataConverter convertInPayload( + RowType jsonRowType, ReadableMetadata metadata, boolean schemaInclude) { + if (schemaInclude) { + final int pos = findFieldPos(metadata, (RowType) jsonRowType.getChildren().get(0)); + return new MetadataConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(GenericRowData root, int unused) { + final GenericRowData payload = (GenericRowData) root.getField(0); + return metadata.converter.convert(payload, pos); + } + }; + } + return convertInRoot(jsonRowType, metadata); + } + + private static int findFieldPos(ReadableMetadata metadata, RowType jsonRowType) { + return jsonRowType.getFieldNames().indexOf(metadata.requiredJsonField.getName()); + } + + // -------------------------------------------------------------------------------------------- + + /** + * Converter that extracts a metadata field from the row (root or payload) that comes out of the + * JSON schema and converts it to the desired data type. + */ + interface MetadataConverter extends Serializable { + + // Method for top-level access. + default Object convert(GenericRowData row) { + return convert(row, -1); + } + + Object convert(GenericRowData row, int pos); + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumUtils.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumUtils.java new file mode 100644 index 00000000000..f95a3bef4ac --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/debezium/DebeziumUtils.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.formats.json.debezium; + +import org.apache.inlong.sort.formats.json.MysqlBinLogData; +import org.apache.inlong.sort.formats.json.debezium.DebeziumJsonDecodingFormat.ReadableMetadata; + +public class DebeziumUtils { + + public static String getMysqlMetadataKey(ReadableMetadata readableMetadata) { + switch (readableMetadata) { + case SOURCE_DATABASE: + return MysqlBinLogData.MYSQL_METADATA_DATABASE; + case SOURCE_TABLE: + return MysqlBinLogData.MYSQL_METADATA_TABLE; + case INGESTION_TIMESTAMP: + return MysqlBinLogData.MYSQL_METADATA_EVENT_TIME; + default: + throw new IllegalArgumentException("Not supported yet"); + } + } +} diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/utils/FormatJsonUtil.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/utils/FormatJsonUtil.java new file mode 100644 index 00000000000..8223129964b --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/java/org/apache/inlong/sort/formats/json/utils/FormatJsonUtil.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.formats.json.utils; + +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonFormatOptions.MapNullKeyMode; +import org.apache.flink.formats.json.RowDataToJsonConverters; +import org.apache.flink.formats.json.RowDataToJsonConverters.RowDataToJsonConverter; +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.CharType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LocalZonedTimestampType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TimeType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarBinaryType; +import org.apache.flink.table.types.logical.VarCharType; + +import java.util.Map; + +import static org.apache.inlong.sort.protocol.constant.DataTypeConstants.DEFAULT_DECIMAL_PRECISION; +import static org.apache.inlong.sort.protocol.constant.DataTypeConstants.DEFAULT_DECIMAL_SCALE; +import static org.apache.inlong.sort.protocol.constant.DataTypeConstants.ORACLE_TIMESTAMP_TIME_ZONE; + +public class FormatJsonUtil { + + public static final Map SQL_TYPE_2_FLINK_TYPE_MAPPING = + ImmutableMap.builder() + .put(java.sql.Types.CHAR, new CharType()) + .put(java.sql.Types.VARCHAR, new VarCharType()) + .put(java.sql.Types.SMALLINT, new SmallIntType()) + .put(java.sql.Types.INTEGER, new IntType()) + .put(java.sql.Types.BIGINT, new BigIntType()) + .put(java.sql.Types.REAL, new FloatType()) + .put(java.sql.Types.DOUBLE, new DoubleType()) + .put(java.sql.Types.FLOAT, new FloatType()) + .put(java.sql.Types.DECIMAL, new DecimalType(DEFAULT_DECIMAL_PRECISION, DEFAULT_DECIMAL_SCALE)) + .put(java.sql.Types.NUMERIC, new DecimalType(DEFAULT_DECIMAL_PRECISION, DEFAULT_DECIMAL_SCALE)) + .put(java.sql.Types.BIT, new BooleanType()) + .put(java.sql.Types.TIME, new TimeType()) + .put(java.sql.Types.TIME_WITH_TIMEZONE, new TimeType()) + .put(java.sql.Types.TIMESTAMP_WITH_TIMEZONE, new LocalZonedTimestampType()) + .put(ORACLE_TIMESTAMP_TIME_ZONE, new LocalZonedTimestampType()) + .put(java.sql.Types.TIMESTAMP, new TimestampType()) + .put(java.sql.Types.BINARY, new BinaryType()) + .put(java.sql.Types.VARBINARY, new VarBinaryType()) + .put(java.sql.Types.BLOB, new VarBinaryType()) + .put(java.sql.Types.CLOB, new VarBinaryType()) + .put(java.sql.Types.DATE, new DateType()) + .put(java.sql.Types.BOOLEAN, new BooleanType()) + .put(java.sql.Types.LONGNVARCHAR, new VarCharType()) + .put(java.sql.Types.LONGVARBINARY, new VarCharType()) + .put(java.sql.Types.LONGVARCHAR, new VarCharType()) + .put(java.sql.Types.ARRAY, new VarCharType()) + .put(java.sql.Types.NCHAR, new CharType()) + .put(java.sql.Types.NCLOB, new VarBinaryType()) + .put(java.sql.Types.TINYINT, new TinyIntType()) + .put(java.sql.Types.OTHER, new VarCharType()) + .build(); + public static final Map 
SQL_TYPE_2_SPARK_SUPPORTED_FLINK_TYPE_MAPPING = + ImmutableMap.builder() + .put(java.sql.Types.CHAR, new CharType()) + .put(java.sql.Types.VARCHAR, new VarCharType()) + .put(java.sql.Types.SMALLINT, new SmallIntType()) + .put(java.sql.Types.INTEGER, new IntType()) + .put(java.sql.Types.BIGINT, new BigIntType()) + .put(java.sql.Types.REAL, new FloatType()) + .put(java.sql.Types.DOUBLE, new DoubleType()) + .put(java.sql.Types.FLOAT, new FloatType()) + .put(java.sql.Types.DECIMAL, new DecimalType(DEFAULT_DECIMAL_PRECISION, DEFAULT_DECIMAL_SCALE)) + .put(java.sql.Types.NUMERIC, new DecimalType(DEFAULT_DECIMAL_PRECISION, DEFAULT_DECIMAL_SCALE)) + .put(java.sql.Types.BIT, new BooleanType()) + .put(java.sql.Types.TIME, new VarCharType()) + .put(java.sql.Types.TIMESTAMP_WITH_TIMEZONE, new LocalZonedTimestampType()) + .put(ORACLE_TIMESTAMP_TIME_ZONE, new LocalZonedTimestampType()) + .put(java.sql.Types.TIMESTAMP, new LocalZonedTimestampType()) + .put(java.sql.Types.BINARY, new BinaryType()) + .put(java.sql.Types.VARBINARY, new VarBinaryType()) + .put(java.sql.Types.BLOB, new VarBinaryType()) + .put(java.sql.Types.DATE, new DateType()) + .put(java.sql.Types.BOOLEAN, new BooleanType()) + .put(java.sql.Types.LONGNVARCHAR, new VarCharType()) + .put(java.sql.Types.LONGVARBINARY, new VarCharType()) + .put(java.sql.Types.LONGVARCHAR, new VarCharType()) + .put(java.sql.Types.ARRAY, new VarCharType()) + .put(java.sql.Types.NCHAR, new CharType()) + .put(java.sql.Types.NCLOB, new VarBinaryType()) + .put(java.sql.Types.TINYINT, new TinyIntType()) + .put(java.sql.Types.OTHER, new VarCharType()) + .build(); + public static final Map DEBEZIUM_TYPE_2_FLINK_TYPE_MAPPING = + ImmutableMap.builder() + .put("BOOLEAN", new BooleanType()) + .put("INT8", new TinyIntType()) + .put("INT16", new SmallIntType()) + .put("INT32", new IntType()) + .put("INT64", new BigIntType()) + .put("FLOAT32", new FloatType()) + .put("FLOAT64", new DoubleType()) + .put("STRING", new VarCharType()) + .put("BYTES", new VarBinaryType()) + .build(); + + public static RowDataToJsonConverter rowDataToJsonConverter(DataType physicalRowDataType) { + return rowDataToJsonConverter(TimestampFormat.SQL, null, physicalRowDataType); + } + + public static RowDataToJsonConverter rowDataToJsonConverter(TimestampFormat timestampFormat, + String mapNullKeyLiteral, + DataType physicalRowDataType) { + return rowDataToJsonConverter(timestampFormat, MapNullKeyMode.DROP, mapNullKeyLiteral, physicalRowDataType); + } + + public static RowDataToJsonConverter rowDataToJsonConverter(TimestampFormat timestampFormat, + MapNullKeyMode mapNullKeyMode, + String mapNullKeyLiteral, DataType physicalRowDataType) { + return new RowDataToJsonConverters(timestampFormat, mapNullKeyMode, mapNullKeyLiteral) + .createConverter(physicalRowDataType.getLogicalType()); + } + + public static RowDataToJsonConverter rowDataToJsonConverter(LogicalType rowType) { + return rowDataToJsonConverter(TimestampFormat.SQL, null, rowType); + } + + public static RowDataToJsonConverter rowDataToJsonConverter(TimestampFormat timestampFormat, + String mapNullKeyLiteral, + LogicalType rowType) { + return rowDataToJsonConverter(timestampFormat, MapNullKeyMode.DROP, mapNullKeyLiteral, rowType); + } + + public static RowDataToJsonConverter rowDataToJsonConverter(TimestampFormat timestampFormat, + MapNullKeyMode mapNullKeyMode, + String mapNullKeyLiteral, LogicalType rowType) { + return new RowDataToJsonConverters(timestampFormat, mapNullKeyMode, mapNullKeyLiteral) + .createConverter(rowType); + } +} diff --git 
a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 00000000000..7ce31e31ed4 --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedFormatFactory diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedFormatFactoryTest.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedFormatFactoryTest.java new file mode 100644 index 00000000000..fe54e39e1ab --- /dev/null +++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedFormatFactoryTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sort.formats.json.canal;
+
+import org.apache.flink.api.common.serialization.DeserializationSchema;
+import org.apache.flink.api.common.serialization.SerializationSchema;
+import org.apache.flink.formats.common.TimestampFormat;
+import org.apache.flink.formats.json.JsonFormatOptions;
+import org.apache.flink.table.connector.sink.DynamicTableSink;
+import org.apache.flink.table.connector.source.DynamicTableSource;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.factories.TestDynamicTableFactory;
+import org.apache.flink.table.runtime.connector.sink.SinkRuntimeProviderContext;
+import org.apache.flink.table.runtime.connector.source.ScanRuntimeProviderContext;
+import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Consumer;
+
+import static org.apache.flink.table.factories.utils.FactoryMocks.PHYSICAL_DATA_TYPE;
+import static org.apache.flink.table.factories.utils.FactoryMocks.PHYSICAL_TYPE;
+import static org.apache.flink.table.factories.utils.FactoryMocks.SCHEMA;
+import static org.apache.flink.table.factories.utils.FactoryMocks.createTableSink;
+import static org.apache.flink.table.factories.utils.FactoryMocks.createTableSource;
+import static org.junit.Assert.assertEquals;
+
+public class CanalJsonEnhancedFormatFactoryTest {
+
+    private static final InternalTypeInfo<RowData> ROW_TYPE_INFO =
+            InternalTypeInfo.of(PHYSICAL_TYPE);
+
+    @Test
+    public void testUserDefinedOptions() {
+        final Map<String, String> tableOptions =
+                getModifiedOptions(opts -> {
+                    opts.put("canal-json-inlong.map-null-key.mode", "LITERAL");
+                    opts.put("canal-json-inlong.map-null-key.literal", "nullKey");
+                    opts.put("canal-json-inlong.ignore-parse-errors", "true");
+                    opts.put("canal-json-inlong.timestamp-format.standard", "ISO-8601");
+                    opts.put("canal-json-inlong.database.include", "mydb");
+                    opts.put("canal-json-inlong.table.include", "mytable");
+                    opts.put("canal-json-inlong.encode.decimal-as-plain-number", "true");
+                });
+
+        // test deserialization
+        CanalJsonEnhancedDeserializationSchema expectedDeser =
+                CanalJsonEnhancedDeserializationSchema.builder(
+                        PHYSICAL_DATA_TYPE, Collections.emptyList(), ROW_TYPE_INFO)
+                        .setIgnoreParseErrors(true)
+                        .setTimestampFormat(TimestampFormat.ISO_8601)
+                        .setDatabase("mydb")
+                        .setTable("mytable")
+                        .build();
+        DeserializationSchema<RowData> actualDeser = createDeserializationSchema(tableOptions);
+        assertEquals(expectedDeser, actualDeser);
+
+        // test serialization
+        CanalJsonEnhancedSerializationSchema expectedSer =
+                new CanalJsonEnhancedSerializationSchema(
+                        PHYSICAL_DATA_TYPE,
+                        new ArrayList<>(),
+                        TimestampFormat.ISO_8601,
+                        JsonFormatOptions.MapNullKeyMode.LITERAL,
+                        "nullKey",
+                        true);
+        SerializationSchema<RowData> actualSer = createSerializationSchema(tableOptions);
+        assertEquals(expectedSer, actualSer);
+    }
+
+    // ------------------------------------------------------------------------
+    // Public Tools
+    // ------------------------------------------------------------------------
+
+    public static DeserializationSchema<RowData> createDeserializationSchema(
+            Map<String, String> options) {
+        DynamicTableSource source = createTableSource(SCHEMA, options);
+
+        assert source instanceof TestDynamicTableFactory.DynamicTableSourceMock;
+        TestDynamicTableFactory.DynamicTableSourceMock scanSourceMock =
+                (TestDynamicTableFactory.DynamicTableSourceMock) source;
+
+        return scanSourceMock.valueFormat.createRuntimeDecoder(
+                ScanRuntimeProviderContext.INSTANCE, PHYSICAL_DATA_TYPE);
+    }
+
+    public static SerializationSchema<RowData> createSerializationSchema(
+            Map<String, String> options) {
+        DynamicTableSink sink = createTableSink(SCHEMA, options);
+
+        assert sink instanceof TestDynamicTableFactory.DynamicTableSinkMock;
+        TestDynamicTableFactory.DynamicTableSinkMock sinkMock =
+                (TestDynamicTableFactory.DynamicTableSinkMock) sink;
+
+        return sinkMock.valueFormat.createRuntimeEncoder(
+                new SinkRuntimeProviderContext(false), PHYSICAL_DATA_TYPE);
+    }
+
+    /**
+     * Returns the full options modified by the given consumer {@code optionModifier}.
+     *
+     * @param optionModifier Consumer to modify the options
+     */
+    public static Map<String, String> getModifiedOptions(Consumer<Map<String, String>> optionModifier) {
+        Map<String, String> options = getAllOptions();
+        optionModifier.accept(options);
+        return options;
+    }
+
+    private static Map<String, String> getAllOptions() {
+        final Map<String, String> options = new HashMap<>();
+        options.put("connector", TestDynamicTableFactory.IDENTIFIER);
+        options.put("target", "MyTarget");
+        options.put("buffer-size", "1000");
+        options.put("format", "canal-json-inlong");
+        return options;
+    }
+}
diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedSerDeSchemaTest.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedSerDeSchemaTest.java
new file mode 100644
index 00000000000..f730340f4cc
--- /dev/null
+++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonEnhancedSerDeSchemaTest.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.inlong.sort.formats.json.canal; + +import org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedDecodingFormat.ReadableMetadata; +import org.apache.inlong.sort.formats.json.canal.CanalJsonEnhancedEncodingFormat.WriteableMetadata; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.formats.common.TimestampFormat; +import org.apache.flink.formats.json.JsonFormatOptions; +import org.apache.flink.formats.json.canal.CanalJsonFormatOptions; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.utils.DataTypeUtils; +import org.apache.flink.util.Collector; +import org.junit.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.Assert.assertEquals; + +public class CanalJsonEnhancedSerDeSchemaTest { + + public static final String DATABASE = "TEST"; + + public static final String TABLE = "TEST"; + + public static final ResolvedSchema SCHEMA = + ResolvedSchema.of( + Column.metadata("database", DataTypes.BOOLEAN(), "database", false), + Column.physical("id", DataTypes.BIGINT()), + Column.physical("name", DataTypes.STRING()), + Column.metadata("table", DataTypes.BOOLEAN(), "table", false), + Column.metadata("sql_type", + DataTypes.MAP(DataTypes.STRING(), DataTypes.INT()), "sql-type", false), + Column.metadata("pk_names", + DataTypes.ARRAY(DataTypes.STRING()), "pk-names", false), + Column.metadata("ingestion_timestamp", + DataTypes.TIMESTAMP_LTZ(3), "ingestion-timestamp", false), + Column.metadata("event_timestamp", + DataTypes.TIMESTAMP_LTZ(3), "event-timestamp", false), + Column.metadata("op_type", DataTypes.STRING(), "op-type", false), + Column.metadata("is_ddl", DataTypes.BOOLEAN(), "is-ddl", false), + Column.metadata("mysql_type", + DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING()), "mysql-type", false), + Column.metadata("batch_id", DataTypes.BIGINT(), "batch-id", false), + Column.metadata("update_before", + DataTypes.ARRAY(DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING())), + "update-before", false)); + + public static final DataType PHYSICAL_DATA_TYPE = SCHEMA.toPhysicalRowDataType(); + + public static final List READABLE_METADATA = + Stream.of( + ReadableMetadata.DATABASE, + ReadableMetadata.TABLE, + ReadableMetadata.SQL_TYPE, + ReadableMetadata.PK_NAMES, + ReadableMetadata.INGESTION_TIMESTAMP, + ReadableMetadata.EVENT_TIMESTAMP, + ReadableMetadata.OP_TYPE, + ReadableMetadata.IS_DDL, + ReadableMetadata.MYSQL_TYPE, + ReadableMetadata.BATCH_ID, + ReadableMetadata.UPDATE_BEFORE).collect(Collectors.toList()); + + public static final List WRITEABLE_METADATA = + 
Stream.of( + WriteableMetadata.DATABASE, + WriteableMetadata.TABLE, + WriteableMetadata.SQL_TYPE, + WriteableMetadata.PK_NAMES, + WriteableMetadata.INGESTION_TIMESTAMP, + WriteableMetadata.EVENT_TIMESTAMP, + WriteableMetadata.OP_TYPE, + WriteableMetadata.IS_DDL, + WriteableMetadata.MYSQL_TYPE, + WriteableMetadata.BATCH_ID, + WriteableMetadata.UPDATE_BEFORE).collect(Collectors.toList()); + + @Test + public void testSerDeWithMetadata() throws Exception { + List lines = readLines("canal-json-inlong-data.txt"); + DeserializationSchema deserializationSchema = createCanalJsonDeserializationSchema( + PHYSICAL_DATA_TYPE, READABLE_METADATA); + // deserialize + SimpleCollector out = new SimpleCollector(); + for (String line : lines) { + deserializationSchema.deserialize(line.getBytes(StandardCharsets.UTF_8), out); + } + List res = out.result(); + + // serialize + SerializationSchema serializationSchema = createCanalJsonSerializationSchema( + PHYSICAL_DATA_TYPE, WRITEABLE_METADATA); + serializationSchema.open(null); + for (int i = 0; i < lines.size(); i++) { + String json = new String(serializationSchema.serialize(res.get(i)), StandardCharsets.UTF_8); + compareJson(json, lines.get(i)); + } + } + + // =======================================Utils======================================================= + + private CanalJsonEnhancedDeserializationSchema createCanalJsonDeserializationSchema( + DataType physicalDataType, List requestedMetadata) { + final DataType producedDataType = + DataTypeUtils.appendRowFields( + physicalDataType, + requestedMetadata.stream() + .map(m -> DataTypes.FIELD(m.key, m.dataType)) + .collect(Collectors.toList())); + return CanalJsonEnhancedDeserializationSchema.builder( + PHYSICAL_DATA_TYPE, + requestedMetadata, + InternalTypeInfo.of(producedDataType.getLogicalType())) + .setDatabase(DATABASE) + .setTable(TABLE) + .setIgnoreParseErrors(JsonFormatOptions.IGNORE_PARSE_ERRORS.defaultValue()) + .setTimestampFormat(TimestampFormat.valueOf(CanalJsonFormatOptions.TIMESTAMP_FORMAT.defaultValue())) + .build(); + } + + private CanalJsonEnhancedSerializationSchema createCanalJsonSerializationSchema( + DataType physicalDataType, List requestedMetadata) { + return new CanalJsonEnhancedSerializationSchema( + physicalDataType, + requestedMetadata, + TimestampFormat.valueOf(CanalJsonFormatOptions.TIMESTAMP_FORMAT.defaultValue()), + JsonFormatOptions.MapNullKeyMode.valueOf(CanalJsonFormatOptions.JSON_MAP_NULL_KEY_MODE.defaultValue()), + CanalJsonFormatOptions.JSON_MAP_NULL_KEY_LITERAL.defaultValue(), + JsonFormatOptions.ENCODE_DECIMAL_AS_PLAIN_NUMBER.defaultValue()); + } + + private static List readLines(String resource) throws IOException { + final URL url = CanalJsonEnhancedSerDeSchemaTest.class.getClassLoader().getResource(resource); + assert url != null; + Path path = new File(url.getFile()).toPath(); + return Files.readAllLines(path); + } + + private static List readRowDatas(String resource) throws IOException, ClassNotFoundException { + final URL url = CanalJsonEnhancedSerDeSchemaTest.class.getClassLoader().getResource(resource); + assert url != null; + Path path = new File(url.getFile()).toPath(); + ObjectInputStream in = new ObjectInputStream(new FileInputStream(path.toString())); + return (List) in.readObject(); + } + + public void compareJson(String json1, String json2) throws JsonProcessingException { + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode node1 = objectMapper.readTree(json1); + JsonNode node2 = objectMapper.readTree(json2); + assertEquals(node1, node2); + } + + 
private static class SimpleCollector implements Collector<RowData> {
+
+        private List<RowData> list = new ArrayList<>();
+
+        @Override
+        public void collect(RowData record) {
+            list.add(record);
+        }
+
+        @Override
+        public void close() {
+            // do nothing
+        }
+
+        public List<RowData> result() {
+            List<RowData> newList = new ArrayList<>(list);
+            list.clear();
+            return newList;
+        }
+    }
+
+}
diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonSerializationTest.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonSerializationTest.java
new file mode 100644
index 00000000000..a70c7bba2ab
--- /dev/null
+++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/CanalJsonSerializationTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sort.formats.json.canal;
+
+import org.apache.inlong.sort.protocol.ddl.enums.AlterType;
+import org.apache.inlong.sort.protocol.ddl.enums.PositionType;
+import org.apache.inlong.sort.protocol.ddl.expressions.AlterColumn;
+import org.apache.inlong.sort.protocol.ddl.expressions.Column;
+import org.apache.inlong.sort.protocol.ddl.expressions.Position;
+import org.apache.inlong.sort.protocol.ddl.operations.AlterOperation;
+
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Test for {@link CanalJson}.
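+ *
+ * <p>Round-trips a hand-built {@link CanalJson} carrying an ALTER operation through Jackson:
+ * serialize to a JSON string, log it, and parse it back.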
+ */
+public class CanalJsonSerializationTest {
+
+    private static final Logger LOG = LoggerFactory.getLogger(CanalJsonSerializationTest.class);
+
+    private final ObjectMapper objectMapper = new ObjectMapper();
+
+    @Test
+    public void testCanalJsonSerialization() {
+
+        List<AlterColumn> alterColumns = new ArrayList<>();
+
+        Column column = new Column("columnDataType.getColumnName()", new ArrayList<>(),
+                1,
+                new Position(PositionType.FIRST, null), true, "23",
+                "23");
+
+        alterColumns.add(new AlterColumn(AlterType.ADD_COLUMN, column, null));
+
+        AlterOperation alterOperation = new AlterOperation(alterColumns);
+
+        CanalJson canalJson = CanalJson.builder()
+                .data(null)
+                .es(0)
+                .table("table")
+                .type("type")
+                .database("database")
+                .ts(0)
+                .sql("sql")
+                .mysqlType(null)
+                .sqlType(null)
+                .pkNames(null)
+                .schema("schema")
+                .oracleType(null)
+                .operation(alterOperation)
+                .incremental(false)
+                .build();
+
+        try {
+            String serialized = objectMapper.writeValueAsString(canalJson);
+            LOG.info(serialized);
+            objectMapper.readValue(serialized, CanalJson.class);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+}
diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/DebeziumJsonSerializationTest.java b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/DebeziumJsonSerializationTest.java
new file mode 100644
index 00000000000..0ca38431e39
--- /dev/null
+++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/java/org/apache/inlong/sort/formats/json/canal/DebeziumJsonSerializationTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sort.formats.json.canal;
+
+import org.apache.inlong.sort.formats.json.debezium.DebeziumJson;
+import org.apache.inlong.sort.protocol.ddl.enums.AlterType;
+import org.apache.inlong.sort.protocol.ddl.enums.PositionType;
+import org.apache.inlong.sort.protocol.ddl.expressions.AlterColumn;
+import org.apache.inlong.sort.protocol.ddl.expressions.Column;
+import org.apache.inlong.sort.protocol.ddl.expressions.Position;
+import org.apache.inlong.sort.protocol.ddl.operations.AlterOperation;
+
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Test for {@link DebeziumJson}.
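+ *
+ * <p>Builds a {@link DebeziumJson} with DDL metadata set, serializes it with Jackson, and
+ * reads it back to confirm the mapping stays symmetric.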
+ */
+public class DebeziumJsonSerializationTest {
+
+    private static final Logger LOG = LoggerFactory.getLogger(DebeziumJsonSerializationTest.class);
+
+    private final ObjectMapper objectMapper = new ObjectMapper();
+
+    @Test
+    public void testDebeziumJsonSerialization() {
+
+        List<AlterColumn> alterColumns = new ArrayList<>();
+
+        Column column = new Column("columnDataType.getColumnName()", new ArrayList<>(),
+                1,
+                new Position(PositionType.FIRST, null), true, "23",
+                "23");
+
+        alterColumns.add(new AlterColumn(AlterType.ADD_COLUMN, column, null));
+
+        AlterOperation alterOperation = new AlterOperation(alterColumns);
+
+        DebeziumJson debeziumJson = DebeziumJson.builder().source(null)
+                .dataSourceName("dataSourceName")
+                .tableChange(null).incremental(false).build();
+
+        debeziumJson.setDdl("");
+        debeziumJson.setOperation(alterOperation);
+        debeziumJson.setAfter(new HashMap<>());
+
+        try {
+            String serialized = objectMapper.writeValueAsString(debeziumJson);
+            LOG.info(serialized);
+            objectMapper.readValue(serialized, DebeziumJson.class);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+}
diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/resources/canal-json-inlong-data.txt b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/resources/canal-json-inlong-data.txt
new file mode 100644
index 00000000000..82a3767be03
--- /dev/null
+++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/resources/canal-json-inlong-data.txt
@@ -0,0 +1,3 @@
+{"data":[{"id":2,"name":"xixi"}],"type":"INSERT","pkNames":["id"],"database":"TEST","mysqlType":{"name":"VARCHAR(63)","id":"BIGINT(20)"},"opType":"INSERT","es":0,"batchId":0,"sqlType":{"name":12,"id":-5},"updateBefore":null,"ts":1651208731718,"isDdl":false,"table":"TEST"}
+{"data":[{"id":1,"name":"oooooo"}],"type":"INSERT","pkNames":["id"],"database":"TEST","mysqlType":{"name":"VARCHAR(63)","id":"BIGINT(20)"},"opType":"INSERT","es":0,"batchId":1,"sqlType":{"name":12,"id":-5},"updateBefore":null,"ts":1651208731717,"isDdl":false,"table":"TEST"}
+{"data":[{"id":3,"name":"HAHA"}],"type":"INSERT","pkNames":["id"],"database":"TEST","mysqlType":{"name":"VARCHAR(63)","id":"BIGINT(20)"},"opType":"INSERT","es":1651208797000,"batchId":2,"sqlType":{"name":12,"id":-5},"updateBefore":null,"ts":1651208799752,"isDdl":false,"table":"TEST"}
\ No newline at end of file
diff --git a/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/resources/log4j2-test.properties b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/resources/log4j2-test.properties
new file mode 100644
index 00000000000..d493c984a32
--- /dev/null
+++ b/inlong-sort/sort-formats/format-row/format-json-v1.18/src/test/resources/log4j2-test.properties
@@ -0,0 +1,32 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ +rootLogger=INFO, testlogger + +appender.out.type=Console +appender.out.name=testlogger +appender.out.layout.type=PatternLayout +appender.out.layout.pattern=%-4r [%t] %-5p %c %x - %m%n + +# Suppress the irrelevant (wrong) warnings from the Netty channel handler +logger.flinknetty=ERROR, testlogger +logger.flinknetty.name=org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline +logger.flinknetty.additivity=false +# Resource leak detector only works with logging enabled at error level +logger.leakdetector=ERROR, testlogger +logger.leakdetector.name=org.apache.flink.shaded.netty4.io.netty.util.ResourceLeakDetector +logger.leakdetector.additivity=false diff --git a/inlong-sort/sort-formats/format-row/pom.xml b/inlong-sort/sort-formats/format-row/pom.xml index cca64ed2ca6..22207262a05 100644 --- a/inlong-sort/sort-formats/format-row/pom.xml +++ b/inlong-sort/sort-formats/format-row/pom.xml @@ -66,5 +66,11 @@ format-json-v1.15 + + v1.18 + + format-json-v1.18 + + diff --git a/inlong-sort/sort-formats/pom.xml b/inlong-sort/sort-formats/pom.xml index 4f22664ae3f..a1e69fb0c7a 100644 --- a/inlong-sort/sort-formats/pom.xml +++ b/inlong-sort/sort-formats/pom.xml @@ -30,12 +30,6 @@ pom Apache InLong - Sort Formats - - format-common - format-row - format-rowdata - - ${project.parent.parent.basedir} 1C @@ -103,22 +97,6 @@ test - - - org.apache.flink - flink-table-common - ${flink.version} - provided - - - - org.apache.flink - flink-table-common - ${flink.version} - test-jar - test - - @@ -209,4 +187,87 @@ https://git.code.oa.com/flink/flink-formats + + + v1.13 + + true + + + format-common + format-row + format-rowdata + + + + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + + org.apache.flink + flink-table-common + ${flink.version} + test-jar + test + + + + + + v1.15 + + format-common + format-row + format-rowdata + + + + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + + org.apache.flink + flink-table-common + ${flink.version} + test-jar + test + + + + + + v1.18 + + format-common + format-row/format-json-v1.18 + + + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + org.apache.flink + flink-table-common + ${flink.version} + test-jar + test + + + + +