From 8f91b64edc7f71fa071ee9420a26e65485681f81 Mon Sep 17 00:00:00 2001 From: Ling Jin <7138436+3AceShowHand@users.noreply.github.com> Date: Sat, 3 Jun 2023 22:17:39 +0800 Subject: [PATCH 1/9] This is an automated cherry-pick of #9079 Signed-off-by: ti-chi-bot --- cdc/api/v2/api_helpers.go | 8 +- cdc/api/v2/api_helpers_test.go | 29 +- cdc/entry/mounter.go | 57 +++- cdc/entry/mounter_group.go | 22 +- cdc/entry/mounter_test.go | 221 ++++++++++++++ cdc/model/changefeed.go | 43 ++- cdc/model/changefeed_test.go | 106 ++++++- cdc/processor/processor.go | 6 + .../sinkmanager/table_sink_wrapper.go | 9 +- cdc/sink/codec/builder/codec_test.go | 41 +-- cdc/sink/codec/builder/encoder_builder.go | 4 +- cdc/sink/codec/canal/canal_encoder.go | 26 +- cdc/sink/codec/canal/canal_encoder_test.go | 8 +- cdc/sink/codec/canal/canal_entry.go | 11 +- cdc/sink/codec/canal/canal_entry_test.go | 6 +- .../codec/canal/canal_json_decoder_test.go | 7 + cdc/sink/codec/canal/canal_json_encoder.go | 96 +++++- .../codec/canal/canal_json_encoder_test.go | 119 ++++++++ cdc/sink/codec/canal/canal_test_util.go | 2 +- cdc/sink/codec/common/config_test.go | 6 + cdc/sink/codec/craft/craft_encoder.go | 24 +- cdc/sink/codec/craft/craft_encoder_test.go | 3 +- cdc/sink/codec/craft/model.go | 16 +- cdc/sink/codec/maxwell/maxwell_encoder.go | 25 +- .../codec/maxwell/maxwell_encoder_test.go | 7 +- cdc/sink/codec/maxwell/maxwell_message.go | 5 +- .../codec/maxwell/maxwell_message_test.go | 2 +- cdc/sink/codec/open/open_protocol_encoder.go | 30 +- .../codec/open/open_protocol_encoder_test.go | 4 +- cdc/sink/codec/open/open_protocol_message.go | 13 +- .../codec/open/open_protocol_message_test.go | 127 ++++++++ errors.toml | 5 + pkg/cmd/cli/cli_changefeed_create.go | 16 +- pkg/cmd/cli/cli_changefeed_create_test.go | 36 +++ pkg/config/replica_config.go | 61 +++- pkg/config/replica_config_test.go | 98 ++++++ pkg/config/sink.go | 94 +++++- pkg/config/sink_test.go | 10 +- pkg/errors/cdc_errors.go | 4 + pkg/orchestrator/reactor_state.go | 4 +- .../canal/canal_json_txn_event_encoder.go | 121 ++++++++ .../canal_json_txn_event_encoder_test.go | 124 ++++++++ pkg/sink/codec/common/config.go | 287 ++++++++++++++++++ pkg/version/creator_version_gate.go | 30 +- .../integration_tests/multi_changefeed/run.sh | 13 +- 45 files changed, 1859 insertions(+), 127 deletions(-) create mode 100644 pkg/sink/codec/canal/canal_json_txn_event_encoder.go create mode 100644 pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go create mode 100644 pkg/sink/codec/common/config.go diff --git a/cdc/api/v2/api_helpers.go b/cdc/api/v2/api_helpers.go index b025b779263..989fbe78665 100644 --- a/cdc/api/v2/api_helpers.go +++ b/cdc/api/v2/api_helpers.go @@ -189,12 +189,8 @@ func (APIV2HelpersImpl) verifyCreateChangefeedConfig( if err != nil { return nil, err } - if !replicaCfg.EnableOldValue { - sinkURIParsed, err := url.Parse(cfg.SinkURI) - if err != nil { - return nil, cerror.WrapError(cerror.ErrSinkURIInvalid, err) - } +<<<<<<< HEAD protocol := sinkURIParsed.Query().Get(config.ProtocolKey) if protocol != "" { replicaCfg.Sink.Protocol = protocol @@ -215,6 +211,8 @@ func (APIV2HelpersImpl) verifyCreateChangefeedConfig( "if use force replicate, old value feature must be enabled") } } +======= +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) f, err := filter.NewFilter(replicaCfg, "") if err != nil { return nil, errors.Cause(err) diff --git a/cdc/api/v2/api_helpers_test.go b/cdc/api/v2/api_helpers_test.go 
index 16889165dfc..1201e528e00 100644 --- a/cdc/api/v2/api_helpers_test.go +++ b/cdc/api/v2/api_helpers_test.go @@ -45,12 +45,14 @@ func TestVerifyCreateChangefeedConfig(t *testing.T) { cfg.ReplicaConfig = GetDefaultReplicaConfig() cfg.ReplicaConfig.ForceReplicate = true cfg.ReplicaConfig.EnableOldValue = false - // disable old value but force replicate + cfg.SinkURI = "mysql://" + // disable old value but force replicate, and using mysql sink. cfInfo, err = h.verifyCreateChangefeedConfig(ctx, cfg, pdClient, provider, "en", storage) require.NotNil(t, err) require.Nil(t, cfInfo) cfg.ReplicaConfig.ForceReplicate = false cfg.ReplicaConfig.IgnoreIneligibleTable = true + cfg.SinkURI = "blackhole://" cfInfo, err = h.verifyCreateChangefeedConfig(ctx, cfg, pdClient, provider, "en", storage) require.Nil(t, err) require.NotNil(t, cfInfo) @@ -88,6 +90,19 @@ func TestVerifyCreateChangefeedConfig(t *testing.T) { cfg.SinkURI = string([]byte{0x7f, ' '}) cfInfo, err = h.verifyCreateChangefeedConfig(ctx, cfg, pdClient, provider, "en", storage) require.NotNil(t, err) + + cfg.StartTs = 0 + // use blackhole to workaround + cfg.SinkURI = "blackhole://127.0.0.1:9092/test?protocol=avro" + cfg.ReplicaConfig.EnableOldValue = true + cfg.ReplicaConfig.ForceReplicate = false + cfInfo, err = h.verifyCreateChangefeedConfig(ctx, cfg, pdClient, provider, "en", storage) + require.NoError(t, err) + require.False(t, cfInfo.Config.EnableOldValue) + + cfg.ReplicaConfig.ForceReplicate = true + cfInfo, err = h.verifyCreateChangefeedConfig(ctx, cfg, pdClient, provider, "en", storage) + require.Error(t, cerror.ErrOldValueNotEnabled, err) } func TestVerifyUpdateChangefeedConfig(t *testing.T) { @@ -140,4 +155,16 @@ func TestVerifyUpdateChangefeedConfig(t *testing.T) { cfg.TargetTs = 9 newCfInfo, newUpInfo, err = h.verifyUpdateChangefeedConfig(ctx, cfg, oldInfo, oldUpInfo, storage, 0) require.NotNil(t, err) + + cfg.StartTs = 0 + cfg.TargetTs = 0 + cfg.ReplicaConfig.EnableOldValue = true + cfg.SinkURI = "blackhole://127.0.0.1:9092/test?protocol=avro" + newCfInfo, newUpInfo, err = h.verifyUpdateChangefeedConfig(ctx, cfg, oldInfo, oldUpInfo, storage, 0) + require.NoError(t, err) + require.False(t, newCfInfo.Config.EnableOldValue) + + cfg.ReplicaConfig.ForceReplicate = true + newCfInfo, newUpInfo, err = h.verifyUpdateChangefeedConfig(ctx, cfg, oldInfo, oldUpInfo, storage, 0) + require.Error(t, cerror.ErrOldValueNotEnabled, err) } diff --git a/cdc/entry/mounter.go b/cdc/entry/mounter.go index a01700b279b..6d8b1bcf92c 100644 --- a/cdc/entry/mounter.go +++ b/cdc/entry/mounter.go @@ -71,7 +71,6 @@ type Mounter interface { type mounter struct { schemaStorage SchemaStorage tz *time.Location - enableOldValue bool changefeedID model.ChangeFeedID filter pfilter.Filter metricTotalRows prometheus.Gauge @@ -83,13 +82,16 @@ func NewMounter(schemaStorage SchemaStorage, changefeedID model.ChangeFeedID, tz *time.Location, filter pfilter.Filter, +<<<<<<< HEAD enableOldValue bool, +======= + integrity *integrity.Config, +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) ) Mounter { return &mounter{ - schemaStorage: schemaStorage, - changefeedID: changefeedID, - enableOldValue: enableOldValue, - filter: filter, + schemaStorage: schemaStorage, + changefeedID: changefeedID, + filter: filter, metricTotalRows: totalRowsCountGauge. WithLabelValues(changefeedID.Namespace, changefeedID.ID), metricIgnoredDMLEventCounter: ignoredDMLEventCounter. 
@@ -271,8 +273,13 @@ func parseJob(v []byte, startTs, CRTs uint64) (*timodel.Job, error) { } func datum2Column( +<<<<<<< HEAD tableInfo *model.TableInfo, datums map[int64]types.Datum, fillWithDefaultValue bool, ) ([]*model.Column, []types.Datum, []rowcodec.ColInfo, error) { +======= + tableInfo *model.TableInfo, datums map[int64]types.Datum, +) ([]*model.Column, []types.Datum, []*timodel.ColumnInfo, []rowcodec.ColInfo, error) { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) cols := make([]*model.Column, len(tableInfo.RowColumnsOffset)) rawCols := make([]types.Datum, len(tableInfo.RowColumnsOffset)) @@ -288,6 +295,7 @@ func datum2Column( continue } colName := colInfo.Name.O +<<<<<<< HEAD colDatums, exist := datums[colInfo.ID] var colValue interface{} if !exist && !fillWithDefaultValue { @@ -298,9 +306,20 @@ func datum2Column( var err error var warn string var size int +======= + colID := colInfo.ID + colDatums, exist := datums[colID] + + var ( + colValue interface{} + size int + warn string + err error + ) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) if exist { colValue, size, warn, err = formatColVal(colDatums, colInfo) - } else if fillWithDefaultValue { + } else { colDatums, colValue, size, warn, err = getDefaultOrZeroValue(colInfo) } if err != nil { @@ -342,11 +361,16 @@ func (m *mounter) mountRowKVEntry(tableInfo *model.TableInfo, row *rowKVEntry, d if row.PreRowExist { // FIXME(leoppro): using pre table info to mounter pre column datum // the pre column and current column in one event may using different table info +<<<<<<< HEAD preCols, preRawCols, extendColumnInfos, err = datum2Column(tableInfo, row.PreRow, m.enableOldValue) +======= + preCols, preRawCols, columnInfos, extendColumnInfos, err = datum2Column(tableInfo, row.PreRow) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) if err != nil { return nil, rawRow, errors.Trace(err) } +<<<<<<< HEAD // NOTICE: When the old Value feature is off, // the Delete event only needs to keep the handle key column. if row.Delete && !m.enableOldValue { @@ -356,13 +380,34 @@ func (m *mounter) mountRowKVEntry(tableInfo *model.TableInfo, row *rowKVEntry, d preCols[i] = nil } } +======= + preChecksum, checksumVersion, matched, err = m.verifyChecksum(columnInfos, preRawCols, true) + if err != nil { + return nil, rawRow, errors.Trace(err) + } + + if !matched { + log.Error("previous columns checksum mismatch", + zap.Uint32("checksum", preChecksum), + zap.Any("tableInfo", tableInfo), + zap.Any("row", row)) + if m.integrity.ErrorHandle() { + return nil, rawRow, cerror.ErrCorruptedDataMutation. 
+ GenWithStackByArgs(m.changefeedID.Namespace, m.changefeedID.ID, row) + } + corrupted = true +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } } var cols []*model.Column var rawCols []types.Datum if row.RowExist { +<<<<<<< HEAD cols, rawCols, extendColumnInfos, err = datum2Column(tableInfo, row.Row, true) +======= + cols, rawCols, columnInfos, extendColumnInfos, err = datum2Column(tableInfo, row.Row) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) if err != nil { return nil, rawRow, errors.Trace(err) } diff --git a/cdc/entry/mounter_group.go b/cdc/entry/mounter_group.go index 770267baa55..1f4619c6cf3 100644 --- a/cdc/entry/mounter_group.go +++ b/cdc/entry/mounter_group.go @@ -31,11 +31,19 @@ type MounterGroup interface { } type mounterGroup struct { +<<<<<<< HEAD schemaStorage SchemaStorage inputCh chan *model.PolymorphicEvent tz *time.Location filter filter.Filter enableOldValue bool +======= + schemaStorage SchemaStorage + inputCh chan *model.PolymorphicEvent + tz *time.Location + filter filter.Filter + integrity *integrity.Config +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) workerNum int @@ -52,7 +60,6 @@ const ( func NewMounterGroup( schemaStorage SchemaStorage, workerNum int, - enableOldValue bool, filter filter.Filter, tz *time.Location, changefeedID model.ChangeFeedID, @@ -61,11 +68,10 @@ func NewMounterGroup( workerNum = defaultMounterWorkerNum } return &mounterGroup{ - schemaStorage: schemaStorage, - inputCh: make(chan *model.PolymorphicEvent, defaultInputChanSize), - enableOldValue: enableOldValue, - filter: filter, - tz: tz, + schemaStorage: schemaStorage, + inputCh: make(chan *model.PolymorphicEvent, defaultInputChanSize), + filter: filter, + tz: tz, workerNum: workerNum, @@ -100,7 +106,11 @@ func (m *mounterGroup) Run(ctx context.Context) error { } func (m *mounterGroup) runWorker(ctx context.Context) error { +<<<<<<< HEAD mounter := NewMounter(m.schemaStorage, m.changefeedID, m.tz, m.filter, m.enableOldValue) +======= + mounter := NewMounter(m.schemaStorage, m.changefeedID, m.tz, m.filter, m.integrity) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) for { select { case <-ctx.Done(): diff --git a/cdc/entry/mounter_test.go b/cdc/entry/mounter_test.go index 4767eeb681d..c248fcde4aa 100644 --- a/cdc/entry/mounter_test.go +++ b/cdc/entry/mounter_test.go @@ -309,8 +309,12 @@ func testMounterDisableOldValue(t *testing.T, tc struct { filter, err := filter.NewFilter(config, "") require.Nil(t, err) mounter := NewMounter(scheamStorage, +<<<<<<< HEAD model.DefaultChangeFeedID("c1"), time.UTC, filter, false).(*mounter) +======= + model.DefaultChangeFeedID("c1"), time.UTC, filter, config.Integrity).(*mounter) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) mounter.tz = time.Local ctx := context.Background() @@ -985,6 +989,215 @@ func TestGetDefaultZeroValue(t *testing.T) { } } +<<<<<<< HEAD +======= +func TestDecodeRowEnableChecksum(t *testing.T) { + helper := NewSchemaTestHelper(t) + defer helper.Close() + + tk := helper.Tk() + + tk.MustExec("set global tidb_enable_row_level_checksum = 1") + helper.Tk().MustExec("use test") + + replicaConfig := config.GetDefaultReplicaConfig() + replicaConfig.Integrity.IntegrityCheckLevel = 
integrity.CheckLevelCorrectness + filter, err := filter.NewFilter(replicaConfig, "") + require.NoError(t, err) + + ver, err := helper.Storage().CurrentVersion(oracle.GlobalTxnScope) + require.NoError(t, err) + + changefeed := model.DefaultChangeFeedID("changefeed-test-decode-row") + schemaStorage, err := NewSchemaStorage(helper.GetCurrentMeta(), + ver.Ver, false, changefeed, util.RoleTester, filter) + require.NoError(t, err) + require.NotNil(t, schemaStorage) + + createTableDDL := "create table t (id int primary key, a int)" + job := helper.DDL2Job(createTableDDL) + err = schemaStorage.HandleDDLJob(job) + require.NoError(t, err) + + ts := schemaStorage.GetLastSnapshot().CurrentTs() + schemaStorage.AdvanceResolvedTs(ver.Ver) + + mounter := NewMounter(schemaStorage, changefeed, time.Local, filter, replicaConfig.Integrity).(*mounter) + + ctx := context.Background() + + tableInfo, ok := schemaStorage.GetLastSnapshot().TableByName("test", "t") + require.True(t, ok) + + // row without checksum + tk.Session().GetSessionVars().EnableRowLevelChecksum = false + tk.MustExec("insert into t values (1, 10)") + + key, value := getLastKeyValueInStore(t, helper.Storage(), tableInfo.ID) + rawKV := &model.RawKVEntry{ + OpType: model.OpTypePut, + Key: key, + Value: value, + StartTs: ts - 1, + CRTs: ts + 1, + } + + row, err := mounter.unmarshalAndMountRowChanged(ctx, rawKV) + require.NoError(t, err) + require.NotNil(t, row) + // the upstream tidb does not enable checksum, so the checksum is nil + require.Nil(t, row.Checksum) + + // row with one checksum + tk.Session().GetSessionVars().EnableRowLevelChecksum = true + tk.MustExec("insert into t values (2, 20)") + + key, value = getLastKeyValueInStore(t, helper.Storage(), tableInfo.ID) + rawKV = &model.RawKVEntry{ + OpType: model.OpTypePut, + Key: key, + Value: value, + StartTs: ts - 1, + CRTs: ts + 1, + } + row, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) + require.NoError(t, err) + require.NotNil(t, row) + require.NotNil(t, row.Checksum) + + expected, ok := mounter.decoder.GetChecksum() + require.True(t, ok) + require.Equal(t, expected, row.Checksum.Current) + require.False(t, row.Checksum.Corrupted) + + // row with 2 checksum + tk.MustExec("insert into t values (3, 30)") + job = helper.DDL2Job("alter table t change column a a varchar(10)") + err = schemaStorage.HandleDDLJob(job) + require.NoError(t, err) + + key, value = getLastKeyValueInStore(t, helper.Storage(), tableInfo.ID) + rawKV = &model.RawKVEntry{ + OpType: model.OpTypePut, + Key: key, + Value: value, + StartTs: ts - 1, + CRTs: ts + 1, + } + row, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) + require.NoError(t, err) + require.NotNil(t, row) + require.NotNil(t, row.Checksum) + + first, ok := mounter.decoder.GetChecksum() + require.True(t, ok) + + extra, ok := mounter.decoder.GetExtraChecksum() + require.True(t, ok) + + if row.Checksum.Current != first { + require.Equal(t, extra, row.Checksum.Current) + } else { + require.Equal(t, first, row.Checksum.Current) + } + require.False(t, row.Checksum.Corrupted) + + // hack the table info to make the checksum corrupted + tableInfo.Columns[0].FieldType = *types.NewFieldType(mysql.TypeVarchar) + + // corrupt-handle-level default to warn, so no error, but the checksum is corrupted + row, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) + require.NoError(t, err) + require.NotNil(t, row.Checksum) + require.True(t, row.Checksum.Corrupted) + + mounter.integrity.CorruptionHandleLevel = integrity.CorruptionHandleLevelError + _, err = 
mounter.unmarshalAndMountRowChanged(ctx, rawKV) + require.Error(t, err) + require.ErrorIs(t, err, cerror.ErrCorruptedDataMutation) + + job = helper.DDL2Job("drop table t") + err = schemaStorage.HandleDDLJob(job) + require.NoError(t, err) +} + +func TestDecodeRow(t *testing.T) { + helper := NewSchemaTestHelper(t) + defer helper.Close() + + helper.Tk().MustExec("set @@tidb_enable_clustered_index=1;") + helper.Tk().MustExec("use test;") + + changefeed := model.DefaultChangeFeedID("changefeed-test-decode-row") + + ver, err := helper.Storage().CurrentVersion(oracle.GlobalTxnScope) + require.NoError(t, err) + + cfg := config.GetDefaultReplicaConfig() + + filter, err := filter.NewFilter(cfg, "") + require.NoError(t, err) + + schemaStorage, err := NewSchemaStorage(helper.GetCurrentMeta(), + ver.Ver, false, changefeed, util.RoleTester, filter) + require.NoError(t, err) + + // apply ddl to schemaStorage + ddl := "create table test.student(id int primary key, name char(50), age int, gender char(10))" + job := helper.DDL2Job(ddl) + err = schemaStorage.HandleDDLJob(job) + require.NoError(t, err) + + ts := schemaStorage.GetLastSnapshot().CurrentTs() + + schemaStorage.AdvanceResolvedTs(ver.Ver) + + mounter := NewMounter(schemaStorage, changefeed, time.Local, filter, cfg.Integrity).(*mounter) + + helper.Tk().MustExec(`insert into student values(1, "dongmen", 20, "male")`) + helper.Tk().MustExec(`update student set age = 27 where id = 1`) + + ctx := context.Background() + decodeAndCheckRowInTable := func(tableID int64, f func(key []byte, value []byte) *model.RawKVEntry) { + walkTableSpanInStore(t, helper.Storage(), tableID, func(key []byte, value []byte) { + rawKV := f(key, value) + + row, err := mounter.unmarshalAndMountRowChanged(ctx, rawKV) + require.NoError(t, err) + require.NotNil(t, row) + + if row.Columns != nil { + require.NotNil(t, mounter.decoder) + } + + if row.PreColumns != nil { + require.NotNil(t, mounter.preDecoder) + } + }) + } + + toRawKV := func(key []byte, value []byte) *model.RawKVEntry { + return &model.RawKVEntry{ + OpType: model.OpTypePut, + Key: key, + Value: value, + StartTs: ts - 1, + CRTs: ts + 1, + } + } + + tableInfo, ok := schemaStorage.GetLastSnapshot().TableByName("test", "student") + require.True(t, ok) + + decodeAndCheckRowInTable(tableInfo.ID, toRawKV) + decodeAndCheckRowInTable(tableInfo.ID, toRawKV) + + job = helper.DDL2Job("drop table student") + err = schemaStorage.HandleDDLJob(job) + require.NoError(t, err) +} + +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) // TestDecodeEventIgnoreRow tests a PolymorphicEvent.Row is nil // if this event should be filter out by filter. 
func TestDecodeEventIgnoreRow(t *testing.T) { @@ -1019,7 +1232,11 @@ func TestDecodeEventIgnoreRow(t *testing.T) { ts := schemaStorage.GetLastSnapshot().CurrentTs() schemaStorage.AdvanceResolvedTs(ver.Ver) +<<<<<<< HEAD mounter := NewMounter(schemaStorage, cfID, time.Local, f, true).(*mounter) +======= + mounter := NewMounter(schemaStorage, cfID, time.Local, f, cfg.Integrity).(*mounter) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) type testCase struct { schema string @@ -1196,7 +1413,11 @@ func TestBuildTableInfo(t *testing.T) { originTI, err := ddl.BuildTableInfoFromAST(stmt.(*ast.CreateTableStmt)) require.NoError(t, err) cdcTableInfo := model.WrapTableInfo(0, "test", 0, originTI) +<<<<<<< HEAD cols, _, _, err := datum2Column(cdcTableInfo, map[int64]types.Datum{}, true) +======= + cols, _, _, _, err := datum2Column(cdcTableInfo, map[int64]types.Datum{}) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) require.NoError(t, err) recoveredTI := model.BuildTiDBTableInfo(cols, cdcTableInfo.IndexColumnsOffset) handle := sqlmodel.GetWhereHandle(recoveredTI, recoveredTI) diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index a3e1b7a04ff..4ca50664f40 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -18,6 +18,7 @@ import ( "math" "net/url" "regexp" + "strings" "time" "github.com/pingcap/errors" @@ -261,7 +262,7 @@ func (info *ChangeFeedInfo) Clone() (*ChangeFeedInfo, error) { // VerifyAndComplete verifies changefeed info and may fill in some fields. // If a required field is not provided, return an error. // If some necessary filed is missing but can use a default value, fill in it. -func (info *ChangeFeedInfo) VerifyAndComplete() error { +func (info *ChangeFeedInfo) VerifyAndComplete() { defaultConfig := config.GetDefaultReplicaConfig() if info.Engine == "" { info.Engine = SortUnified @@ -278,8 +279,20 @@ func (info *ChangeFeedInfo) VerifyAndComplete() error { if info.Config.Consistent == nil { info.Config.Consistent = defaultConfig.Consistent } +<<<<<<< HEAD return nil +======= + if info.Config.Scheduler == nil { + info.Config.Scheduler = defaultConfig.Scheduler + } + + if info.Config.Integrity == nil { + info.Config.Integrity = defaultConfig.Integrity + } + + info.RmUnusedFields() +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } // FixIncompatible fixes incompatible changefeed meta info. 
@@ -308,6 +321,22 @@ func (info *ChangeFeedInfo) FixIncompatible() { info.fixMemoryQuota() log.Info("Fix incompatible memory quota completed", zap.String("changefeed", info.String())) } +<<<<<<< HEAD +======= + + log.Info("Start fixing incompatible scheduler", zap.String("changefeed", info.String())) + inheritV66 := creatorVersionGate.ChangefeedInheritSchedulerConfigFromV66() + info.fixScheduler(inheritV66) + log.Info("Fix incompatible scheduler completed", zap.String("changefeed", info.String())) + + if creatorVersionGate.ChangefeedAdjustEnableOldValueByProtocol() { + log.Info("Start fixing incompatible enable old value", zap.String("changefeed", info.String()), + zap.Bool("enableOldValue", info.Config.EnableOldValue)) + info.fixEnableOldValue() + log.Info("Fix incompatible enable old value completed", zap.String("changefeed", info.String()), + zap.Bool("enableOldValue", info.Config.EnableOldValue)) + } +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } // fixState attempts to fix state loss from upgrading the old owner to the new owner. @@ -378,6 +407,18 @@ func (info *ChangeFeedInfo) fixMySQLSinkProtocol() { } } +func (info *ChangeFeedInfo) fixEnableOldValue() { + uri, err := url.Parse(info.SinkURI) + if err != nil { + // this is impossible to happen, since the changefeed registered successfully. + log.Warn("parse sink URI failed", zap.Error(err)) + return + } + scheme := strings.ToLower(uri.Scheme) + protocol := uri.Query().Get(config.ProtocolKey) + info.Config.AdjustEnableOldValue(scheme, protocol) +} + func (info *ChangeFeedInfo) fixMQSinkProtocol() { uri, err := url.Parse(info.SinkURI) if err != nil { diff --git a/cdc/model/changefeed_test.go b/cdc/model/changefeed_test.go index 2f276a35a03..dcc2e4d2c4c 100644 --- a/cdc/model/changefeed_test.go +++ b/cdc/model/changefeed_test.go @@ -27,6 +27,109 @@ import ( "github.com/tikv/client-go/v2/oracle" ) +<<<<<<< HEAD +======= +func TestRmUnusedField(t *testing.T) { + t.Parallel() + const ( + defaultRegistry string = "default-schema-registry" + defaultProtocol string = "default-protocol" + ) + + // 1. mysql downstream + { + mysqlCf := &ChangeFeedInfo{ + SinkURI: "mysql://", + Config: &config.ReplicaConfig{ + Sink: &config.SinkConfig{ + SchemaRegistry: util.AddressOf(defaultRegistry), + Protocol: util.AddressOf(defaultProtocol), + CSVConfig: &config.CSVConfig{ + Quote: string(config.DoubleQuoteChar), + Delimiter: config.Comma, + NullString: config.NULL, + }, + }, + }, + } + + mysqlCf.VerifyAndComplete() + require.True(t, mysqlCf.Config.Sink.SchemaRegistry == nil) + require.True(t, mysqlCf.Config.Sink.Protocol == nil) + require.Nil(t, mysqlCf.Config.Sink.CSVConfig) + } + + // 2. storage downstream + { + strCf := &ChangeFeedInfo{ + SinkURI: "s3://", + Config: &config.ReplicaConfig{ + Sink: &config.SinkConfig{ + SchemaRegistry: util.AddressOf(defaultRegistry), + Protocol: util.AddressOf(defaultProtocol), + CSVConfig: &config.CSVConfig{ + Quote: string(config.DoubleQuoteChar), + Delimiter: config.Comma, + NullString: config.NULL, + }, + }, + }, + } + strCf.VerifyAndComplete() + require.True(t, strCf.Config.Sink.SchemaRegistry == nil) + require.NotNil(t, strCf.Config.Sink.CSVConfig) + } + + // 3. 
kafka downstream using avro + { + kaCf := &ChangeFeedInfo{ + SinkURI: "kafka://", + Config: &config.ReplicaConfig{ + Sink: &config.SinkConfig{ + Protocol: util.AddressOf(config.ProtocolAvro.String()), + SchemaRegistry: util.AddressOf(defaultRegistry), + CSVConfig: &config.CSVConfig{ + Quote: string(config.DoubleQuoteChar), + Delimiter: config.Comma, + NullString: config.NULL, + }, + }, + }, + } + kaCf.VerifyAndComplete() + require.Equal(t, defaultRegistry, util.GetOrZero(kaCf.Config.Sink.SchemaRegistry)) + require.Equal(t, config.ProtocolAvro.String(), util.GetOrZero(kaCf.Config.Sink.Protocol)) + require.Nil(t, kaCf.Config.Sink.CSVConfig) + } + + // 4. kafka downstream using canal-json + { + kcCf := &ChangeFeedInfo{ + SinkURI: "kafka://", + Config: &config.ReplicaConfig{ + Sink: &config.SinkConfig{ + Protocol: util.AddressOf(config.ProtocolCanal.String()), + SchemaRegistry: util.AddressOf(defaultRegistry), + CSVConfig: &config.CSVConfig{ + Quote: string(config.DoubleQuoteChar), + Delimiter: config.Comma, + NullString: config.NULL, + }, + }, + }, + } + kcCf.VerifyAndComplete() + require.True(t, kcCf.Config.Sink.SchemaRegistry == nil) + require.Equal( + t, + config.ProtocolCanal.String(), + util.GetOrZero(kcCf.Config.Sink.Protocol), + ) + require.Nil(t, kcCf.Config.Sink.CSVConfig) + } +} + +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) func TestFillV1(t *testing.T) { t.Parallel() @@ -154,8 +257,7 @@ func TestVerifyAndComplete(t *testing.T) { }, } - err := info.VerifyAndComplete() - require.Nil(t, err) + info.VerifyAndComplete() require.Equal(t, SortUnified, info.Engine) marshalConfig1, err := info.Config.Marshal() diff --git a/cdc/processor/processor.go b/cdc/processor/processor.go index ca961c85f51..f0e448ab898 100644 --- a/cdc/processor/processor.go +++ b/cdc/processor/processor.go @@ -789,8 +789,14 @@ func (p *processor) lazyInitImpl(ctx cdcContext.Context) error { p.mg = entry.NewMounterGroup(p.schemaStorage, p.changefeed.Info.Config.Mounter.WorkerNum, +<<<<<<< HEAD p.changefeed.Info.Config.EnableOldValue, p.filter, tz, p.changefeedID) +======= + p.filter, tz, p.changefeedID, p.changefeed.Info.Config.Integrity) + p.mg.name = "MounterGroup" + p.mg.spawn(stdCtx) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) p.wg.Add(1) go func() { diff --git a/cdc/processor/sinkmanager/table_sink_wrapper.go b/cdc/processor/sinkmanager/table_sink_wrapper.go index 1ea4e720510..3472d617755 100644 --- a/cdc/processor/sinkmanager/table_sink_wrapper.go +++ b/cdc/processor/sinkmanager/table_sink_wrapper.go @@ -443,7 +443,7 @@ func convertRowChangedEvents( // This indicates that it is an update event, // and after enable old value internally by default(but disable in the configuration). // We need to handle the update event to be compatible with the old format. - if !enableOldValue && colLen != 0 && preColLen != 0 && colLen == preColLen { + if e.Row.IsUpdate() && !enableOldValue { if shouldSplitUpdateEvent(e) { deleteEvent, insertEvent, err := splitUpdateEvent(e) if err != nil { @@ -508,13 +508,6 @@ func splitUpdateEvent( deleteEvent.RawKV = &deleteEventRowKV deleteEvent.Row.Columns = nil - for i := range deleteEvent.Row.PreColumns { - // NOTICE: Only the handle key pre column is retained in the delete event. 
- if deleteEvent.Row.PreColumns[i] != nil && - !deleteEvent.Row.PreColumns[i].Flag.IsHandleKey() { - deleteEvent.Row.PreColumns[i] = nil - } - } insertEvent := *updateEvent insertEventRow := *updateEvent.Row diff --git a/cdc/sink/codec/builder/codec_test.go b/cdc/sink/codec/builder/codec_test.go index 1e43f2edc92..c01fee4195e 100644 --- a/cdc/sink/codec/builder/codec_test.go +++ b/cdc/sink/codec/builder/codec_test.go @@ -69,14 +69,14 @@ func TestJsonVsCraftVsPB(t *testing.T) { if len(cs) == 0 { continue } - craftEncoder := craft.NewBatchEncoder() - craftEncoder.(*craft.BatchEncoder).MaxMessageBytes = 8192 - craftEncoder.(*craft.BatchEncoder).MaxBatchSize = 64 + config := &common.Config{ + MaxMessageBytes: 8192, + MaxBatchSize: 64, + } + craftEncoder := craft.NewBatchEncoder(config) craftMessages := encodeRowCase(t, craftEncoder, cs) - jsonEncoder := open.NewBatchEncoder() - jsonEncoder.(*open.BatchEncoder).MaxMessageBytes = 8192 - jsonEncoder.(*open.BatchEncoder).MaxBatchSize = 64 + jsonEncoder := open.NewBatchEncoder(config) jsonMessages := encodeRowCase(t, jsonEncoder, cs) protobuf1Messages := codecEncodeRowChangedPB1ToMessage(cs) @@ -220,16 +220,17 @@ func codecEncodeRowCase(encoder codec.EventBatchEncoder, events []*model.RowChan func init() { var err error - encoder := craft.NewBatchEncoder() - encoder.(*craft.BatchEncoder).MaxMessageBytes = 8192 - encoder.(*craft.BatchEncoder).MaxBatchSize = 64 + + config := &common.Config{ + MaxMessageBytes: 8192, + MaxBatchSize: 64, + } + encoder := craft.NewBatchEncoder(config) if codecCraftEncodedRowChanges, err = codecEncodeRowCase(encoder, codecBenchmarkRowChanges); err != nil { panic(err) } - encoder = open.NewBatchEncoder() - encoder.(*open.BatchEncoder).MaxMessageBytes = 8192 - encoder.(*open.BatchEncoder).MaxBatchSize = 64 + encoder = open.NewBatchEncoder(config) if codecJSONEncodedRowChanges, err = codecEncodeRowCase(encoder, codecBenchmarkRowChanges); err != nil { panic(err) } @@ -238,19 +239,23 @@ func init() { } func BenchmarkCraftEncoding(b *testing.B) { + config := &common.Config{ + MaxMessageBytes: 8192, + MaxBatchSize: 64, + } allocator := craft.NewSliceAllocator(128) - encoder := craft.NewBatchEncoderWithAllocator(allocator) - encoder.(*craft.BatchEncoder).MaxMessageBytes = 8192 - encoder.(*craft.BatchEncoder).MaxBatchSize = 64 + encoder := craft.NewBatchEncoderWithAllocator(allocator, config) for i := 0; i < b.N; i++ { _, _ = codecEncodeRowCase(encoder, codecBenchmarkRowChanges) } } func BenchmarkJsonEncoding(b *testing.B) { - encoder := open.NewBatchEncoder() - encoder.(*open.BatchEncoder).MaxMessageBytes = 8192 - encoder.(*open.BatchEncoder).MaxBatchSize = 64 + config := &common.Config{ + MaxMessageBytes: 8192, + MaxBatchSize: 64, + } + encoder := open.NewBatchEncoder(config) for i := 0; i < b.N; i++ { _, _ = codecEncodeRowCase(encoder, codecBenchmarkRowChanges) } diff --git a/cdc/sink/codec/builder/encoder_builder.go b/cdc/sink/codec/builder/encoder_builder.go index 4b398ba4c2c..43f055d2593 100644 --- a/cdc/sink/codec/builder/encoder_builder.go +++ b/cdc/sink/codec/builder/encoder_builder.go @@ -34,11 +34,11 @@ func NewEventBatchEncoderBuilder(ctx context.Context, c *common.Config) (codec.E case config.ProtocolDefault, config.ProtocolOpen: return open.NewBatchEncoderBuilder(c), nil case config.ProtocolCanal: - return canal.NewBatchEncoderBuilder(), nil + return canal.NewBatchEncoderBuilder(c), nil case config.ProtocolAvro: return avro.NewBatchEncoderBuilder(ctx, c) case config.ProtocolMaxwell: - return 
maxwell.NewBatchEncoderBuilder(), nil + return maxwell.NewBatchEncoderBuilder(c), nil case config.ProtocolCanalJSON: return canal.NewJSONBatchEncoderBuilder(c), nil case config.ProtocolCraft: diff --git a/cdc/sink/codec/canal/canal_encoder.go b/cdc/sink/codec/canal/canal_encoder.go index 4bff7343cda..32105b61bd4 100644 --- a/cdc/sink/codec/canal/canal_encoder.go +++ b/cdc/sink/codec/canal/canal_encoder.go @@ -34,6 +34,8 @@ type BatchEncoder struct { callbackBuf []func() packet *canal.Packet entryBuilder *canalEntryBuilder + + config *common.Config } // EncodeCheckpointEvent implements the EventBatchEncoder interface @@ -50,7 +52,7 @@ func (d *BatchEncoder) AppendRowChangedEvent( e *model.RowChangedEvent, callback func(), ) error { - entry, err := d.entryBuilder.fromRowEvent(e) + entry, err := d.entryBuilder.fromRowEvent(e, d.config.OnlyHandleKeyColumns) if err != nil { return errors.Trace(err) } @@ -156,20 +158,29 @@ func (d *BatchEncoder) resetPacket() { } // newBatchEncoder creates a new canalBatchEncoder. +<<<<<<< HEAD:cdc/sink/codec/canal/canal_encoder.go func newBatchEncoder() codec.EventBatchEncoder { +======= +func newBatchEncoder(config *common.Config) codec.RowEventEncoder { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_encoder.go encoder := &BatchEncoder{ messages: &canal.Messages{}, callbackBuf: make([]func(), 0), entryBuilder: newCanalEntryBuilder(), + + config: config, } encoder.resetPacket() return encoder } -type batchEncoderBuilder struct{} +type batchEncoderBuilder struct { + config *common.Config +} // Build a `canalBatchEncoder` +<<<<<<< HEAD:cdc/sink/codec/canal/canal_encoder.go func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { return newBatchEncoder() } @@ -177,4 +188,15 @@ func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { // NewBatchEncoderBuilder creates a canal batchEncoderBuilder. func NewBatchEncoderBuilder() codec.EncoderBuilder { return &batchEncoderBuilder{} +======= +func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { + return newBatchEncoder(b.config) +} + +// NewBatchEncoderBuilder creates a canal batchEncoderBuilder. 
+func NewBatchEncoderBuilder(config *common.Config) codec.RowEventEncoderBuilder { + return &batchEncoderBuilder{ + config: config, + } +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_encoder.go } diff --git a/cdc/sink/codec/canal/canal_encoder_test.go b/cdc/sink/codec/canal/canal_encoder_test.go index 481c781993f..2fe8bc06372 100644 --- a/cdc/sink/codec/canal/canal_encoder_test.go +++ b/cdc/sink/codec/canal/canal_encoder_test.go @@ -20,6 +20,8 @@ import ( "github.com/golang/protobuf/proto" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/config" + "github.com/pingcap/tiflow/pkg/sink/codec/common" canal "github.com/pingcap/tiflow/proto/canal" "github.com/stretchr/testify/require" ) @@ -28,7 +30,7 @@ func TestCanalBatchEncoder(t *testing.T) { t.Parallel() s := defaultCanalBatchTester for _, cs := range s.rowCases { - encoder := newBatchEncoder() + encoder := newBatchEncoder(common.NewConfig(config.ProtocolCanal)) for _, row := range cs { err := encoder.AppendRowChangedEvent(context.Background(), "", row, nil) require.Nil(t, err) @@ -55,7 +57,7 @@ func TestCanalBatchEncoder(t *testing.T) { } for _, cs := range s.ddlCases { - encoder := newBatchEncoder() + encoder := newBatchEncoder(common.NewConfig(config.ProtocolCanal)) for _, ddl := range cs { msg, err := encoder.EncodeDDLEvent(ddl) require.Nil(t, err) @@ -76,7 +78,7 @@ func TestCanalBatchEncoder(t *testing.T) { } func TestCanalAppendRowChangedEventWithCallback(t *testing.T) { - encoder := newBatchEncoder() + encoder := newBatchEncoder(common.NewConfig(config.ProtocolCanal)) require.NotNil(t, encoder) count := 0 diff --git a/cdc/sink/codec/canal/canal_entry.go b/cdc/sink/codec/canal/canal_entry.go index 4c47f66e438..e99762f2495 100644 --- a/cdc/sink/codec/canal/canal_entry.go +++ b/cdc/sink/codec/canal/canal_entry.go @@ -144,7 +144,7 @@ func (b *canalEntryBuilder) buildColumn(c *model.Column, colName string, updated } // build the RowData of a canal entry -func (b *canalEntryBuilder) buildRowData(e *model.RowChangedEvent) (*canal.RowData, error) { +func (b *canalEntryBuilder) buildRowData(e *model.RowChangedEvent, onlyHandleKeyColumns bool) (*canal.RowData, error) { var columns []*canal.Column for _, column := range e.Columns { if column == nil { @@ -156,11 +156,16 @@ func (b *canalEntryBuilder) buildRowData(e *model.RowChangedEvent) (*canal.RowDa } columns = append(columns, c) } + + onlyHandleKeyColumns = onlyHandleKeyColumns && e.IsDelete() var preColumns []*canal.Column for _, column := range e.PreColumns { if column == nil { continue } + if onlyHandleKeyColumns && !column.Flag.IsHandleKey() { + continue + } c, err := b.buildColumn(column, column.Name, !e.IsDelete()) if err != nil { return nil, errors.Trace(err) @@ -175,11 +180,11 @@ func (b *canalEntryBuilder) buildRowData(e *model.RowChangedEvent) (*canal.RowDa } // fromRowEvent builds canal entry from cdc RowChangedEvent -func (b *canalEntryBuilder) fromRowEvent(e *model.RowChangedEvent) (*canal.Entry, error) { +func (b *canalEntryBuilder) fromRowEvent(e *model.RowChangedEvent, onlyHandleKeyColumns bool) (*canal.Entry, error) { eventType := convertRowEventType(e) header := b.buildHeader(e.CommitTs, e.Table.Schema, e.Table.Table, eventType, 1) isDdl := isCanalDDL(eventType) // false - rowData, err := b.buildRowData(e) + rowData, err := b.buildRowData(e, onlyHandleKeyColumns) if err != nil { return nil, errors.Trace(err) } diff 
--git a/cdc/sink/codec/canal/canal_entry_test.go b/cdc/sink/codec/canal/canal_entry_test.go index 77447bfda47..14ec07087cd 100644 --- a/cdc/sink/codec/canal/canal_entry_test.go +++ b/cdc/sink/codec/canal/canal_entry_test.go @@ -70,7 +70,7 @@ func testInsert(t *testing.T) { } builder := newCanalEntryBuilder() - entry, err := builder.fromRowEvent(testCaseInsert) + entry, err := builder.fromRowEvent(testCaseInsert, false) require.Nil(t, err) require.Equal(t, canal.EntryType_ROWDATA, entry.GetEntryType()) header := entry.GetHeader() @@ -146,7 +146,7 @@ func testUpdate(t *testing.T) { }, } builder := newCanalEntryBuilder() - entry, err := builder.fromRowEvent(testCaseUpdate) + entry, err := builder.fromRowEvent(testCaseUpdate, false) require.Nil(t, err) require.Equal(t, canal.EntryType_ROWDATA, entry.GetEntryType()) @@ -219,7 +219,7 @@ func testDelete(t *testing.T) { } builder := newCanalEntryBuilder() - entry, err := builder.fromRowEvent(testCaseDelete) + entry, err := builder.fromRowEvent(testCaseDelete, false) require.Nil(t, err) require.Equal(t, canal.EntryType_ROWDATA, entry.GetEntryType()) header := entry.GetHeader() diff --git a/cdc/sink/codec/canal/canal_json_decoder_test.go b/cdc/sink/codec/canal/canal_json_decoder_test.go index edcbc8c093b..9eaec071700 100644 --- a/cdc/sink/codec/canal/canal_json_decoder_test.go +++ b/cdc/sink/codec/canal/canal_json_decoder_test.go @@ -84,7 +84,14 @@ func TestNewCanalJSONBatchDecoder4RowMessage(t *testing.T) { func TestNewCanalJSONBatchDecoder4DDLMessage(t *testing.T) { t.Parallel() for _, encodeEnable := range []bool{false, true} { +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_decoder_test.go encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: encodeEnable} +======= + encoder := &JSONRowEventEncoder{ + builder: newCanalEntryBuilder(), + config: &common.Config{EnableTiDBExtension: encodeEnable}, + } +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_decoder_test.go require.NotNil(t, encoder) result, err := encoder.EncodeDDLEvent(testCaseDDL) diff --git a/cdc/sink/codec/canal/canal_json_encoder.go b/cdc/sink/codec/canal/canal_json_encoder.go index 3e2fa5a540c..928202e6d35 100644 --- a/cdc/sink/codec/canal/canal_json_encoder.go +++ b/cdc/sink/codec/canal/canal_json_encoder.go @@ -29,6 +29,7 @@ import ( "go.uber.org/zap" ) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go // JSONBatchEncoder encodes Canal json messages in JSON format type JSONBatchEncoder struct { builder *canalEntryBuilder @@ -59,6 +60,21 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt mysqlTypeMap := make(map[string]string, len(e.Columns)) filling := func(columns []*model.Column, out *jwriter.Writer) error { +======= +func newJSONMessageForDML( + builder *canalEntryBuilder, + e *model.RowChangedEvent, + config *common.Config, +) ([]byte, error) { + isDelete := e.IsDelete() + mysqlTypeMap := make(map[string]string, len(e.Columns)) + + filling := func(columns []*model.Column, out *jwriter.Writer, + onlyOutputUpdatedColumn bool, + onlyHandleKeyColumns bool, + newColumnMap map[string]*model.Column, + ) error { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go if len(columns) == 0 { out.RawString("null") return nil @@ -68,6 +84,16 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e 
*model.RowChangedEvent) ([]byt isFirst := true for _, col := range columns { if col != nil { +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go +======= + // column equal, do not output it + if onlyOutputUpdatedColumn && shouldIgnoreColumn(col, newColumnMap) { + continue + } + if onlyHandleKeyColumns && !col.Flag.IsHandleKey() { + continue + } +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go if isFirst { isFirst = false } else { @@ -165,6 +191,9 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt emptyColumn := true for _, col := range columns { if col != nil { + if isDelete && config.OnlyHandleKeyColumns && !col.Flag.IsHandleKey() { + continue + } if emptyColumn { out.RawByte('{') emptyColumn = false @@ -213,12 +242,17 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt if e.IsDelete() { out.RawString(",\"old\":null") out.RawString(",\"data\":") +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go if err := filling(e.PreColumns, out); err != nil { +======= + if err := filling(e.PreColumns, out, false, config.OnlyHandleKeyColumns, nil); err != nil { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go return nil, err } } else if e.IsInsert() { out.RawString(",\"old\":null") out.RawString(",\"data\":") +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go if err := filling(e.Columns, out); err != nil { return nil, err } @@ -229,13 +263,36 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt } out.RawString(",\"data\":") if err := filling(e.Columns, out); err != nil { +======= + if err := filling(e.Columns, out, false, false, nil); err != nil { + return nil, err + } + } else if e.IsUpdate() { + var newColsMap map[string]*model.Column + if config.OnlyOutputUpdatedColumns { + newColsMap = make(map[string]*model.Column, len(e.Columns)) + for _, col := range e.Columns { + newColsMap[col.Name] = col + } + } + out.RawString(",\"old\":") + if err := filling(e.PreColumns, out, config.OnlyOutputUpdatedColumns, false, newColsMap); err != nil { + return nil, err + } + out.RawString(",\"data\":") + if err := filling(e.Columns, out, false, false, nil); err != nil { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go return nil, err } } else { log.Panic("unreachable event type", zap.Any("event", e)) } +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go if c.enableTiDBExtension { +======= + if config.EnableTiDBExtension { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go const prefix string = ",\"_tidb\":" out.RawString(prefix) out.RawByte('{') @@ -258,7 +315,30 @@ func eventTypeString(e *model.RowChangedEvent) string { return "UPDATE" } +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go func (c *JSONBatchEncoder) newJSONMessageForDDL(e *model.DDLEvent) canalJSONMessageInterface { +======= +// JSONRowEventEncoder encodes row event in JSON format +type JSONRowEventEncoder struct { + builder *canalEntryBuilder + messages []*common.Message + + config *common.Config +} + +// newJSONRowEventEncoder creates a new 
JSONRowEventEncoder +func newJSONRowEventEncoder(config *common.Config) codec.RowEventEncoder { + encoder := &JSONRowEventEncoder{ + builder: newCanalEntryBuilder(), + messages: make([]*common.Message, 0, 1), + + config: config, + } + return encoder +} + +func (c *JSONRowEventEncoder) newJSONMessageForDDL(e *model.DDLEvent) canalJSONMessageInterface { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go msg := &JSONMessage{ ID: 0, // ignored by both Canal Adapter and Flink Schema: e.TableInfo.TableName.Schema, @@ -270,7 +350,7 @@ func (c *JSONBatchEncoder) newJSONMessageForDDL(e *model.DDLEvent) canalJSONMess Query: e.Query, } - if !c.enableTiDBExtension { + if !c.config.EnableTiDBExtension { return msg } @@ -293,9 +373,15 @@ func (c *JSONBatchEncoder) newJSONMessage4CheckpointEvent(ts uint64) *canalJSONM } } +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go // EncodeCheckpointEvent implements the EventBatchEncoder interface func (c *JSONBatchEncoder) EncodeCheckpointEvent(ts uint64) (*common.Message, error) { if !c.enableTiDBExtension { +======= +// EncodeCheckpointEvent implements the RowEventEncoder interface +func (c *JSONRowEventEncoder) EncodeCheckpointEvent(ts uint64) (*common.Message, error) { + if !c.config.EnableTiDBExtension { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go return nil, nil } @@ -314,7 +400,11 @@ func (c *JSONBatchEncoder) AppendRowChangedEvent( e *model.RowChangedEvent, callback func(), ) error { +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go value, err := c.newJSONMessageForDML(e) +======= + value, err := newJSONMessageForDML(c.builder, e, c.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go if err != nil { return errors.Trace(err) } @@ -324,9 +414,9 @@ func (c *JSONBatchEncoder) AppendRowChangedEvent( length := len(value) + common.MaxRecordOverhead // for single message that is longer than max-message-bytes, do not send it. 
- if length > c.maxMessageBytes { + if length > c.config.MaxMessageBytes { log.Warn("Single message is too large for canal-json", - zap.Int("maxMessageBytes", c.maxMessageBytes), + zap.Int("maxMessageBytes", c.config.MaxMessageBytes), zap.Int("length", length), zap.Any("table", e.Table)) return cerror.ErrMessageTooLarge.GenWithStackByArgs() diff --git a/cdc/sink/codec/canal/canal_json_encoder_test.go b/cdc/sink/codec/canal/canal_json_encoder_test.go index ba74694187b..5fa5b501673 100644 --- a/cdc/sink/codec/canal/canal_json_encoder_test.go +++ b/cdc/sink/codec/canal/canal_json_encoder_test.go @@ -33,6 +33,7 @@ func TestBuildJSONBatchEncoder(t *testing.T) { builder := &jsonBatchEncoderBuilder{config: cfg} encoder, ok := builder.Build().(*JSONBatchEncoder) require.True(t, ok) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go require.False(t, encoder.enableTiDBExtension) cfg.EnableTiDBExtension = true @@ -40,6 +41,9 @@ func TestBuildJSONBatchEncoder(t *testing.T) { encoder, ok = builder.Build().(*JSONBatchEncoder) require.True(t, ok) require.True(t, encoder.enableTiDBExtension) +======= + require.NotNil(t, encoder.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go } func TestNewCanalJSONMessage4DML(t *testing.T) { @@ -53,7 +57,11 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { encoder, ok := e.(*JSONBatchEncoder) require.True(t, ok) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err := encoder.newJSONMessageForDML(testCaseInsert) +======= + data, err := newJSONMessageForDML(encoder.builder, testCaseInsert, encoder.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.Nil(t, err) var msg canalJSONMessageInterface = &JSONMessage{} err = json.Unmarshal(data, msg) @@ -68,6 +76,12 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.Equal(t, "person", jsonMsg.Table) require.False(t, jsonMsg.IsDDL) + for _, col := range testCaseInsert.Columns { + require.Contains(t, jsonMsg.Data[0], col.Name) + require.Contains(t, jsonMsg.SQLType, col.Name) + require.Contains(t, jsonMsg.MySQLType, col.Name) + } + // check data is enough obtainedDataMap := jsonMsg.getData() require.NotNil(t, obtainedDataMap) @@ -96,7 +110,11 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.Equal(t, item.expectedEncodedValue, obtainedValue) } +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err = encoder.newJSONMessageForDML(testCaseUpdate) +======= + data, err = newJSONMessageForDML(encoder.builder, testCaseUpdate, encoder.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.Nil(t, err) jsonMsg = &JSONMessage{} err = json.Unmarshal(data, jsonMsg) @@ -105,7 +123,20 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.NotNil(t, jsonMsg.Old) require.Equal(t, "UPDATE", jsonMsg.EventType) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err = encoder.newJSONMessageForDML(testCaseDelete) +======= + for _, col := range testCaseUpdate.Columns { + require.Contains(t, jsonMsg.Data[0], col.Name) + require.Contains(t, jsonMsg.SQLType, col.Name) + require.Contains(t, jsonMsg.MySQLType, col.Name) + } + for _, col := range 
testCaseUpdate.PreColumns { + require.Contains(t, jsonMsg.Old[0], col.Name) + } + + data, err = newJSONMessageForDML(encoder.builder, testCaseDelete, encoder.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.Nil(t, err) jsonMsg = &JSONMessage{} err = json.Unmarshal(data, jsonMsg) @@ -114,15 +145,50 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.Nil(t, jsonMsg.Old) require.Equal(t, "DELETE", jsonMsg.EventType) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go e = newJSONBatchEncoder(&common.Config{ EnableTiDBExtension: true, Terminator: "", +======= + for _, col := range testCaseDelete.PreColumns { + require.Contains(t, jsonMsg.Data[0], col.Name) + } + + data, err = newJSONMessageForDML(encoder.builder, testCaseDelete, &common.Config{OnlyHandleKeyColumns: true}) + require.NoError(t, err) + jsonMsg = &JSONMessage{} + err = json.Unmarshal(data, jsonMsg) + require.NoError(t, err) + require.NotNil(t, jsonMsg.Data) + require.Nil(t, jsonMsg.Old) + + for _, col := range testCaseDelete.PreColumns { + if col.Flag.IsHandleKey() { + require.Contains(t, jsonMsg.Data[0], col.Name) + require.Contains(t, jsonMsg.SQLType, col.Name) + require.Contains(t, jsonMsg.MySQLType, col.Name) + } else { + require.NotContains(t, jsonMsg.Data[0], col.Name) + require.NotContains(t, jsonMsg.SQLType, col.Name) + require.NotContains(t, jsonMsg.MySQLType, col.Name) + } + } + + e = newJSONRowEventEncoder(&common.Config{ + EnableTiDBExtension: true, + Terminator: "", + OnlyOutputUpdatedColumns: true, +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go }) require.NotNil(t, e) encoder, ok = e.(*JSONBatchEncoder) require.True(t, ok) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err = encoder.newJSONMessageForDML(testCaseUpdate) +======= + data, err = newJSONMessageForDML(encoder.builder, testCaseUpdate, encoder.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.Nil(t, err) withExtension := &canalJSONMessageWithTiDBExtension{} @@ -131,12 +197,34 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.NotNil(t, withExtension.Extensions) require.Equal(t, testCaseUpdate.CommitTs, withExtension.Extensions.CommitTs) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go +======= + + encoder, ok = e.(*JSONRowEventEncoder) + require.True(t, ok) + data, err = newJSONMessageForDML(encoder.builder, testCaseUpdate, encoder.config) + require.Nil(t, err) + + withExtension = &canalJSONMessageWithTiDBExtension{} + err = json.Unmarshal(data, withExtension) + require.Nil(t, err) + require.Equal(t, 0, len(withExtension.JSONMessage.Old[0])) + + require.NotNil(t, withExtension.Extensions) + require.Equal(t, testCaseUpdate.CommitTs, withExtension.Extensions.CommitTs) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go } func TestNewCanalJSONMessageFromDDL(t *testing.T) { t.Parallel() +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder()} require.NotNil(t, encoder) +======= + + encoder, ok := 
newJSONRowEventEncoder(&common.Config{}).(*JSONRowEventEncoder) + require.True(t, ok) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go message := encoder.newJSONMessageForDDL(testCaseDDL) require.NotNil(t, message) @@ -150,8 +238,15 @@ func TestNewCanalJSONMessageFromDDL(t *testing.T) { require.Equal(t, testCaseDDL.Query, msg.Query) require.Equal(t, "CREATE", msg.EventType) +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go encoder = &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: true} require.NotNil(t, encoder) +======= + encoder, ok = newJSONRowEventEncoder(&common.Config{ + EnableTiDBExtension: true, + }).(*JSONRowEventEncoder) + require.True(t, ok) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go message = encoder.newJSONMessageForDDL(testCaseDDL) require.NotNil(t, message) @@ -202,7 +297,18 @@ func TestEncodeCheckpointEvent(t *testing.T) { t.Parallel() var watermark uint64 = 2333 for _, enable := range []bool{false, true} { +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: enable} +======= + config := &common.Config{ + EnableTiDBExtension: enable, + } + encoder := &JSONRowEventEncoder{ + builder: newCanalEntryBuilder(), + config: config, + } + +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.NotNil(t, encoder) msg, err := encoder.EncodeCheckpointEvent(watermark) @@ -239,9 +345,15 @@ func TestEncodeCheckpointEvent(t *testing.T) { func TestCheckpointEventValueMarshal(t *testing.T) { t.Parallel() var watermark uint64 = 1024 +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go encoder := &JSONBatchEncoder{ builder: newCanalEntryBuilder(), enableTiDBExtension: true, +======= + encoder := &JSONRowEventEncoder{ + builder: newCanalEntryBuilder(), + config: &common.Config{EnableTiDBExtension: true}, +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go } require.NotNil(t, encoder) msg, err := encoder.EncodeCheckpointEvent(watermark) @@ -286,7 +398,14 @@ func TestCheckpointEventValueMarshal(t *testing.T) { func TestDDLEventWithExtensionValueMarshal(t *testing.T) { t.Parallel() +<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: true} +======= + encoder := &JSONRowEventEncoder{ + builder: newCanalEntryBuilder(), + config: &common.Config{EnableTiDBExtension: true}, + } +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.NotNil(t, encoder) message := encoder.newJSONMessageForDDL(testCaseDDL) diff --git a/cdc/sink/codec/canal/canal_test_util.go b/cdc/sink/codec/canal/canal_test_util.go index 368c334ff42..578653e0685 100644 --- a/cdc/sink/codec/canal/canal_test_util.go +++ b/cdc/sink/codec/canal/canal_test_util.go @@ -35,7 +35,7 @@ type testColumnTuple struct { var ( testColumnsTable = []*testColumnTuple{ { - 
&model.Column{Name: "tinyint", Type: mysql.TypeTiny, Value: int64(127)}, + &model.Column{Name: "tinyint", Flag: model.HandleKeyFlag | model.PrimaryKeyFlag, Type: mysql.TypeTiny, Value: int64(127)}, "tinyint", internal.JavaSQLTypeTINYINT, "127", "127", }, diff --git a/cdc/sink/codec/common/config_test.go b/cdc/sink/codec/common/config_test.go index 1aac36e58d2..8b21cbde772 100644 --- a/cdc/sink/codec/common/config_test.go +++ b/cdc/sink/codec/common/config_test.go @@ -55,10 +55,16 @@ func TestConfigApplyValidate(t *testing.T) { err = c.Apply(sinkURI, replicaConfig) require.NoError(t, err) require.True(t, c.EnableTiDBExtension) + require.False(t, c.OnlyHandleKeyColumns) err = c.Validate() require.NoError(t, err) + replicaConfig.EnableOldValue = false + err = c.Apply(sinkURI, replicaConfig) + require.NoError(t, err) + require.True(t, c.OnlyHandleKeyColumns) + uri = "kafka://127.0.0.1:9092/abc?protocol=canal-json&enable-tidb-extension=a" sinkURI, err = url.Parse(uri) require.NoError(t, err) diff --git a/cdc/sink/codec/craft/craft_encoder.go b/cdc/sink/codec/craft/craft_encoder.go index af950dffa0f..86860ccce7c 100644 --- a/cdc/sink/codec/craft/craft_encoder.go +++ b/cdc/sink/codec/craft/craft_encoder.go @@ -28,9 +28,7 @@ type BatchEncoder struct { messageBuf []*common.Message callbackBuf []func() - // configs - MaxMessageBytes int - MaxBatchSize int + config *common.Config allocator *SliceAllocator } @@ -49,11 +47,11 @@ func (e *BatchEncoder) AppendRowChangedEvent( ev *model.RowChangedEvent, callback func(), ) error { - rows, size := e.rowChangedBuffer.AppendRowChangedEvent(ev) + rows, size := e.rowChangedBuffer.AppendRowChangedEvent(ev, e.config.OnlyHandleKeyColumns) if callback != nil { e.callbackBuf = append(e.callbackBuf, callback) } - if size > e.MaxMessageBytes || rows >= e.MaxBatchSize { + if size > e.config.MaxMessageBytes || rows >= e.config.MaxBatchSize { e.flush() } return nil @@ -98,11 +96,15 @@ func (e *BatchEncoder) flush() { } // NewBatchEncoder creates a new BatchEncoder. +<<<<<<< HEAD:cdc/sink/codec/craft/craft_encoder.go func NewBatchEncoder() codec.EventBatchEncoder { +======= +func NewBatchEncoder(config *common.Config) codec.RowEventEncoder { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/craft/craft_encoder.go // 64 is a magic number that come up with these assumptions and manual benchmark. // 1. Most table will not have more than 64 columns // 2. It only worth allocating slices in batch for slices that's small enough - return NewBatchEncoderWithAllocator(NewSliceAllocator(64)) + return NewBatchEncoderWithAllocator(NewSliceAllocator(64), config) } type batchEncoderBuilder struct { @@ -110,11 +112,16 @@ type batchEncoderBuilder struct { } // Build a BatchEncoder +<<<<<<< HEAD:cdc/sink/codec/craft/craft_encoder.go func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { encoder := NewBatchEncoder() encoder.(*BatchEncoder).MaxMessageBytes = b.config.MaxMessageBytes encoder.(*BatchEncoder).MaxBatchSize = b.config.MaxBatchSize return encoder +======= +func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { + return NewBatchEncoder(b.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/craft/craft_encoder.go } // NewBatchEncoderBuilder creates a craft batchEncoderBuilder. 
@@ -123,11 +130,16 @@ func NewBatchEncoderBuilder(config *common.Config) codec.EncoderBuilder { } // NewBatchEncoderWithAllocator creates a new BatchEncoder with given allocator. +<<<<<<< HEAD:cdc/sink/codec/craft/craft_encoder.go func NewBatchEncoderWithAllocator(allocator *SliceAllocator) codec.EventBatchEncoder { +======= +func NewBatchEncoderWithAllocator(allocator *SliceAllocator, config *common.Config) codec.RowEventEncoder { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/craft/craft_encoder.go return &BatchEncoder{ allocator: allocator, messageBuf: make([]*common.Message, 0, 2), callbackBuf: make([]func(), 0), rowChangedBuffer: NewRowChangedEventBuffer(allocator), + config: config, } } diff --git a/cdc/sink/codec/craft/craft_encoder_test.go b/cdc/sink/codec/craft/craft_encoder_test.go index 1da596de907..626527d1a19 100644 --- a/cdc/sink/codec/craft/craft_encoder_test.go +++ b/cdc/sink/codec/craft/craft_encoder_test.go @@ -104,8 +104,7 @@ func TestBuildCraftBatchEncoder(t *testing.T) { builder := &batchEncoderBuilder{config: cfg} encoder, ok := builder.Build().(*BatchEncoder) require.True(t, ok) - require.Equal(t, cfg.MaxBatchSize, encoder.MaxBatchSize) - require.Equal(t, cfg.MaxMessageBytes, encoder.MaxMessageBytes) + require.NotNil(t, encoder.config) } func testBatchCodec( diff --git a/cdc/sink/codec/craft/model.go b/cdc/sink/codec/craft/model.go index 12943b77658..71af0e88f14 100644 --- a/cdc/sink/codec/craft/model.go +++ b/cdc/sink/codec/craft/model.go @@ -366,7 +366,7 @@ func decodeColumnGroup(bits []byte, allocator *SliceAllocator, dict *termDiction }, nil } -func newColumnGroup(allocator *SliceAllocator, ty byte, columns []*model.Column) (int, *columnGroup) { +func newColumnGroup(allocator *SliceAllocator, ty byte, columns []*model.Column, onlyHandleKeyColumns bool) (int, *columnGroup) { l := len(columns) if l == 0 { return 0, nil @@ -381,6 +381,9 @@ func newColumnGroup(allocator *SliceAllocator, ty byte, columns []*model.Column) if col == nil { continue } + if onlyHandleKeyColumns && !col.Flag.IsHandleKey() { + continue + } names[idx] = col.Name types[idx] = uint64(col.Type) flags[idx] = uint64(col.Flag) @@ -404,7 +407,7 @@ func newColumnGroup(allocator *SliceAllocator, ty byte, columns []*model.Column) // Row changed message is basically an array of column groups type rowChangedEvent = []*columnGroup -func newRowChangedMessage(allocator *SliceAllocator, ev *model.RowChangedEvent) (int, rowChangedEvent) { +func newRowChangedMessage(allocator *SliceAllocator, ev *model.RowChangedEvent, onlyHandleKeyColumns bool) (int, rowChangedEvent) { numGroups := 0 if ev.PreColumns != nil { numGroups++ @@ -415,12 +418,13 @@ func newRowChangedMessage(allocator *SliceAllocator, ev *model.RowChangedEvent) groups := allocator.columnGroupSlice(numGroups) estimatedSize := 0 idx := 0 - if size, group := newColumnGroup(allocator, columnGroupTypeNew, ev.Columns); group != nil { + if size, group := newColumnGroup(allocator, columnGroupTypeNew, ev.Columns, false); group != nil { groups[idx] = group idx++ estimatedSize += size } - if size, group := newColumnGroup(allocator, columnGroupTypeOld, ev.PreColumns); group != nil { + onlyHandleKeyColumns = onlyHandleKeyColumns && ev.IsDelete() + if size, group := newColumnGroup(allocator, columnGroupTypeOld, ev.PreColumns, onlyHandleKeyColumns); group != nil { groups[idx] = group estimatedSize += size } @@ -454,7 +458,7 @@ func (b *RowChangedEventBuffer) Encode() []byte { 
} // AppendRowChangedEvent append a new event to buffer -func (b *RowChangedEventBuffer) AppendRowChangedEvent(ev *model.RowChangedEvent) (rows, size int) { +func (b *RowChangedEventBuffer) AppendRowChangedEvent(ev *model.RowChangedEvent, onlyHandleKeyColumns bool) (rows, size int) { var partition int64 = -1 if ev.Table.IsPartition { partition = ev.Table.TableID @@ -479,7 +483,7 @@ func (b *RowChangedEventBuffer) AppendRowChangedEvent(ev *model.RowChangedEvent) if b.eventsCount+1 > len(b.events) { b.events = b.allocator.resizeRowChangedEventSlice(b.events, newBufferSize(b.eventsCount)) } - size, message := newRowChangedMessage(b.allocator, ev) + size, message := newRowChangedMessage(b.allocator, ev, onlyHandleKeyColumns) b.events[b.eventsCount] = message b.eventsCount++ b.estimatedSize += size diff --git a/cdc/sink/codec/maxwell/maxwell_encoder.go b/cdc/sink/codec/maxwell/maxwell_encoder.go index a23379e4132..b7954d70b06 100644 --- a/cdc/sink/codec/maxwell/maxwell_encoder.go +++ b/cdc/sink/codec/maxwell/maxwell_encoder.go @@ -31,6 +31,8 @@ type BatchEncoder struct { valueBuf *bytes.Buffer callbackBuf []func() batchSize int + + config *common.Config } // EncodeCheckpointEvent implements the EventBatchEncoder interface @@ -47,7 +49,7 @@ func (d *BatchEncoder) AppendRowChangedEvent( e *model.RowChangedEvent, callback func(), ) error { - _, valueMsg := rowChangeToMaxwellMsg(e) + _, valueMsg := rowChangeToMaxwellMsg(e, d.config.OnlyHandleKeyColumns) value, err := valueMsg.encode() if err != nil { return errors.Trace(err) @@ -109,19 +111,27 @@ func (d *BatchEncoder) reset() { } // newBatchEncoder creates a new maxwell BatchEncoder. +<<<<<<< HEAD:cdc/sink/codec/maxwell/maxwell_encoder.go func newBatchEncoder() codec.EventBatchEncoder { +======= +func newBatchEncoder(config *common.Config) codec.RowEventEncoder { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/maxwell/maxwell_encoder.go batch := &BatchEncoder{ keyBuf: &bytes.Buffer{}, valueBuf: &bytes.Buffer{}, callbackBuf: make([]func(), 0), + config: config, } batch.reset() return batch } -type batchEncoderBuilder struct{} +type batchEncoderBuilder struct { + config *common.Config +} // NewBatchEncoderBuilder creates a maxwell batchEncoderBuilder. 
+<<<<<<< HEAD:cdc/sink/codec/maxwell/maxwell_encoder.go func NewBatchEncoderBuilder() codec.EncoderBuilder { return &batchEncoderBuilder{} } @@ -129,4 +139,15 @@ func NewBatchEncoderBuilder() codec.EncoderBuilder { // Build a `maxwellBatchEncoder` func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { return newBatchEncoder() +======= +func NewBatchEncoderBuilder(config *common.Config) codec.RowEventEncoderBuilder { + return &batchEncoderBuilder{ + config: config, + } +} + +// Build a `maxwellBatchEncoder` +func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { + return newBatchEncoder(b.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/maxwell/maxwell_encoder.go } diff --git a/cdc/sink/codec/maxwell/maxwell_encoder_test.go b/cdc/sink/codec/maxwell/maxwell_encoder_test.go index 4220db9a15b..d9703b8ce52 100644 --- a/cdc/sink/codec/maxwell/maxwell_encoder_test.go +++ b/cdc/sink/codec/maxwell/maxwell_encoder_test.go @@ -20,6 +20,7 @@ import ( timodel "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/sink/codec/common" "github.com/stretchr/testify/require" ) @@ -33,7 +34,7 @@ func TestMaxwellBatchCodec(t *testing.T) { Columns: []*model.Column{{Name: "col1", Type: 3, Value: 10}}, }}, {}} for _, cs := range rowCases { - encoder := newEncoder() + encoder := newEncoder(&common.Config{}) for _, row := range cs { err := encoder.AppendRowChangedEvent(context.Background(), "", row, nil) require.Nil(t, err) @@ -59,7 +60,7 @@ func TestMaxwellBatchCodec(t *testing.T) { Type: 1, }}} for _, cs := range ddlCases { - encoder := newEncoder() + encoder := newEncoder(&common.Config{}) for _, ddl := range cs { msg, err := encoder.EncodeDDLEvent(ddl) require.Nil(t, err) @@ -69,7 +70,7 @@ func TestMaxwellBatchCodec(t *testing.T) { } func TestMaxwellAppendRowChangedEventWithCallback(t *testing.T) { - encoder := newBatchEncoder() + encoder := newBatchEncoder(&common.Config{}) require.NotNil(t, encoder) count := 0 diff --git a/cdc/sink/codec/maxwell/maxwell_message.go b/cdc/sink/codec/maxwell/maxwell_message.go index 0ef5836f655..db3b9121d1e 100644 --- a/cdc/sink/codec/maxwell/maxwell_message.go +++ b/cdc/sink/codec/maxwell/maxwell_message.go @@ -43,7 +43,7 @@ func (m *maxwellMessage) encode() ([]byte, error) { return data, cerror.WrapError(cerror.ErrMaxwellEncodeFailed, err) } -func rowChangeToMaxwellMsg(e *model.RowChangedEvent) (*internal.MessageKey, *maxwellMessage) { +func rowChangeToMaxwellMsg(e *model.RowChangedEvent, onlyHandleKeyColumns bool) (*internal.MessageKey, *maxwellMessage) { var partition *int64 if e.Table.IsPartition { partition = &e.Table.TableID @@ -68,6 +68,9 @@ func rowChangeToMaxwellMsg(e *model.RowChangedEvent) (*internal.MessageKey, *max if e.IsDelete() { value.Type = "delete" for _, v := range e.PreColumns { + if onlyHandleKeyColumns && !v.Flag.IsHandleKey() { + continue + } switch v.Type { case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: if v.Value == nil { diff --git a/cdc/sink/codec/maxwell/maxwell_message_test.go b/cdc/sink/codec/maxwell/maxwell_message_test.go index 852886cc444..3928d3f36c4 100644 --- a/cdc/sink/codec/maxwell/maxwell_message_test.go +++ b/cdc/sink/codec/maxwell/maxwell_message_test.go @@ -54,7 +54,7 @@ func TestEncodeBinaryToMaxwell(t *testing.T) { Columns: 
[]*model.Column{column}, } - key, msg := rowChangeToMaxwellMsg(e) + key, msg := rowChangeToMaxwellMsg(e, false) require.NotNil(t, key) require.NotNil(t, msg) } diff --git a/cdc/sink/codec/open/open_protocol_encoder.go b/cdc/sink/codec/open/open_protocol_encoder.go index 2ace0a0ef89..3a0aa7cc7b8 100644 --- a/cdc/sink/codec/open/open_protocol_encoder.go +++ b/cdc/sink/codec/open/open_protocol_encoder.go @@ -34,9 +34,13 @@ type BatchEncoder struct { callbackBuff []func() curBatchSize int +<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go // configs MaxMessageBytes int MaxBatchSize int +======= + config *common.Config +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go } // AppendRowChangedEvent implements the EventBatchEncoder interface @@ -46,12 +50,16 @@ func (d *BatchEncoder) AppendRowChangedEvent( e *model.RowChangedEvent, callback func(), ) error { - keyMsg, valueMsg := rowChangeToMsg(e) + keyMsg, valueMsg := rowChangeToMsg(e, d.config.OnlyHandleKeyColumns) key, err := keyMsg.Encode() if err != nil { return errors.Trace(err) } +<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go value, err := valueMsg.encode() +======= + value, err := valueMsg.encode(d.config.OnlyOutputUpdatedColumns) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go if err != nil { return errors.Trace(err) } @@ -64,9 +72,9 @@ func (d *BatchEncoder) AppendRowChangedEvent( // for single message that is longer than max-message-bytes, do not send it. // 16 is the length of `keyLenByte` and `valueLenByte`, 8 is the length of `versionHead` length := len(key) + len(value) + common.MaxRecordOverhead + 16 + 8 - if length > d.MaxMessageBytes { + if length > d.config.MaxMessageBytes { log.Warn("Single message is too large for open-protocol", - zap.Int("maxMessageBytes", d.MaxMessageBytes), + zap.Int("maxMessageBytes", d.config.MaxMessageBytes), zap.Int("length", length), zap.Any("table", e.Table), zap.Any("key", key)) @@ -74,8 +82,8 @@ func (d *BatchEncoder) AppendRowChangedEvent( } if len(d.messageBuf) == 0 || - d.curBatchSize >= d.MaxBatchSize || - d.messageBuf[len(d.messageBuf)-1].Length()+len(key)+len(value)+16 > d.MaxMessageBytes { + d.curBatchSize >= d.config.MaxBatchSize || + d.messageBuf[len(d.messageBuf)-1].Length()+len(key)+len(value)+16 > d.config.MaxMessageBytes { // Before we create a new message, we should handle the previous callbacks. d.tryBuildCallback() versionHead := make([]byte, 8) @@ -189,12 +197,17 @@ type batchEncoderBuilder struct { } // Build a BatchEncoder +<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { encoder := NewBatchEncoder() encoder.(*BatchEncoder).MaxMessageBytes = b.config.MaxMessageBytes encoder.(*BatchEncoder).MaxBatchSize = b.config.MaxBatchSize return encoder +======= +func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { + return NewBatchEncoder(b.config) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go } // NewBatchEncoderBuilder creates an open-protocol batchEncoderBuilder. @@ -203,7 +216,14 @@ func NewBatchEncoderBuilder(config *common.Config) codec.EncoderBuilder { } // NewBatchEncoder creates a new BatchEncoder. 
+<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go func NewBatchEncoder() codec.EventBatchEncoder { batch := &BatchEncoder{} return batch +======= +func NewBatchEncoder(config *common.Config) codec.RowEventEncoder { + return &BatchEncoder{ + config: config, + } +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go } diff --git a/cdc/sink/codec/open/open_protocol_encoder_test.go b/cdc/sink/codec/open/open_protocol_encoder_test.go index 49ddd6b6dd6..80c8305d65f 100644 --- a/cdc/sink/codec/open/open_protocol_encoder_test.go +++ b/cdc/sink/codec/open/open_protocol_encoder_test.go @@ -31,8 +31,7 @@ func TestBuildOpenProtocolBatchEncoder(t *testing.T) { builder := &batchEncoderBuilder{config: config} encoder, ok := builder.Build().(*BatchEncoder) require.True(t, ok) - require.Equal(t, config.MaxBatchSize, encoder.MaxBatchSize) - require.Equal(t, config.MaxMessageBytes, encoder.MaxMessageBytes) + require.NotNil(t, encoder.config) } func TestMaxMessageBytes(t *testing.T) { @@ -131,7 +130,6 @@ func TestOpenProtocolAppendRowChangedEventWithCallback(t *testing.T) { builder := &batchEncoderBuilder{config: cfg} encoder, ok := builder.Build().(*BatchEncoder) require.True(t, ok) - require.Equal(t, cfg.MaxBatchSize, encoder.MaxBatchSize) count := 0 diff --git a/cdc/sink/codec/open/open_protocol_message.go b/cdc/sink/codec/open/open_protocol_message.go index 4c06169fd9f..4032eb3306e 100644 --- a/cdc/sink/codec/open/open_protocol_message.go +++ b/cdc/sink/codec/open/open_protocol_message.go @@ -76,7 +76,7 @@ func newResolvedMessage(ts uint64) *internal.MessageKey { } } -func rowChangeToMsg(e *model.RowChangedEvent) (*internal.MessageKey, *messageRow) { +func rowChangeToMsg(e *model.RowChangedEvent, onlyHandleKeyColumns bool) (*internal.MessageKey, *messageRow) { var partition *int64 if e.Table.IsPartition { partition = &e.Table.TableID @@ -91,10 +91,10 @@ func rowChangeToMsg(e *model.RowChangedEvent) (*internal.MessageKey, *messageRow } value := &messageRow{} if e.IsDelete() { - value.Delete = rowChangeColumns2CodecColumns(e.PreColumns) + value.Delete = rowChangeColumns2CodecColumns(e.PreColumns, onlyHandleKeyColumns) } else { - value.Update = rowChangeColumns2CodecColumns(e.Columns) - value.PreColumns = rowChangeColumns2CodecColumns(e.PreColumns) + value.Update = rowChangeColumns2CodecColumns(e.Columns, false) + value.PreColumns = rowChangeColumns2CodecColumns(e.PreColumns, false) } return key, value } @@ -123,12 +123,15 @@ func msgToRowChange(key *internal.MessageKey, value *messageRow) *model.RowChang return e } -func rowChangeColumns2CodecColumns(cols []*model.Column) map[string]internal.Column { +func rowChangeColumns2CodecColumns(cols []*model.Column, onlyHandleKeyColumns bool) map[string]internal.Column { jsonCols := make(map[string]internal.Column, len(cols)) for _, col := range cols { if col == nil { continue } + if onlyHandleKeyColumns && !col.Flag.IsHandleKey() { + continue + } c := internal.Column{} c.FromRowChangeColumn(col) jsonCols[col.Name] = c diff --git a/cdc/sink/codec/open/open_protocol_message_test.go b/cdc/sink/codec/open/open_protocol_message_test.go index 826eda172f7..4812bce2ebd 100644 --- a/cdc/sink/codec/open/open_protocol_message_test.go +++ b/cdc/sink/codec/open/open_protocol_message_test.go @@ -90,3 +90,130 @@ func TestVarBinaryCol(t *testing.T) { col2 := mqCol2.ToRowChangeColumn("test") require.Equal(t, col, col2) } +<<<<<<< 
HEAD:cdc/sink/codec/open/open_protocol_message_test.go +======= + +func TestOnlyOutputUpdatedColumn(t *testing.T) { + t.Parallel() + cases := []struct { + pre interface{} + updated interface{} + output bool + }{ + { + pre: []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}, + updated: []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}, + output: false, + }, + { + pre: uint64(1), + updated: uint64(1), + output: false, + }, + { + pre: nil, + updated: nil, + output: false, + }, + { + pre: float64(6.2), + updated: float32(6.2), + output: true, + }, + { + pre: uint64(1), + updated: int64(1), + output: true, + }, + { + pre: time.Time{}, + updated: time.Time{}, + output: false, + }, + { + pre: "time.Time{}", + updated: time.Time{}, + output: true, + }, + { + pre: "time.Time{}", + updated: "time.Time{}", + output: false, + }, + } + + for _, cs := range cases { + col := internal.Column{ + Value: cs.pre, + } + col2 := internal.Column{ + Value: cs.updated, + } + row := &messageRow{ + Update: map[string]internal.Column{"test": col2}, + PreColumns: map[string]internal.Column{"test": col}, + } + _, err := row.encode(true) + require.Nil(t, err) + _, ok := row.PreColumns["test"] + assert.Equal(t, cs.output, ok) + } +} + +func TestRowChanged2MsgOnlyHandleKeyColumns(t *testing.T) { + t.Parallel() + + insertEvent := &model.RowChangedEvent{ + CommitTs: 417318403368288260, + Table: &model.TableName{ + Schema: "schema", + Table: "table", + }, + Columns: []*model.Column{ + {Name: "id", Flag: model.HandleKeyFlag, Type: mysql.TypeLonglong, Value: 1}, + {Name: "a", Type: mysql.TypeLonglong, Value: 1}, + }, + } + _, value := rowChangeToMsg(insertEvent, true) + _, ok := value.Update["a"] + require.True(t, ok) + + updateEvent := &model.RowChangedEvent{ + CommitTs: 417318403368288260, + Table: &model.TableName{ + Schema: "schema", + Table: "table", + }, + Columns: []*model.Column{ + {Name: "id", Flag: model.HandleKeyFlag, Type: mysql.TypeLonglong, Value: 1}, + {Name: "a", Type: mysql.TypeLonglong, Value: 2}, + }, + PreColumns: []*model.Column{ + {Name: "id", Flag: model.HandleKeyFlag, Type: mysql.TypeLonglong, Value: 1}, + {Name: "a", Type: mysql.TypeLonglong, Value: 1}, + }, + } + _, value = rowChangeToMsg(updateEvent, true) + _, ok = value.PreColumns["a"] + require.True(t, ok) + + deleteEvent := &model.RowChangedEvent{ + CommitTs: 417318403368288260, + Table: &model.TableName{ + Schema: "schema", + Table: "table", + }, + PreColumns: []*model.Column{ + {Name: "id", Flag: model.HandleKeyFlag, Type: mysql.TypeLonglong, Value: 1}, + {Name: "a", Type: mysql.TypeLonglong, Value: 2}, + }, + } + _, value = rowChangeToMsg(deleteEvent, true) + _, ok = value.Delete["a"] + require.False(t, ok) + + _, value = rowChangeToMsg(deleteEvent, false) + _, ok = value.Delete["a"] + require.True(t, ok) +} +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_message_test.go diff --git a/errors.toml b/errors.toml index cc3785c1ba3..89125e14737 100755 --- a/errors.toml +++ b/errors.toml @@ -431,6 +431,11 @@ error = ''' illegal parameter for sorter: %s ''' +["CDC:ErrIncompatibleConfig"] +error = ''' +incompatible configuration +''' + ["CDC:ErrIncompatibleSinkConfig"] error = ''' incompatible configuration in sink uri(%s) and config file(%s), please try to update the configuration only through sink uri diff --git a/pkg/cmd/cli/cli_changefeed_create.go b/pkg/cmd/cli/cli_changefeed_create.go index a437fab1a5f..2e80fd2758d 100644 --- 
a/pkg/cmd/cli/cli_changefeed_create.go +++ b/pkg/cmd/cli/cli_changefeed_create.go @@ -29,7 +29,6 @@ import ( "github.com/pingcap/tiflow/pkg/cmd/factory" "github.com/pingcap/tiflow/pkg/cmd/util" "github.com/pingcap/tiflow/pkg/config" - cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/filter" "github.com/spf13/cobra" "github.com/tikv/client-go/v2/oracle" @@ -151,12 +150,12 @@ func (o *createChangefeedOptions) completeReplicaCfg( } } - if !cfg.EnableOldValue { - sinkURIParsed, err := url.Parse(o.commonChangefeedOptions.sinkURI) - if err != nil { - return cerror.WrapError(cerror.ErrSinkURIInvalid, err) - } + uri, err := url.Parse(o.commonChangefeedOptions.sinkURI) + if err != nil { + return err + } +<<<<<<< HEAD protocol := sinkURIParsed.Query().Get(config.ProtocolKey) if protocol != "" { cfg.Sink.Protocol = protocol @@ -173,6 +172,11 @@ func (o *createChangefeedOptions) completeReplicaCfg( log.Error("if use force replicate, old value feature must be enabled") return cerror.ErrOldValueNotEnabled.GenWithStackByArgs() } +======= + err = cfg.AdjustEnableOldValueAndVerifyForceReplicate(uri) + if err != nil { + return err +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } for _, rules := range cfg.Sink.DispatchRules { diff --git a/pkg/cmd/cli/cli_changefeed_create_test.go b/pkg/cmd/cli/cli_changefeed_create_test.go index 0c7191bde49..2664bc08db1 100644 --- a/pkg/cmd/cli/cli_changefeed_create_test.go +++ b/pkg/cmd/cli/cli_changefeed_create_test.go @@ -24,6 +24,7 @@ import ( v2 "github.com/pingcap/tiflow/cdc/api/v2" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" + cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/spf13/cobra" "github.com/stretchr/testify/require" ) @@ -174,3 +175,38 @@ func TestChangefeedCreateCli(t *testing.T) { require.NoError(t, o.complete(f, cmd)) require.Contains(t, o.validate(cmd).Error(), "creating changefeed with `--sort-dir`") } + +func TestChangefeedCreateCliAdjustEnableOldValue(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + f := newMockFactory(ctrl) + + // enable old value, but use avro as the encoding protocol, should be set to false. 
+ dir := t.TempDir() + configPath := filepath.Join(dir, "adjust-old-value.toml") + err := os.WriteFile(configPath, []byte("enable-old-value=true"), 0o644) + require.NoError(t, err) + + cmd := new(cobra.Command) + o := newChangefeedCommonOptions() + o.addFlags(cmd) + + require.NoError(t, cmd.ParseFlags([]string{fmt.Sprintf("--config=%s", configPath)})) + require.NoError(t, cmd.ParseFlags([]string{"--sink-uri=kafka://127.0.0.1:9092/test?protocol=avro"})) + + opt := newCreateChangefeedOptions(o) + require.NoError(t, opt.complete(f, cmd)) + require.False(t, opt.cfg.EnableOldValue) + + // also enable the force replicate, should return error + configPath = filepath.Join(dir, "enable-old-value-force-replicate.toml") + err = os.WriteFile(configPath, []byte("enable-old-value=true\r\nforce-replicate = true"), 0o644) + require.NoError(t, err) + + require.NoError(t, cmd.ParseFlags([]string{"--sink-uri=kafka://127.0.0.1:9092/test?protocol=avro"})) + require.NoError(t, cmd.ParseFlags([]string{fmt.Sprintf("--config=%s", configPath)})) + + opt = newCreateChangefeedOptions(o) + err = opt.complete(f, cmd) + require.Error(t, cerror.ErrOldValueNotEnabled, err) +} diff --git a/pkg/config/replica_config.go b/pkg/config/replica_config.go index bc65ce26496..abde6f49f63 100644 --- a/pkg/config/replica_config.go +++ b/pkg/config/replica_config.go @@ -164,14 +164,19 @@ func (c *replicaConfig) fillFromV1(v1 *outdated.ReplicaConfigV1) { } // ValidateAndAdjust verifies and adjusts the replica configuration. -func (c *ReplicaConfig) ValidateAndAdjust(sinkURI *url.URL) error { - // check sink uri +func (c *ReplicaConfig) ValidateAndAdjust(sinkURI *url.URL) error { // check sink uri if c.Sink != nil { - err := c.Sink.validateAndAdjust(sinkURI, c.EnableOldValue) + err := c.Sink.validateAndAdjust(sinkURI) + if err != nil { + return err + } + + err = c.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) if err != nil { return err } } + if c.Consistent != nil { err := c.Consistent.ValidateAndAdjust() if err != nil { @@ -224,3 +229,53 @@ func GetSinkURIAndAdjustConfigWithSinkURI( return sinkURI, nil } + +// AdjustEnableOldValue adjust the old value configuration by the sink scheme and encoding protocol +func (c *ReplicaConfig) AdjustEnableOldValue(scheme, protocol string) { + if sink.IsMySQLCompatibleScheme(scheme) { + return + } + + if c.EnableOldValue { + _, ok := ForceDisableOldValueProtocols[protocol] + if ok { + log.Warn("Attempting to replicate with old value enabled, but the specified protocol must disable old value. "+ + "CDC will disable old value and continue.", zap.String("protocol", protocol)) + c.EnableOldValue = false + } + return + } + + _, ok := ForceEnableOldValueProtocols[protocol] + if ok { + log.Warn("Attempting to replicate with old value disabled, but the specified protocol must enable old value. "+ + "CDC will enable old value and continue.", zap.String("protocol", protocol)) + c.EnableOldValue = true + } +} + +// AdjustEnableOldValueAndVerifyForceReplicate adjust the old value configuration by the sink scheme and encoding protocol +// and then verify the force replicate. 
+func (c *ReplicaConfig) AdjustEnableOldValueAndVerifyForceReplicate(sinkURI *url.URL) error { + scheme := strings.ToLower(sinkURI.Scheme) + protocol := sinkURI.Query().Get(ProtocolKey) + if protocol != "" { + c.Sink.Protocol = util.AddressOf(protocol) + } + protocol = util.GetOrZero(c.Sink.Protocol) + c.AdjustEnableOldValue(scheme, protocol) + + if !c.ForceReplicate { + return nil + } + + // MySQL Sink require the old value feature must be enabled to allow delete event send to downstream. + if sink.IsMySQLCompatibleScheme(scheme) { + if !c.EnableOldValue { + log.Error("force replicate, old value feature is disabled for the changefeed using mysql sink") + return cerror.ErrIncompatibleConfig.GenWithStackByArgs() + } + } + + return nil +} diff --git a/pkg/config/replica_config_test.go b/pkg/config/replica_config_test.go index d5d584e4727..59fd8225a18 100644 --- a/pkg/config/replica_config_test.go +++ b/pkg/config/replica_config_test.go @@ -19,6 +19,13 @@ import ( "testing" "time" +<<<<<<< HEAD +======= + "github.com/aws/aws-sdk-go/aws" + cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/integrity" + "github.com/pingcap/tiflow/pkg/util" +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) "github.com/stretchr/testify/require" ) @@ -101,21 +108,40 @@ func TestReplicaConfigOutDated(t *testing.T) { func TestReplicaConfigValidate(t *testing.T) { t.Parallel() conf := GetDefaultReplicaConfig() +<<<<<<< HEAD require.Nil(t, conf.ValidateAndAdjust(nil)) +======= + + sinkURL, err := url.Parse("blackhole://") + require.NoError(t, err) + require.NoError(t, conf.ValidateAndAdjust(sinkURL)) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) // Incorrect sink configuration. conf = GetDefaultReplicaConfig() conf.Sink.Protocol = "canal" conf.EnableOldValue = false +<<<<<<< HEAD require.Regexp(t, ".*canal protocol requires old value to be enabled.*", conf.ValidateAndAdjust(nil)) +======= + + err = conf.ValidateAndAdjust(sinkURL) + require.NoError(t, err) + require.True(t, conf.EnableOldValue) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) conf = GetDefaultReplicaConfig() conf.Sink.DispatchRules = []*DispatchRule{ {Matcher: []string{"a.b"}, DispatcherRule: "d1", PartitionRule: "r1"}, } +<<<<<<< HEAD require.Regexp(t, ".*dispatcher and partition cannot be configured both.*", conf.ValidateAndAdjust(nil)) +======= + err = conf.ValidateAndAdjust(sinkURL) + require.Regexp(t, ".*dispatcher and partition cannot be configured both.*", err) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) // Correct sink configuration. 
conf = GetDefaultReplicaConfig() @@ -162,3 +188,75 @@ func TestValidateAndAdjust(t *testing.T) { cfg.Sink.EncoderConcurrency = -1 require.Error(t, cfg.ValidateAndAdjust(nil)) } + +func TestAdjustEnableOldValueAndVerifyForceReplicate(t *testing.T) { + t.Parallel() + + config := GetDefaultReplicaConfig() + config.EnableOldValue = false + + // mysql sink, do not adjust enable-old-value + sinkURI, err := url.Parse("mysql://") + require.NoError(t, err) + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.False(t, config.EnableOldValue) + + // mysql sink, `enable-old-value` false, `force-replicate` true, should return error + config.ForceReplicate = true + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.Error(t, cerror.ErrOldValueNotEnabled, err) + + // canal, `enable-old-value` false, `force-replicate` false, no error, `enable-old-value` adjust to true + config.ForceReplicate = false + config.EnableOldValue = false + // canal require old value enabled + sinkURI, err = url.Parse("kafka://127.0.0.1:9092/test?protocol=canal") + require.NoError(t, err) + + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.True(t, config.EnableOldValue) + + // canal, `force-replicate` true, `enable-old-value` true, no error + config.ForceReplicate = true + config.EnableOldValue = true + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.True(t, config.ForceReplicate) + require.True(t, config.EnableOldValue) + + // avro, `enable-old-value` false, `force-replicate` false, no error + config.ForceReplicate = false + config.EnableOldValue = false + sinkURI, err = url.Parse("kafka://127.0.0.1:9092/test?protocol=avro") + require.NoError(t, err) + + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.False(t, config.EnableOldValue) + + // avro, `enable-old-value` true, no error, set to false. no matter `force-replicate` + config.EnableOldValue = true + config.ForceReplicate = true + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.False(t, config.EnableOldValue) + + // csv, `enable-old-value` false, `force-replicate` false, no error + config.EnableOldValue = false + config.ForceReplicate = false + sinkURI, err = url.Parse("s3://xxx/yyy?protocol=csv") + require.NoError(t, err) + + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.False(t, config.EnableOldValue) + + // csv, `enable-old-value` true, no error, set to false. no matter `force-replicate` + config.EnableOldValue = true + config.ForceReplicate = true + err = config.AdjustEnableOldValueAndVerifyForceReplicate(sinkURI) + require.NoError(t, err) + require.False(t, config.EnableOldValue) +} diff --git a/pkg/config/sink.go b/pkg/config/sink.go index 93a164026af..2df9d50d2af 100644 --- a/pkg/config/sink.go +++ b/pkg/config/sink.go @@ -94,10 +94,16 @@ func (l AtomicityLevel) validate(scheme string) error { } // ForceEnableOldValueProtocols specifies which protocols need to be forced to enable old value. 
-var ForceEnableOldValueProtocols = []string{ - ProtocolCanal.String(), - ProtocolCanalJSON.String(), - ProtocolMaxwell.String(), +var ForceEnableOldValueProtocols = map[string]struct{}{ + ProtocolCanal.String(): {}, + ProtocolCanalJSON.String(): {}, + ProtocolMaxwell.String(): {}, +} + +// ForceDisableOldValueProtocols specifies protocols need to be forced to disable old value. +var ForceDisableOldValueProtocols = map[string]struct{}{ + ProtocolAvro.String(): {}, + ProtocolCsv.String(): {}, } // SinkConfig represents sink config for a changefeed @@ -243,11 +249,89 @@ type ColumnSelector struct { Columns []string `toml:"columns" json:"columns"` } +<<<<<<< HEAD func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL, enableOldValue bool) error { +======= +// CodecConfig represents a MQ codec configuration +type CodecConfig struct { + EnableTiDBExtension *bool `toml:"enable-tidb-extension" json:"enable-tidb-extension,omitempty"` + MaxBatchSize *int `toml:"max-batch-size" json:"max-batch-size,omitempty"` + AvroEnableWatermark *bool `toml:"avro-enable-watermark" json:"avro-enable-watermark"` + AvroDecimalHandlingMode *string `toml:"avro-decimal-handling-mode" json:"avro-decimal-handling-mode,omitempty"` + AvroBigintUnsignedHandlingMode *string `toml:"avro-bigint-unsigned-handling-mode" json:"avro-bigint-unsigned-handling-mode,omitempty"` +} + +// KafkaConfig represents a kafka sink configuration +type KafkaConfig struct { + PartitionNum *int32 `toml:"partition-num" json:"partition-num,omitempty"` + ReplicationFactor *int16 `toml:"replication-factor" json:"replication-factor,omitempty"` + KafkaVersion *string `toml:"kafka-version" json:"kafka-version,omitempty"` + MaxMessageBytes *int `toml:"max-message-bytes" json:"max-message-bytes,omitempty"` + Compression *string `toml:"compression" json:"compression,omitempty"` + KafkaClientID *string `toml:"kafka-client-id" json:"kafka-client-id,omitempty"` + AutoCreateTopic *bool `toml:"auto-create-topic" json:"auto-create-topic,omitempty"` + DialTimeout *string `toml:"dial-timeout" json:"dial-timeout,omitempty"` + WriteTimeout *string `toml:"write-timeout" json:"write-timeout,omitempty"` + ReadTimeout *string `toml:"read-timeout" json:"read-timeout,omitempty"` + RequiredAcks *int `toml:"required-acks" json:"required-acks,omitempty"` + SASLUser *string `toml:"sasl-user" json:"sasl-user,omitempty"` + SASLPassword *string `toml:"sasl-password" json:"sasl-password,omitempty"` + SASLMechanism *string `toml:"sasl-mechanism" json:"sasl-mechanism,omitempty"` + SASLGssAPIAuthType *string `toml:"sasl-gssapi-auth-type" json:"sasl-gssapi-auth-type,omitempty"` + SASLGssAPIKeytabPath *string `toml:"sasl-gssapi-keytab-path" json:"sasl-gssapi-keytab-path,omitempty"` + SASLGssAPIKerberosConfigPath *string `toml:"sasl-gssapi-kerberos-config-path" json:"sasl-gssapi-kerberos-config-path,omitempty"` + SASLGssAPIServiceName *string `toml:"sasl-gssapi-service-name" json:"sasl-gssapi-service-name,omitempty"` + SASLGssAPIUser *string `toml:"sasl-gssapi-user" json:"sasl-gssapi-user,omitempty"` + SASLGssAPIPassword *string `toml:"sasl-gssapi-password" json:"sasl-gssapi-password,omitempty"` + SASLGssAPIRealm *string `toml:"sasl-gssapi-realm" json:"sasl-gssapi-realm,omitempty"` + SASLGssAPIDisablePafxfast *bool `toml:"sasl-gssapi-disable-pafxfast" json:"sasl-gssapi-disable-pafxfast,omitempty"` + SASLOAuthClientID *string `toml:"sasl-oauth-client-id" json:"sasl-oauth-client-id,omitempty"` + SASLOAuthClientSecret *string `toml:"sasl-oauth-client-secret" 
json:"sasl-oauth-client-secret,omitempty"` + SASLOAuthTokenURL *string `toml:"sasl-oauth-token-url" json:"sasl-oauth-token-url,omitempty"` + SASLOAuthScopes []string `toml:"sasl-oauth-scopes" json:"sasl-oauth-scopes,omitempty"` + SASLOAuthGrantType *string `toml:"sasl-oauth-grant-type" json:"sasl-oauth-grant-type,omitempty"` + SASLOAuthAudience *string `toml:"sasl-oauth-audience" json:"sasl-oauth-audience,omitempty"` + EnableTLS *bool `toml:"enable-tls" json:"enable-tls,omitempty"` + CA *string `toml:"ca" json:"ca,omitempty"` + Cert *string `toml:"cert" json:"cert,omitempty"` + Key *string `toml:"key" json:"key,omitempty"` + InsecureSkipVerify *bool `toml:"insecure-skip-verify" json:"insecure-skip-verify,omitempty"` + CodecConfig *CodecConfig `toml:"codec-config" json:"codec-config,omitempty"` +} + +// MySQLConfig represents a MySQL sink configuration +type MySQLConfig struct { + WorkerCount *int `toml:"worker-count" json:"worker-count,omitempty"` + MaxTxnRow *int `toml:"max-txn-row" json:"max-txn-row,omitempty"` + MaxMultiUpdateRowSize *int `toml:"max-multi-update-row-size" json:"max-multi-update-row-size,omitempty"` + MaxMultiUpdateRowCount *int `toml:"max-multi-update-row" json:"max-multi-update-row,omitempty"` + TiDBTxnMode *string `toml:"tidb-txn-mode" json:"tidb-txn-mode,omitempty"` + SSLCa *string `toml:"ssl-ca" json:"ssl-ca,omitempty"` + SSLCert *string `toml:"ssl-cert" json:"ssl-cert,omitempty"` + SSLKey *string `toml:"ssl-key" json:"ssl-key,omitempty"` + TimeZone *string `toml:"time-zone" json:"time-zone,omitempty"` + WriteTimeout *string `toml:"write-timeout" json:"write-timeout,omitempty"` + ReadTimeout *string `toml:"read-timeout" json:"read-timeout,omitempty"` + Timeout *string `toml:"timeout" json:"timeout,omitempty"` + EnableBatchDML *bool `toml:"enable-batch-dml" json:"enable-batch-dml,omitempty"` + EnableMultiStatement *bool `toml:"enable-multi-statement" json:"enable-multi-statement,omitempty"` + EnableCachePreparedStatement *bool `toml:"enable-cache-prepared-statement" json:"enable-cache-prepared-statement,omitempty"` +} + +// CloudStorageConfig represents a cloud storage sink configuration +type CloudStorageConfig struct { + WorkerCount *int `toml:"worker-count" json:"worker-count,omitempty"` + FlushInterval *string `toml:"flush-interval" json:"flush-interval,omitempty"` + FileSize *int `toml:"file-size" json:"file-size,omitempty"` +} + +func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL) error { +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) if err := s.validateAndAdjustSinkURI(sinkURI); err != nil { return err } +<<<<<<< HEAD if !enableOldValue { for _, protocolStr := range ForceEnableOldValueProtocols { if protocolStr == s.Protocol { @@ -258,6 +342,8 @@ func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL, enableOldValue bool) er } } } +======= +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) for _, rule := range s.DispatchRules { if rule.DispatcherRule != "" && rule.PartitionRule != "" { log.Error("dispatcher and partition cannot be configured both", zap.Any("rule", rule)) diff --git a/pkg/config/sink_test.go b/pkg/config/sink_test.go index af7051e74bc..87ca87d47ca 100644 --- a/pkg/config/sink_test.go +++ b/pkg/config/sink_test.go @@ -20,6 +20,7 @@ import ( "github.com/stretchr/testify/require" ) +<<<<<<< HEAD func TestValidateOldValue(t *testing.T) { t.Parallel() testCases := []struct { @@ -81,6 +82,8 @@ func 
TestValidateOldValue(t *testing.T) { } } +======= +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) func TestValidateTxnAtomicity(t *testing.T) { t.Parallel() testCases := []struct { @@ -156,10 +159,15 @@ func TestValidateTxnAtomicity(t *testing.T) { parsedSinkURI, err := url.Parse(tc.sinkURI) require.Nil(t, err) if tc.expectedErr == "" { +<<<<<<< HEAD require.Nil(t, cfg.validateAndAdjust(parsedSinkURI, true)) require.Equal(t, tc.shouldSplitTxn, cfg.TxnAtomicity.ShouldSplitTxn()) +======= + require.Nil(t, cfg.validateAndAdjust(parsedSinkURI)) + require.Equal(t, tc.shouldSplitTxn, util.GetOrZero(cfg.TxnAtomicity).ShouldSplitTxn()) +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } else { - require.Regexp(t, tc.expectedErr, cfg.validateAndAdjust(parsedSinkURI, true)) + require.Regexp(t, tc.expectedErr, cfg.validateAndAdjust(parsedSinkURI)) } } } diff --git a/pkg/errors/cdc_errors.go b/pkg/errors/cdc_errors.go index 2fc0d287493..09cab2eae4b 100644 --- a/pkg/errors/cdc_errors.go +++ b/pkg/errors/cdc_errors.go @@ -400,6 +400,10 @@ var ( "old value is not enabled", errors.RFCCodeText("CDC:ErrOldValueNotEnabled"), ) + ErrIncompatibleConfig = errors.Normalize( + "incompatible configuration", + errors.RFCCodeText("CDC:ErrIncompatibleConfig"), + ) ErrSinkInvalidConfig = errors.Normalize( "sink config invalid", errors.RFCCodeText("CDC:ErrSinkInvalidConfig"), diff --git a/pkg/orchestrator/reactor_state.go b/pkg/orchestrator/reactor_state.go index 9affae5be61..18fb88c353c 100644 --- a/pkg/orchestrator/reactor_state.go +++ b/pkg/orchestrator/reactor_state.go @@ -229,9 +229,7 @@ func (s *ChangefeedReactorState) UpdateCDCKey(key *etcd.CDCKey, value []byte) er return errors.Trace(err) } if key.Tp == etcd.CDCKeyTypeChangefeedInfo { - if err := s.Info.VerifyAndComplete(); err != nil { - return errors.Trace(err) - } + s.Info.VerifyAndComplete() } return nil } diff --git a/pkg/sink/codec/canal/canal_json_txn_event_encoder.go b/pkg/sink/codec/canal/canal_json_txn_event_encoder.go new file mode 100644 index 00000000000..67d1055c937 --- /dev/null +++ b/pkg/sink/codec/canal/canal_json_txn_event_encoder.go @@ -0,0 +1,121 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package canal + +import ( + "bytes" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/config" + cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/sink/codec" + "github.com/pingcap/tiflow/pkg/sink/codec/common" + "go.uber.org/zap" +) + +// JSONTxnEventEncoder encodes txn event in JSON format +type JSONTxnEventEncoder struct { + builder *canalEntryBuilder + + config *common.Config + + // the symbol separating two lines + terminator []byte + valueBuf *bytes.Buffer + batchSize int + callback func() + + // Store some fields of the txn event. 
+ txnCommitTs uint64 + txnSchema *string + txnTable *string +} + +// AppendTxnEvent appends a txn event to the encoder. +func (j *JSONTxnEventEncoder) AppendTxnEvent( + txn *model.SingleTableTxn, + callback func(), +) error { + for _, row := range txn.Rows { + value, err := newJSONMessageForDML(j.builder, row, j.config) + if err != nil { + return errors.Trace(err) + } + length := len(value) + common.MaxRecordOverhead + // For single message that is longer than max-message-bytes, do not send it. + if length > j.config.MaxMessageBytes { + log.Warn("Single message is too large for canal-json", + zap.Int("maxMessageBytes", j.config.MaxMessageBytes), + zap.Int("length", length), + zap.Any("table", row.Table)) + return cerror.ErrMessageTooLarge.GenWithStackByArgs() + } + j.valueBuf.Write(value) + j.valueBuf.Write(j.terminator) + j.batchSize++ + } + j.callback = callback + j.txnCommitTs = txn.CommitTs + j.txnSchema = &txn.Table.Schema + j.txnTable = &txn.Table.Table + return nil +} + +// Build builds a message from the encoder and resets the encoder. +func (j *JSONTxnEventEncoder) Build() []*common.Message { + if j.batchSize == 0 { + return nil + } + + ret := common.NewMsg(config.ProtocolCanalJSON, nil, + j.valueBuf.Bytes(), j.txnCommitTs, model.MessageTypeRow, j.txnSchema, j.txnTable) + ret.SetRowsCount(j.batchSize) + ret.Callback = j.callback + j.valueBuf.Reset() + j.callback = nil + j.batchSize = 0 + j.txnCommitTs = 0 + j.txnSchema = nil + j.txnTable = nil + + return []*common.Message{ret} +} + +// newJSONTxnEventEncoder creates a new JSONTxnEventEncoder +func newJSONTxnEventEncoder(config *common.Config) codec.TxnEventEncoder { + encoder := &JSONTxnEventEncoder{ + builder: newCanalEntryBuilder(), + valueBuf: &bytes.Buffer{}, + terminator: []byte(config.Terminator), + + config: config, + } + return encoder +} + +type jsonTxnEventEncoderBuilder struct { + config *common.Config +} + +// NewJSONTxnEventEncoderBuilder creates a jsonTxnEventEncoderBuilder. +func NewJSONTxnEventEncoderBuilder(config *common.Config) codec.TxnEventEncoderBuilder { + return &jsonTxnEventEncoderBuilder{config: config} +} + +// Build a `jsonTxnEventEncoderBuilder` +func (b *jsonTxnEventEncoderBuilder) Build() codec.TxnEventEncoder { + return newJSONTxnEventEncoder(b.config) +} diff --git a/pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go b/pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go new file mode 100644 index 00000000000..71db664d285 --- /dev/null +++ b/pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go @@ -0,0 +1,124 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package canal + +import ( + "testing" + + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/config" + "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/stretchr/testify/require" +) + +func TestBuildCanalJSONTxnEventEncoder(t *testing.T) { + t.Parallel() + cfg := common.NewConfig(config.ProtocolCanalJSON) + + builder := NewJSONTxnEventEncoderBuilder(cfg) + encoder, ok := builder.Build().(*JSONTxnEventEncoder) + require.True(t, ok) + require.NotNil(t, encoder.config) +} + +func TestCanalJSONTxnEventEncoderMaxMessageBytes(t *testing.T) { + t.Parallel() + + // the size of `testEvent` after being encoded by canal-json is 200 + testEvent := &model.SingleTableTxn{ + Table: &model.TableName{Schema: "a", Table: "b"}, + Rows: []*model.RowChangedEvent{ + { + CommitTs: 1, + Table: &model.TableName{Schema: "a", Table: "b"}, + Columns: []*model.Column{{ + Name: "col1", + Type: mysql.TypeVarchar, + Value: []byte("aa"), + }}, + }, + }, + } + + // the test message length is smaller than max-message-bytes + maxMessageBytes := 300 + cfg := common.NewConfig(config.ProtocolCanalJSON).WithMaxMessageBytes(maxMessageBytes) + encoder := NewJSONTxnEventEncoderBuilder(cfg).Build() + err := encoder.AppendTxnEvent(testEvent, nil) + require.Nil(t, err) + + // the test message length is larger than max-message-bytes + cfg = cfg.WithMaxMessageBytes(100) + encoder = NewJSONTxnEventEncoderBuilder(cfg).Build() + err = encoder.AppendTxnEvent(testEvent, nil) + require.NotNil(t, err) +} + +func TestCanalJSONAppendTxnEventEncoderWithCallback(t *testing.T) { + t.Parallel() + + cfg := common.NewConfig(config.ProtocolCanalJSON) + encoder := NewJSONTxnEventEncoderBuilder(cfg).Build() + require.NotNil(t, encoder) + + count := 0 + + txn := &model.SingleTableTxn{ + Table: &model.TableName{Schema: "a", Table: "b"}, + Rows: []*model.RowChangedEvent{ + { + CommitTs: 1, + Table: &model.TableName{Schema: "a", Table: "b"}, + Columns: []*model.Column{{ + Name: "col1", + Type: mysql.TypeVarchar, + Value: []byte("aa"), + }}, + }, + { + CommitTs: 2, + Table: &model.TableName{Schema: "a", Table: "b"}, + Columns: []*model.Column{{ + Name: "col1", + Type: mysql.TypeVarchar, + Value: []byte("bb"), + }}, + }, + }, + } + + // Empty build makes sure that the callback build logic not broken. + msgs := encoder.Build() + require.Len(t, msgs, 0, "no message should be built and no panic") + + // Append the events. + callback := func() { + count++ + } + err := encoder.AppendTxnEvent(txn, callback) + require.Nil(t, err) + require.Equal(t, 0, count, "nothing should be called") + + msgs = encoder.Build() + require.Len(t, msgs, 1, "expected one message") + msgs[0].Callback() + require.Equal(t, 1, count, "expected one callback be called") + // Assert the build reset all the internal states. + require.Nil(t, encoder.(*JSONTxnEventEncoder).txnSchema) + require.Nil(t, encoder.(*JSONTxnEventEncoder).txnTable) + require.Nil(t, encoder.(*JSONTxnEventEncoder).callback) + require.Equal(t, 0, encoder.(*JSONTxnEventEncoder).batchSize) + require.Equal(t, 0, encoder.(*JSONTxnEventEncoder).valueBuf.Len()) +} diff --git a/pkg/sink/codec/common/config.go b/pkg/sink/codec/common/config.go new file mode 100644 index 00000000000..efe3ac64831 --- /dev/null +++ b/pkg/sink/codec/common/config.go @@ -0,0 +1,287 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "net/http" + "net/url" + + "github.com/gin-gonic/gin/binding" + "github.com/imdario/mergo" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tiflow/pkg/config" + cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/util" + "go.uber.org/zap" +) + +// defaultMaxBatchSize sets the default value for max-batch-size +const defaultMaxBatchSize int = 16 + +// Config use to create the encoder +type Config struct { + Protocol config.Protocol + + // control batch behavior, only for `open-protocol` and `craft` at the moment. + MaxMessageBytes int + MaxBatchSize int + + // onlyHandleKeyColumns is true, for the delete event only output the handle key columns. + OnlyHandleKeyColumns bool + + EnableTiDBExtension bool + EnableRowChecksum bool + + // avro only + AvroSchemaRegistry string + AvroDecimalHandlingMode string + AvroBigintUnsignedHandlingMode string + + AvroEnableWatermark bool + + // for sinking to cloud storage + Delimiter string + Quote string + NullString string + IncludeCommitTs bool + Terminator string + + // for open protocol + OnlyOutputUpdatedColumns bool +} + +// NewConfig return a Config for codec +func NewConfig(protocol config.Protocol) *Config { + return &Config{ + Protocol: protocol, + + MaxMessageBytes: config.DefaultMaxMessageBytes, + MaxBatchSize: defaultMaxBatchSize, + + EnableTiDBExtension: false, + EnableRowChecksum: false, + + AvroSchemaRegistry: "", + AvroDecimalHandlingMode: "precise", + AvroBigintUnsignedHandlingMode: "long", + AvroEnableWatermark: false, + + OnlyOutputUpdatedColumns: false, + } +} + +const ( + codecOPTEnableTiDBExtension = "enable-tidb-extension" + codecOPTAvroDecimalHandlingMode = "avro-decimal-handling-mode" + codecOPTAvroBigintUnsignedHandlingMode = "avro-bigint-unsigned-handling-mode" + codecOPTAvroSchemaRegistry = "schema-registry" + + codecOPTOnlyOutputUpdatedColumns = "only-output-updated-columns" +) + +const ( + // DecimalHandlingModeString is the string mode for decimal handling + DecimalHandlingModeString = "string" + // DecimalHandlingModePrecise is the precise mode for decimal handling + DecimalHandlingModePrecise = "precise" + // BigintUnsignedHandlingModeString is the string mode for unsigned bigint handling + BigintUnsignedHandlingModeString = "string" + // BigintUnsignedHandlingModeLong is the long mode for unsigned bigint handling + BigintUnsignedHandlingModeLong = "long" +) + +type urlConfig struct { + EnableTiDBExtension *bool `form:"enable-tidb-extension"` + MaxBatchSize *int `form:"max-batch-size"` + MaxMessageBytes *int `form:"max-message-bytes"` + AvroDecimalHandlingMode *string `form:"avro-decimal-handling-mode"` + AvroBigintUnsignedHandlingMode *string `form:"avro-bigint-unsigned-handling-mode"` + + // AvroEnableWatermark is the option for enabling watermark in avro protocol + // only used for internal testing, do not set this in the production environment since the + // confluent official consumer cannot handle watermark. 
+ AvroEnableWatermark *bool `form:"avro-enable-watermark"` + + AvroSchemaRegistry string `form:"schema-registry"` + OnlyOutputUpdatedColumns *bool `form:"only-output-updated-columns"` +} + +// Apply fill the Config +func (c *Config) Apply(sinkURI *url.URL, replicaConfig *config.ReplicaConfig) error { + req := &http.Request{URL: sinkURI} + var err error + urlParameter := &urlConfig{} + if err := binding.Query.Bind(req, urlParameter); err != nil { + return cerror.WrapError(cerror.ErrMySQLInvalidConfig, err) + } + if urlParameter, err = mergeConfig(replicaConfig, urlParameter); err != nil { + return err + } + + if urlParameter.EnableTiDBExtension != nil { + c.EnableTiDBExtension = *urlParameter.EnableTiDBExtension + } + + if urlParameter.MaxBatchSize != nil { + c.MaxBatchSize = *urlParameter.MaxBatchSize + } + + if urlParameter.MaxMessageBytes != nil { + c.MaxMessageBytes = *urlParameter.MaxMessageBytes + } + + if urlParameter.AvroDecimalHandlingMode != nil && + *urlParameter.AvroDecimalHandlingMode != "" { + c.AvroDecimalHandlingMode = *urlParameter.AvroDecimalHandlingMode + } + if urlParameter.AvroBigintUnsignedHandlingMode != nil && + *urlParameter.AvroBigintUnsignedHandlingMode != "" { + c.AvroBigintUnsignedHandlingMode = *urlParameter.AvroBigintUnsignedHandlingMode + } + if urlParameter.AvroEnableWatermark != nil { + if c.EnableTiDBExtension && c.Protocol == config.ProtocolAvro { + c.AvroEnableWatermark = *urlParameter.AvroEnableWatermark + } + } + + if urlParameter.AvroSchemaRegistry != "" { + c.AvroSchemaRegistry = urlParameter.AvroSchemaRegistry + } + + if replicaConfig.Sink != nil { + c.Terminator = util.GetOrZero(replicaConfig.Sink.Terminator) + if replicaConfig.Sink.CSVConfig != nil { + c.Delimiter = replicaConfig.Sink.CSVConfig.Delimiter + c.Quote = replicaConfig.Sink.CSVConfig.Quote + c.NullString = replicaConfig.Sink.CSVConfig.NullString + c.IncludeCommitTs = replicaConfig.Sink.CSVConfig.IncludeCommitTs + } + } + if urlParameter.OnlyOutputUpdatedColumns != nil { + c.OnlyOutputUpdatedColumns = *urlParameter.OnlyOutputUpdatedColumns + } + if c.OnlyOutputUpdatedColumns && !replicaConfig.EnableOldValue { + return cerror.ErrCodecInvalidConfig.GenWithStack( + `old value must be enabled when configuration "%s" is true.`, + codecOPTOnlyOutputUpdatedColumns, + ) + } + + if replicaConfig.Integrity != nil { + c.EnableRowChecksum = replicaConfig.Integrity.Enabled() + } + + c.OnlyHandleKeyColumns = !replicaConfig.EnableOldValue + + return nil +} + +func mergeConfig( + replicaConfig *config.ReplicaConfig, + urlParameters *urlConfig, +) (*urlConfig, error) { + dest := &urlConfig{} + if replicaConfig.Sink != nil { + dest.AvroSchemaRegistry = util.GetOrZero(replicaConfig.Sink.SchemaRegistry) + dest.OnlyOutputUpdatedColumns = replicaConfig.Sink.OnlyOutputUpdatedColumns + if replicaConfig.Sink.KafkaConfig != nil { + dest.MaxMessageBytes = replicaConfig.Sink.KafkaConfig.MaxMessageBytes + if replicaConfig.Sink.KafkaConfig.CodecConfig != nil { + codecConfig := replicaConfig.Sink.KafkaConfig.CodecConfig + dest.EnableTiDBExtension = codecConfig.EnableTiDBExtension + dest.MaxBatchSize = codecConfig.MaxBatchSize + dest.AvroEnableWatermark = codecConfig.AvroEnableWatermark + dest.AvroDecimalHandlingMode = codecConfig.AvroDecimalHandlingMode + dest.AvroBigintUnsignedHandlingMode = codecConfig.AvroBigintUnsignedHandlingMode + } + } + } + if err := mergo.Merge(dest, urlParameters, mergo.WithOverride); err != nil { + return nil, err + } + return dest, nil +} + +// WithMaxMessageBytes set the `maxMessageBytes` 
+func (c *Config) WithMaxMessageBytes(bytes int) *Config { + c.MaxMessageBytes = bytes + return c +} + +// Validate the Config +func (c *Config) Validate() error { + if c.EnableTiDBExtension && + !(c.Protocol == config.ProtocolCanalJSON || c.Protocol == config.ProtocolAvro) { + log.Warn("ignore invalid config, enable-tidb-extension"+ + "only supports canal-json/avro protocol", + zap.Bool("enableTidbExtension", c.EnableTiDBExtension), + zap.String("protocol", c.Protocol.String())) + } + + if c.Protocol == config.ProtocolAvro { + if c.AvroSchemaRegistry == "" { + return cerror.ErrCodecInvalidConfig.GenWithStack( + `Avro protocol requires parameter "%s"`, + codecOPTAvroSchemaRegistry, + ) + } + + if c.AvroDecimalHandlingMode != DecimalHandlingModePrecise && + c.AvroDecimalHandlingMode != DecimalHandlingModeString { + return cerror.ErrCodecInvalidConfig.GenWithStack( + `%s value could only be "%s" or "%s"`, + codecOPTAvroDecimalHandlingMode, + DecimalHandlingModeString, + DecimalHandlingModePrecise, + ) + } + + if c.AvroBigintUnsignedHandlingMode != BigintUnsignedHandlingModeLong && + c.AvroBigintUnsignedHandlingMode != BigintUnsignedHandlingModeString { + return cerror.ErrCodecInvalidConfig.GenWithStack( + `%s value could only be "%s" or "%s"`, + codecOPTAvroBigintUnsignedHandlingMode, + BigintUnsignedHandlingModeLong, + BigintUnsignedHandlingModeString, + ) + } + + if c.EnableRowChecksum { + if !(c.EnableTiDBExtension && c.AvroDecimalHandlingMode == DecimalHandlingModeString && + c.AvroBigintUnsignedHandlingMode == BigintUnsignedHandlingModeString) { + return cerror.ErrCodecInvalidConfig.GenWithStack( + `Avro protocol with row level checksum, + should set "%s" to "%s", and set "%s" to "%s" and "%s" to "%s"`, + codecOPTEnableTiDBExtension, "true", + codecOPTAvroDecimalHandlingMode, DecimalHandlingModeString, + codecOPTAvroBigintUnsignedHandlingMode, BigintUnsignedHandlingModeString) + } + } + } + + if c.MaxMessageBytes <= 0 { + return cerror.ErrCodecInvalidConfig.Wrap( + errors.Errorf("invalid max-message-bytes %d", c.MaxMessageBytes), + ) + } + + if c.MaxBatchSize <= 0 { + return cerror.ErrCodecInvalidConfig.Wrap( + errors.Errorf("invalid max-batch-size %d", c.MaxBatchSize), + ) + } + + return nil +} diff --git a/pkg/version/creator_version_gate.go b/pkg/version/creator_version_gate.go index a11ce3c3eac..43504f29143 100644 --- a/pkg/version/creator_version_gate.go +++ b/pkg/version/creator_version_gate.go @@ -78,7 +78,10 @@ func (g *CreatorVersionGate) ChangefeedAcceptUnknownProtocols() bool { return creatorVersion.LessThan(changefeedAcceptUnknownProtocolsVersion) } -var changefeedAcceptProtocolInMysqlSinURI = *semver.New("6.1.1") +var ( + changefeedAcceptProtocolInMysqlSinURI = *semver.New("6.1.1") + changefeedAdjustEnableOldValueByProtocol = *semver.New("7.2.0") +) // ChangefeedAcceptProtocolInMysqlSinURI determines whether to accept // protocol in mysql sink uri or configure based on the creator's version. @@ -92,3 +95,28 @@ func (g *CreatorVersionGate) ChangefeedAcceptProtocolInMysqlSinURI() bool { creatorVersion := semver.New(SanitizeVersion(g.version)) return creatorVersion.LessThan(changefeedAcceptProtocolInMysqlSinURI) } +<<<<<<< HEAD +======= + +// ChangefeedInheritSchedulerConfigFromV66 determines whether to inherit +// changefeed scheduler config created by v6.6.0. 
+func (g *CreatorVersionGate) ChangefeedInheritSchedulerConfigFromV66() bool { + if g.version == "" { + return false + } + + creatorVersion := semver.New(SanitizeVersion(g.version)) + return creatorVersion.Major == 6 && creatorVersion.Minor == 6 +} + +// ChangefeedAdjustEnableOldValueByProtocol determines whether to adjust +// the `enable-old-value` configuration by the using encoding protocol. +func (g *CreatorVersionGate) ChangefeedAdjustEnableOldValueByProtocol() bool { + if g.version == "" { + return true + } + + creatorVersion := semver.New(SanitizeVersion(g.version)) + return creatorVersion.LessThan(changefeedAdjustEnableOldValueByProtocol) +} +>>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) diff --git a/tests/integration_tests/multi_changefeed/run.sh b/tests/integration_tests/multi_changefeed/run.sh index f4522be3d63..72b60fff2b8 100755 --- a/tests/integration_tests/multi_changefeed/run.sh +++ b/tests/integration_tests/multi_changefeed/run.sh @@ -38,17 +38,8 @@ function check_old_value_enabled() { # When old value is turned on, the pre-column in our delete will include all the columns. # So here we have 1 (id) and 3 (val). delete_with_old_value_count=$(grep "BlackHoleSink: WriteEvents" "$1/cdc.log" | grep 'pre\-columns\\\":\[' | grep 'columns\\\":null' | grep 'value\\\":1' | grep -c 'value\\\":3') - if [[ "$delete_with_old_value_count" -ne 1 ]]; then - echo "can't found delete row with old value" - exit 1 - fi - - # check if exist a delete row without a complete `pre-column` - # When old value is turned off, the pre-column in our delete will only include the handle columns. - # So here we only have 1 (id). - delete_without_old_value_count=$(grep "BlackHoleSink: WriteEvents" "$1/cdc.log" | grep 'pre\-columns\\\":\[' | grep 'columns\\\":null' | grep -c 'value\\\":1,\\\"default\\\":null},null') - if [[ "$delete_without_old_value_count" -ne 1 ]]; then - echo "can't found delete row without old value" + if [[ "$delete_with_old_value_count" -ne 2 ]]; then + echo "can't found delete row with old value, not 2 found" exit 1 fi } From d6e22c69a6781ddad81588a09b5f17684c9febd8 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Sun, 4 Jun 2023 00:34:46 +0800 Subject: [PATCH 2/9] fix all conflicts the first time. 
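The behaviour this series converges on is easiest to see in isolation: the
encoding protocol, not the user flag, decides whether old values are emitted,
and force-replicate is rejected whenever old values end up disabled. The
sketch below is illustrative only, with simplified names rather than the real
tiflow API (the actual logic lives behind
ReplicaConfig.AdjustEnableOldValueAndVerifyForceReplicate):

    package main

    import (
        "fmt"
        "strings"
    )

    // adjustEnableOldValue mirrors the rule exercised by the tests in this
    // series: avro and csv cannot carry old values, so the flag is forced
    // off; canal and canal-json require them, so it is forced on; other
    // protocols keep the user's choice. Force-replicate is only accepted
    // while old values remain enabled.
    func adjustEnableOldValue(protocol string, enableOldValue, forceReplicate bool) (bool, error) {
        switch strings.ToLower(protocol) {
        case "avro", "csv":
            enableOldValue = false
        case "canal", "canal-json":
            enableOldValue = true
        }
        if forceReplicate && !enableOldValue {
            return false, fmt.Errorf("old value must be enabled when force-replicate is set")
        }
        return enableOldValue, nil
    }

    func main() {
        adjusted, err := adjustEnableOldValue("avro", true, false)
        fmt.Println(adjusted, err) // false <nil>
    }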
--- cdc/api/v2/api_helpers.go | 23 -- cdc/entry/mounter.go | 66 +---- cdc/entry/mounter_group.go | 18 +- cdc/entry/mounter_test.go | 229 +----------------- cdc/model/changefeed.go | 28 --- cdc/model/changefeed_test.go | 103 -------- cdc/processor/processor.go | 7 - cdc/sink/codec/canal/canal_encoder.go | 18 +- .../codec/canal/canal_json_decoder_test.go | 6 +- cdc/sink/codec/canal/canal_json_encoder.go | 125 ++-------- .../codec/canal/canal_json_encoder_test.go | 94 +------ cdc/sink/codec/common/config.go | 4 + cdc/sink/codec/craft/craft_encoder.go | 20 +- cdc/sink/codec/maxwell/maxwell_encoder.go | 20 +- cdc/sink/codec/open/open_protocol_encoder.go | 27 +-- .../codec/open/open_protocol_message_test.go | 70 ------ pkg/cmd/cli/cli_changefeed_create.go | 19 -- pkg/config/replica_config.go | 6 +- pkg/config/replica_config_test.go | 23 +- pkg/config/sink.go | 90 ------- pkg/config/sink_test.go | 71 +----- pkg/version/creator_version_gate.go | 14 -- 22 files changed, 54 insertions(+), 1027 deletions(-) diff --git a/cdc/api/v2/api_helpers.go b/cdc/api/v2/api_helpers.go index 989fbe78665..21ca4a7c53d 100644 --- a/cdc/api/v2/api_helpers.go +++ b/cdc/api/v2/api_helpers.go @@ -190,29 +190,6 @@ func (APIV2HelpersImpl) verifyCreateChangefeedConfig( return nil, err } -<<<<<<< HEAD - protocol := sinkURIParsed.Query().Get(config.ProtocolKey) - if protocol != "" { - replicaCfg.Sink.Protocol = protocol - } - for _, fp := range config.ForceEnableOldValueProtocols { - if replicaCfg.Sink.Protocol == fp { - log.Warn( - "Attempting to replicate without old value enabled. "+ - "CDC will enable old value and continue.", - zap.String("protocol", replicaCfg.Sink.Protocol)) - replicaCfg.EnableOldValue = true - break - } - } - - if replicaCfg.ForceReplicate { - return nil, cerror.ErrOldValueNotEnabled.GenWithStackByArgs( - "if use force replicate, old value feature must be enabled") - } - } -======= ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) f, err := filter.NewFilter(replicaCfg, "") if err != nil { return nil, errors.Cause(err) diff --git a/cdc/entry/mounter.go b/cdc/entry/mounter.go index 6d8b1bcf92c..28edd19186c 100644 --- a/cdc/entry/mounter.go +++ b/cdc/entry/mounter.go @@ -82,11 +82,6 @@ func NewMounter(schemaStorage SchemaStorage, changefeedID model.ChangeFeedID, tz *time.Location, filter pfilter.Filter, -<<<<<<< HEAD - enableOldValue bool, -======= - integrity *integrity.Config, ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) ) Mounter { return &mounter{ schemaStorage: schemaStorage, @@ -273,13 +268,8 @@ func parseJob(v []byte, startTs, CRTs uint64) (*timodel.Job, error) { } func datum2Column( -<<<<<<< HEAD - tableInfo *model.TableInfo, datums map[int64]types.Datum, fillWithDefaultValue bool, -) ([]*model.Column, []types.Datum, []rowcodec.ColInfo, error) { -======= tableInfo *model.TableInfo, datums map[int64]types.Datum, -) ([]*model.Column, []types.Datum, []*timodel.ColumnInfo, []rowcodec.ColInfo, error) { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) +) ([]*model.Column, []types.Datum, []rowcodec.ColInfo, error) { cols := make([]*model.Column, len(tableInfo.RowColumnsOffset)) rawCols := make([]types.Datum, len(tableInfo.RowColumnsOffset)) @@ -295,18 +285,6 @@ func datum2Column( continue } colName := colInfo.Name.O -<<<<<<< HEAD - colDatums, exist := datums[colInfo.ID] - var colValue 
interface{} - if !exist && !fillWithDefaultValue { - log.Debug("column value is not found", - zap.String("table", tableInfo.Name.O), zap.String("column", colName)) - continue - } - var err error - var warn string - var size int -======= colID := colInfo.ID colDatums, exist := datums[colID] @@ -316,7 +294,6 @@ func datum2Column( warn string err error ) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) if exist { colValue, size, warn, err = formatColVal(colDatums, colInfo) } else { @@ -361,53 +338,16 @@ func (m *mounter) mountRowKVEntry(tableInfo *model.TableInfo, row *rowKVEntry, d if row.PreRowExist { // FIXME(leoppro): using pre table info to mounter pre column datum // the pre column and current column in one event may using different table info -<<<<<<< HEAD - preCols, preRawCols, extendColumnInfos, err = datum2Column(tableInfo, row.PreRow, m.enableOldValue) -======= - preCols, preRawCols, columnInfos, extendColumnInfos, err = datum2Column(tableInfo, row.PreRow) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) - if err != nil { - return nil, rawRow, errors.Trace(err) - } - -<<<<<<< HEAD - // NOTICE: When the old Value feature is off, - // the Delete event only needs to keep the handle key column. - if row.Delete && !m.enableOldValue { - for i := range preCols { - col := preCols[i] - if col != nil && !col.Flag.IsHandleKey() { - preCols[i] = nil - } - } -======= - preChecksum, checksumVersion, matched, err = m.verifyChecksum(columnInfos, preRawCols, true) + preCols, preRawCols, extendColumnInfos, err = datum2Column(tableInfo, row.PreRow) if err != nil { return nil, rawRow, errors.Trace(err) } - - if !matched { - log.Error("previous columns checksum mismatch", - zap.Uint32("checksum", preChecksum), - zap.Any("tableInfo", tableInfo), - zap.Any("row", row)) - if m.integrity.ErrorHandle() { - return nil, rawRow, cerror.ErrCorruptedDataMutation. 
- GenWithStackByArgs(m.changefeedID.Namespace, m.changefeedID.ID, row) - } - corrupted = true ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) - } } var cols []*model.Column var rawCols []types.Datum if row.RowExist { -<<<<<<< HEAD - cols, rawCols, extendColumnInfos, err = datum2Column(tableInfo, row.Row, true) -======= - cols, rawCols, columnInfos, extendColumnInfos, err = datum2Column(tableInfo, row.Row) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) + cols, rawCols, extendColumnInfos, err = datum2Column(tableInfo, row.Row) if err != nil { return nil, rawRow, errors.Trace(err) } diff --git a/cdc/entry/mounter_group.go b/cdc/entry/mounter_group.go index 1f4619c6cf3..41c21680f69 100644 --- a/cdc/entry/mounter_group.go +++ b/cdc/entry/mounter_group.go @@ -31,21 +31,11 @@ type MounterGroup interface { } type mounterGroup struct { -<<<<<<< HEAD - schemaStorage SchemaStorage - inputCh chan *model.PolymorphicEvent - tz *time.Location - filter filter.Filter - enableOldValue bool -======= schemaStorage SchemaStorage inputCh chan *model.PolymorphicEvent tz *time.Location filter filter.Filter - integrity *integrity.Config ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) - - workerNum int + workerNum int changefeedID model.ChangeFeedID } @@ -106,11 +96,7 @@ func (m *mounterGroup) Run(ctx context.Context) error { } func (m *mounterGroup) runWorker(ctx context.Context) error { -<<<<<<< HEAD - mounter := NewMounter(m.schemaStorage, m.changefeedID, m.tz, m.filter, m.enableOldValue) -======= - mounter := NewMounter(m.schemaStorage, m.changefeedID, m.tz, m.filter, m.integrity) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) + mounter := NewMounter(m.schemaStorage, m.changefeedID, m.tz, m.filter) for { select { case <-ctx.Done(): diff --git a/cdc/entry/mounter_test.go b/cdc/entry/mounter_test.go index c248fcde4aa..cee85e64e1f 100644 --- a/cdc/entry/mounter_test.go +++ b/cdc/entry/mounter_test.go @@ -308,13 +308,7 @@ func testMounterDisableOldValue(t *testing.T, tc struct { config := config.GetDefaultReplicaConfig() filter, err := filter.NewFilter(config, "") require.Nil(t, err) - mounter := NewMounter(scheamStorage, -<<<<<<< HEAD - model.DefaultChangeFeedID("c1"), - time.UTC, filter, false).(*mounter) -======= - model.DefaultChangeFeedID("c1"), time.UTC, filter, config.Integrity).(*mounter) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) + mounter := NewMounter(scheamStorage, model.DefaultChangeFeedID("c1"), time.UTC, filter).(*mounter) mounter.tz = time.Local ctx := context.Background() @@ -989,215 +983,6 @@ func TestGetDefaultZeroValue(t *testing.T) { } } -<<<<<<< HEAD -======= -func TestDecodeRowEnableChecksum(t *testing.T) { - helper := NewSchemaTestHelper(t) - defer helper.Close() - - tk := helper.Tk() - - tk.MustExec("set global tidb_enable_row_level_checksum = 1") - helper.Tk().MustExec("use test") - - replicaConfig := config.GetDefaultReplicaConfig() - replicaConfig.Integrity.IntegrityCheckLevel = integrity.CheckLevelCorrectness - filter, err := filter.NewFilter(replicaConfig, "") - require.NoError(t, err) - - ver, err := helper.Storage().CurrentVersion(oracle.GlobalTxnScope) - require.NoError(t, err) - - changefeed := 
model.DefaultChangeFeedID("changefeed-test-decode-row") - schemaStorage, err := NewSchemaStorage(helper.GetCurrentMeta(), - ver.Ver, false, changefeed, util.RoleTester, filter) - require.NoError(t, err) - require.NotNil(t, schemaStorage) - - createTableDDL := "create table t (id int primary key, a int)" - job := helper.DDL2Job(createTableDDL) - err = schemaStorage.HandleDDLJob(job) - require.NoError(t, err) - - ts := schemaStorage.GetLastSnapshot().CurrentTs() - schemaStorage.AdvanceResolvedTs(ver.Ver) - - mounter := NewMounter(schemaStorage, changefeed, time.Local, filter, replicaConfig.Integrity).(*mounter) - - ctx := context.Background() - - tableInfo, ok := schemaStorage.GetLastSnapshot().TableByName("test", "t") - require.True(t, ok) - - // row without checksum - tk.Session().GetSessionVars().EnableRowLevelChecksum = false - tk.MustExec("insert into t values (1, 10)") - - key, value := getLastKeyValueInStore(t, helper.Storage(), tableInfo.ID) - rawKV := &model.RawKVEntry{ - OpType: model.OpTypePut, - Key: key, - Value: value, - StartTs: ts - 1, - CRTs: ts + 1, - } - - row, err := mounter.unmarshalAndMountRowChanged(ctx, rawKV) - require.NoError(t, err) - require.NotNil(t, row) - // the upstream tidb does not enable checksum, so the checksum is nil - require.Nil(t, row.Checksum) - - // row with one checksum - tk.Session().GetSessionVars().EnableRowLevelChecksum = true - tk.MustExec("insert into t values (2, 20)") - - key, value = getLastKeyValueInStore(t, helper.Storage(), tableInfo.ID) - rawKV = &model.RawKVEntry{ - OpType: model.OpTypePut, - Key: key, - Value: value, - StartTs: ts - 1, - CRTs: ts + 1, - } - row, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) - require.NoError(t, err) - require.NotNil(t, row) - require.NotNil(t, row.Checksum) - - expected, ok := mounter.decoder.GetChecksum() - require.True(t, ok) - require.Equal(t, expected, row.Checksum.Current) - require.False(t, row.Checksum.Corrupted) - - // row with 2 checksum - tk.MustExec("insert into t values (3, 30)") - job = helper.DDL2Job("alter table t change column a a varchar(10)") - err = schemaStorage.HandleDDLJob(job) - require.NoError(t, err) - - key, value = getLastKeyValueInStore(t, helper.Storage(), tableInfo.ID) - rawKV = &model.RawKVEntry{ - OpType: model.OpTypePut, - Key: key, - Value: value, - StartTs: ts - 1, - CRTs: ts + 1, - } - row, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) - require.NoError(t, err) - require.NotNil(t, row) - require.NotNil(t, row.Checksum) - - first, ok := mounter.decoder.GetChecksum() - require.True(t, ok) - - extra, ok := mounter.decoder.GetExtraChecksum() - require.True(t, ok) - - if row.Checksum.Current != first { - require.Equal(t, extra, row.Checksum.Current) - } else { - require.Equal(t, first, row.Checksum.Current) - } - require.False(t, row.Checksum.Corrupted) - - // hack the table info to make the checksum corrupted - tableInfo.Columns[0].FieldType = *types.NewFieldType(mysql.TypeVarchar) - - // corrupt-handle-level default to warn, so no error, but the checksum is corrupted - row, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) - require.NoError(t, err) - require.NotNil(t, row.Checksum) - require.True(t, row.Checksum.Corrupted) - - mounter.integrity.CorruptionHandleLevel = integrity.CorruptionHandleLevelError - _, err = mounter.unmarshalAndMountRowChanged(ctx, rawKV) - require.Error(t, err) - require.ErrorIs(t, err, cerror.ErrCorruptedDataMutation) - - job = helper.DDL2Job("drop table t") - err = schemaStorage.HandleDDLJob(job) - require.NoError(t, err) 
-} - -func TestDecodeRow(t *testing.T) { - helper := NewSchemaTestHelper(t) - defer helper.Close() - - helper.Tk().MustExec("set @@tidb_enable_clustered_index=1;") - helper.Tk().MustExec("use test;") - - changefeed := model.DefaultChangeFeedID("changefeed-test-decode-row") - - ver, err := helper.Storage().CurrentVersion(oracle.GlobalTxnScope) - require.NoError(t, err) - - cfg := config.GetDefaultReplicaConfig() - - filter, err := filter.NewFilter(cfg, "") - require.NoError(t, err) - - schemaStorage, err := NewSchemaStorage(helper.GetCurrentMeta(), - ver.Ver, false, changefeed, util.RoleTester, filter) - require.NoError(t, err) - - // apply ddl to schemaStorage - ddl := "create table test.student(id int primary key, name char(50), age int, gender char(10))" - job := helper.DDL2Job(ddl) - err = schemaStorage.HandleDDLJob(job) - require.NoError(t, err) - - ts := schemaStorage.GetLastSnapshot().CurrentTs() - - schemaStorage.AdvanceResolvedTs(ver.Ver) - - mounter := NewMounter(schemaStorage, changefeed, time.Local, filter, cfg.Integrity).(*mounter) - - helper.Tk().MustExec(`insert into student values(1, "dongmen", 20, "male")`) - helper.Tk().MustExec(`update student set age = 27 where id = 1`) - - ctx := context.Background() - decodeAndCheckRowInTable := func(tableID int64, f func(key []byte, value []byte) *model.RawKVEntry) { - walkTableSpanInStore(t, helper.Storage(), tableID, func(key []byte, value []byte) { - rawKV := f(key, value) - - row, err := mounter.unmarshalAndMountRowChanged(ctx, rawKV) - require.NoError(t, err) - require.NotNil(t, row) - - if row.Columns != nil { - require.NotNil(t, mounter.decoder) - } - - if row.PreColumns != nil { - require.NotNil(t, mounter.preDecoder) - } - }) - } - - toRawKV := func(key []byte, value []byte) *model.RawKVEntry { - return &model.RawKVEntry{ - OpType: model.OpTypePut, - Key: key, - Value: value, - StartTs: ts - 1, - CRTs: ts + 1, - } - } - - tableInfo, ok := schemaStorage.GetLastSnapshot().TableByName("test", "student") - require.True(t, ok) - - decodeAndCheckRowInTable(tableInfo.ID, toRawKV) - decodeAndCheckRowInTable(tableInfo.ID, toRawKV) - - job = helper.DDL2Job("drop table student") - err = schemaStorage.HandleDDLJob(job) - require.NoError(t, err) -} - ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) // TestDecodeEventIgnoreRow tests a PolymorphicEvent.Row is nil // if this event should be filter out by filter. 
func TestDecodeEventIgnoreRow(t *testing.T) { @@ -1232,11 +1017,7 @@ func TestDecodeEventIgnoreRow(t *testing.T) { ts := schemaStorage.GetLastSnapshot().CurrentTs() schemaStorage.AdvanceResolvedTs(ver.Ver) -<<<<<<< HEAD - mounter := NewMounter(schemaStorage, cfID, time.Local, f, true).(*mounter) -======= - mounter := NewMounter(schemaStorage, cfID, time.Local, f, cfg.Integrity).(*mounter) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) + mounter := NewMounter(schemaStorage, cfID, time.Local, f).(*mounter) type testCase struct { schema string @@ -1413,11 +1194,7 @@ func TestBuildTableInfo(t *testing.T) { originTI, err := ddl.BuildTableInfoFromAST(stmt.(*ast.CreateTableStmt)) require.NoError(t, err) cdcTableInfo := model.WrapTableInfo(0, "test", 0, originTI) -<<<<<<< HEAD - cols, _, _, err := datum2Column(cdcTableInfo, map[int64]types.Datum{}, true) -======= - cols, _, _, _, err := datum2Column(cdcTableInfo, map[int64]types.Datum{}) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) + cols, _, _, err := datum2Column(cdcTableInfo, map[int64]types.Datum{}) require.NoError(t, err) recoveredTI := model.BuildTiDBTableInfo(cols, cdcTableInfo.IndexColumnsOffset) handle := sqlmodel.GetWhereHandle(recoveredTI, recoveredTI) diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index 4ca50664f40..27aa145bedd 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -279,20 +279,8 @@ func (info *ChangeFeedInfo) VerifyAndComplete() { if info.Config.Consistent == nil { info.Config.Consistent = defaultConfig.Consistent } -<<<<<<< HEAD return nil -======= - if info.Config.Scheduler == nil { - info.Config.Scheduler = defaultConfig.Scheduler - } - - if info.Config.Integrity == nil { - info.Config.Integrity = defaultConfig.Integrity - } - - info.RmUnusedFields() ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } // FixIncompatible fixes incompatible changefeed meta info. @@ -321,22 +309,6 @@ func (info *ChangeFeedInfo) FixIncompatible() { info.fixMemoryQuota() log.Info("Fix incompatible memory quota completed", zap.String("changefeed", info.String())) } -<<<<<<< HEAD -======= - - log.Info("Start fixing incompatible scheduler", zap.String("changefeed", info.String())) - inheritV66 := creatorVersionGate.ChangefeedInheritSchedulerConfigFromV66() - info.fixScheduler(inheritV66) - log.Info("Fix incompatible scheduler completed", zap.String("changefeed", info.String())) - - if creatorVersionGate.ChangefeedAdjustEnableOldValueByProtocol() { - log.Info("Start fixing incompatible enable old value", zap.String("changefeed", info.String()), - zap.Bool("enableOldValue", info.Config.EnableOldValue)) - info.fixEnableOldValue() - log.Info("Fix incompatible enable old value completed", zap.String("changefeed", info.String()), - zap.Bool("enableOldValue", info.Config.EnableOldValue)) - } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } // fixState attempts to fix state loss from upgrading the old owner to the new owner. 
diff --git a/cdc/model/changefeed_test.go b/cdc/model/changefeed_test.go index dcc2e4d2c4c..a2b2f5923ce 100644 --- a/cdc/model/changefeed_test.go +++ b/cdc/model/changefeed_test.go @@ -27,109 +27,6 @@ import ( "github.com/tikv/client-go/v2/oracle" ) -<<<<<<< HEAD -======= -func TestRmUnusedField(t *testing.T) { - t.Parallel() - const ( - defaultRegistry string = "default-schema-registry" - defaultProtocol string = "default-protocol" - ) - - // 1. mysql downstream - { - mysqlCf := &ChangeFeedInfo{ - SinkURI: "mysql://", - Config: &config.ReplicaConfig{ - Sink: &config.SinkConfig{ - SchemaRegistry: util.AddressOf(defaultRegistry), - Protocol: util.AddressOf(defaultProtocol), - CSVConfig: &config.CSVConfig{ - Quote: string(config.DoubleQuoteChar), - Delimiter: config.Comma, - NullString: config.NULL, - }, - }, - }, - } - - mysqlCf.VerifyAndComplete() - require.True(t, mysqlCf.Config.Sink.SchemaRegistry == nil) - require.True(t, mysqlCf.Config.Sink.Protocol == nil) - require.Nil(t, mysqlCf.Config.Sink.CSVConfig) - } - - // 2. storage downstream - { - strCf := &ChangeFeedInfo{ - SinkURI: "s3://", - Config: &config.ReplicaConfig{ - Sink: &config.SinkConfig{ - SchemaRegistry: util.AddressOf(defaultRegistry), - Protocol: util.AddressOf(defaultProtocol), - CSVConfig: &config.CSVConfig{ - Quote: string(config.DoubleQuoteChar), - Delimiter: config.Comma, - NullString: config.NULL, - }, - }, - }, - } - strCf.VerifyAndComplete() - require.True(t, strCf.Config.Sink.SchemaRegistry == nil) - require.NotNil(t, strCf.Config.Sink.CSVConfig) - } - - // 3. kafka downstream using avro - { - kaCf := &ChangeFeedInfo{ - SinkURI: "kafka://", - Config: &config.ReplicaConfig{ - Sink: &config.SinkConfig{ - Protocol: util.AddressOf(config.ProtocolAvro.String()), - SchemaRegistry: util.AddressOf(defaultRegistry), - CSVConfig: &config.CSVConfig{ - Quote: string(config.DoubleQuoteChar), - Delimiter: config.Comma, - NullString: config.NULL, - }, - }, - }, - } - kaCf.VerifyAndComplete() - require.Equal(t, defaultRegistry, util.GetOrZero(kaCf.Config.Sink.SchemaRegistry)) - require.Equal(t, config.ProtocolAvro.String(), util.GetOrZero(kaCf.Config.Sink.Protocol)) - require.Nil(t, kaCf.Config.Sink.CSVConfig) - } - - // 4. 
kafka downstream using canal-json - { - kcCf := &ChangeFeedInfo{ - SinkURI: "kafka://", - Config: &config.ReplicaConfig{ - Sink: &config.SinkConfig{ - Protocol: util.AddressOf(config.ProtocolCanal.String()), - SchemaRegistry: util.AddressOf(defaultRegistry), - CSVConfig: &config.CSVConfig{ - Quote: string(config.DoubleQuoteChar), - Delimiter: config.Comma, - NullString: config.NULL, - }, - }, - }, - } - kcCf.VerifyAndComplete() - require.True(t, kcCf.Config.Sink.SchemaRegistry == nil) - require.Equal( - t, - config.ProtocolCanal.String(), - util.GetOrZero(kcCf.Config.Sink.Protocol), - ) - require.Nil(t, kcCf.Config.Sink.CSVConfig) - } -} - ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) func TestFillV1(t *testing.T) { t.Parallel() diff --git a/cdc/processor/processor.go b/cdc/processor/processor.go index f0e448ab898..8289c0579f1 100644 --- a/cdc/processor/processor.go +++ b/cdc/processor/processor.go @@ -789,14 +789,7 @@ func (p *processor) lazyInitImpl(ctx cdcContext.Context) error { p.mg = entry.NewMounterGroup(p.schemaStorage, p.changefeed.Info.Config.Mounter.WorkerNum, -<<<<<<< HEAD - p.changefeed.Info.Config.EnableOldValue, p.filter, tz, p.changefeedID) -======= - p.filter, tz, p.changefeedID, p.changefeed.Info.Config.Integrity) - p.mg.name = "MounterGroup" - p.mg.spawn(stdCtx) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) p.wg.Add(1) go func() { diff --git a/cdc/sink/codec/canal/canal_encoder.go b/cdc/sink/codec/canal/canal_encoder.go index 32105b61bd4..8d3b28ea988 100644 --- a/cdc/sink/codec/canal/canal_encoder.go +++ b/cdc/sink/codec/canal/canal_encoder.go @@ -158,11 +158,7 @@ func (d *BatchEncoder) resetPacket() { } // newBatchEncoder creates a new canalBatchEncoder. -<<<<<<< HEAD:cdc/sink/codec/canal/canal_encoder.go -func newBatchEncoder() codec.EventBatchEncoder { -======= -func newBatchEncoder(config *common.Config) codec.RowEventEncoder { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_encoder.go +func newBatchEncoder(config *common.Config) codec.EventBatchEncoder { encoder := &BatchEncoder{ messages: &canal.Messages{}, callbackBuf: make([]func(), 0), @@ -180,23 +176,13 @@ type batchEncoderBuilder struct { } // Build a `canalBatchEncoder` -<<<<<<< HEAD:cdc/sink/codec/canal/canal_encoder.go func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { - return newBatchEncoder() -} - -// NewBatchEncoderBuilder creates a canal batchEncoderBuilder. -func NewBatchEncoderBuilder() codec.EncoderBuilder { - return &batchEncoderBuilder{} -======= -func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { return newBatchEncoder(b.config) } // NewBatchEncoderBuilder creates a canal batchEncoderBuilder. 
-func NewBatchEncoderBuilder(config *common.Config) codec.RowEventEncoderBuilder { +func NewBatchEncoderBuilder(config *common.Config) codec.EncoderBuilder { return &batchEncoderBuilder{ config: config, } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_encoder.go } diff --git a/cdc/sink/codec/canal/canal_json_decoder_test.go b/cdc/sink/codec/canal/canal_json_decoder_test.go index 9eaec071700..017170fb7e7 100644 --- a/cdc/sink/codec/canal/canal_json_decoder_test.go +++ b/cdc/sink/codec/canal/canal_json_decoder_test.go @@ -84,14 +84,10 @@ func TestNewCanalJSONBatchDecoder4RowMessage(t *testing.T) { func TestNewCanalJSONBatchDecoder4DDLMessage(t *testing.T) { t.Parallel() for _, encodeEnable := range []bool{false, true} { -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_decoder_test.go - encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: encodeEnable} -======= - encoder := &JSONRowEventEncoder{ + encoder := &JSONBatchEncoder{ builder: newCanalEntryBuilder(), config: &common.Config{EnableTiDBExtension: encodeEnable}, } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_decoder_test.go require.NotNil(t, encoder) result, err := encoder.EncodeDDLEvent(testCaseDDL) diff --git a/cdc/sink/codec/canal/canal_json_encoder.go b/cdc/sink/codec/canal/canal_json_encoder.go index 928202e6d35..0a1cbf50638 100644 --- a/cdc/sink/codec/canal/canal_json_encoder.go +++ b/cdc/sink/codec/canal/canal_json_encoder.go @@ -29,28 +29,31 @@ import ( "go.uber.org/zap" ) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go // JSONBatchEncoder encodes Canal json messages in JSON format type JSONBatchEncoder struct { - builder *canalEntryBuilder + builder *canalEntryBuilder + messages []*common.Message - // When it is true, canal-json would generate TiDB extension information - // which, at the moment, only includes `tidbWaterMarkType` and `_tidb` fields. 
- enableTiDBExtension bool - // the symbol separating two lines - terminator []byte - maxMessageBytes int - messages []*common.Message + config *common.Config +} + +// newJSONRowEventEncoder creates a new JSONRowEventEncoder +func newJSONRowEventEncoder(config *common.Config) codec.RowEventEncoder { + encoder := &JSONRowEventEncoder{ + builder: newCanalEntryBuilder(), + messages: make([]*common.Message, 0, 1), + + config: config, + } + return encoder } // newJSONBatchEncoder creates a new JSONBatchEncoder func newJSONBatchEncoder(config *common.Config) codec.EventBatchEncoder { encoder := &JSONBatchEncoder{ - builder: newCanalEntryBuilder(), - enableTiDBExtension: config.EnableTiDBExtension, - messages: make([]*common.Message, 0, 1), - terminator: []byte(config.Terminator), - maxMessageBytes: config.MaxMessageBytes, + builder: newCanalEntryBuilder(), + messages: make([]*common.Message, 0, 1), + config: config, } return encoder } @@ -59,22 +62,7 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt isDelete := e.IsDelete() mysqlTypeMap := make(map[string]string, len(e.Columns)) - filling := func(columns []*model.Column, out *jwriter.Writer) error { -======= -func newJSONMessageForDML( - builder *canalEntryBuilder, - e *model.RowChangedEvent, - config *common.Config, -) ([]byte, error) { - isDelete := e.IsDelete() - mysqlTypeMap := make(map[string]string, len(e.Columns)) - - filling := func(columns []*model.Column, out *jwriter.Writer, - onlyOutputUpdatedColumn bool, - onlyHandleKeyColumns bool, - newColumnMap map[string]*model.Column, - ) error { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go + filling := func(columns []*model.Column, out *jwriter.Writer, onlyHandleKeyColumns bool) error { if len(columns) == 0 { out.RawString("null") return nil @@ -84,16 +72,9 @@ func newJSONMessageForDML( isFirst := true for _, col := range columns { if col != nil { -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go -======= - // column equal, do not output it - if onlyOutputUpdatedColumn && shouldIgnoreColumn(col, newColumnMap) { - continue - } if onlyHandleKeyColumns && !col.Flag.IsHandleKey() { continue } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go if isFirst { isFirst = false } else { @@ -242,57 +223,29 @@ func newJSONMessageForDML( if e.IsDelete() { out.RawString(",\"old\":null") out.RawString(",\"data\":") -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go - if err := filling(e.PreColumns, out); err != nil { -======= - if err := filling(e.PreColumns, out, false, config.OnlyHandleKeyColumns, nil); err != nil { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go + if err := filling(e.PreColumns, out, config.OnlyHandleKeyColumns); err != nil { return nil, err } } else if e.IsInsert() { out.RawString(",\"old\":null") out.RawString(",\"data\":") -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go - if err := filling(e.Columns, out); err != nil { + if err := filling(e.Columns, out, false); err != nil { return nil, err } } else if e.IsUpdate() { out.RawString(",\"old\":") - if err := filling(e.PreColumns, out); err != nil { + if err := filling(e.PreColumns, out, false); err != nil { return 
nil, err } out.RawString(",\"data\":") if err := filling(e.Columns, out); err != nil { -======= - if err := filling(e.Columns, out, false, false, nil); err != nil { - return nil, err - } - } else if e.IsUpdate() { - var newColsMap map[string]*model.Column - if config.OnlyOutputUpdatedColumns { - newColsMap = make(map[string]*model.Column, len(e.Columns)) - for _, col := range e.Columns { - newColsMap[col.Name] = col - } - } - out.RawString(",\"old\":") - if err := filling(e.PreColumns, out, config.OnlyOutputUpdatedColumns, false, newColsMap); err != nil { - return nil, err - } - out.RawString(",\"data\":") - if err := filling(e.Columns, out, false, false, nil); err != nil { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go return nil, err } } else { log.Panic("unreachable event type", zap.Any("event", e)) } -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go - if c.enableTiDBExtension { -======= if config.EnableTiDBExtension { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go const prefix string = ",\"_tidb\":" out.RawString(prefix) out.RawByte('{') @@ -315,30 +268,7 @@ func eventTypeString(e *model.RowChangedEvent) string { return "UPDATE" } -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go func (c *JSONBatchEncoder) newJSONMessageForDDL(e *model.DDLEvent) canalJSONMessageInterface { -======= -// JSONRowEventEncoder encodes row event in JSON format -type JSONRowEventEncoder struct { - builder *canalEntryBuilder - messages []*common.Message - - config *common.Config -} - -// newJSONRowEventEncoder creates a new JSONRowEventEncoder -func newJSONRowEventEncoder(config *common.Config) codec.RowEventEncoder { - encoder := &JSONRowEventEncoder{ - builder: newCanalEntryBuilder(), - messages: make([]*common.Message, 0, 1), - - config: config, - } - return encoder -} - -func (c *JSONRowEventEncoder) newJSONMessageForDDL(e *model.DDLEvent) canalJSONMessageInterface { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go msg := &JSONMessage{ ID: 0, // ignored by both Canal Adapter and Flink Schema: e.TableInfo.TableName.Schema, @@ -373,15 +303,9 @@ func (c *JSONBatchEncoder) newJSONMessage4CheckpointEvent(ts uint64) *canalJSONM } } -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go // EncodeCheckpointEvent implements the EventBatchEncoder interface func (c *JSONBatchEncoder) EncodeCheckpointEvent(ts uint64) (*common.Message, error) { - if !c.enableTiDBExtension { -======= -// EncodeCheckpointEvent implements the RowEventEncoder interface -func (c *JSONRowEventEncoder) EncodeCheckpointEvent(ts uint64) (*common.Message, error) { if !c.config.EnableTiDBExtension { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go return nil, nil } @@ -400,17 +324,10 @@ func (c *JSONBatchEncoder) AppendRowChangedEvent( e *model.RowChangedEvent, callback func(), ) error { -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder.go value, err := c.newJSONMessageForDML(e) -======= - value, err := newJSONMessageForDML(c.builder, e, c.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv 
as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder.go if err != nil { return errors.Trace(err) } - if len(c.terminator) > 0 { - value = append(value, c.terminator...) - } length := len(value) + common.MaxRecordOverhead // for single message that is longer than max-message-bytes, do not send it. diff --git a/cdc/sink/codec/canal/canal_json_encoder_test.go b/cdc/sink/codec/canal/canal_json_encoder_test.go index 5fa5b501673..871a09a91f4 100644 --- a/cdc/sink/codec/canal/canal_json_encoder_test.go +++ b/cdc/sink/codec/canal/canal_json_encoder_test.go @@ -33,17 +33,7 @@ func TestBuildJSONBatchEncoder(t *testing.T) { builder := &jsonBatchEncoderBuilder{config: cfg} encoder, ok := builder.Build().(*JSONBatchEncoder) require.True(t, ok) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - require.False(t, encoder.enableTiDBExtension) - - cfg.EnableTiDBExtension = true - builder = &jsonBatchEncoderBuilder{config: cfg} - encoder, ok = builder.Build().(*JSONBatchEncoder) - require.True(t, ok) - require.True(t, encoder.enableTiDBExtension) -======= require.NotNil(t, encoder.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go } func TestNewCanalJSONMessage4DML(t *testing.T) { @@ -57,11 +47,7 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { encoder, ok := e.(*JSONBatchEncoder) require.True(t, ok) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err := encoder.newJSONMessageForDML(testCaseInsert) -======= - data, err := newJSONMessageForDML(encoder.builder, testCaseInsert, encoder.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.Nil(t, err) var msg canalJSONMessageInterface = &JSONMessage{} err = json.Unmarshal(data, msg) @@ -110,12 +96,8 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.Equal(t, item.expectedEncodedValue, obtainedValue) } -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err = encoder.newJSONMessageForDML(testCaseUpdate) -======= - data, err = newJSONMessageForDML(encoder.builder, testCaseUpdate, encoder.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go - require.Nil(t, err) + require.NoError(t, err) jsonMsg = &JSONMessage{} err = json.Unmarshal(data, jsonMsg) require.Nil(t, err) @@ -123,9 +105,6 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.NotNil(t, jsonMsg.Old) require.Equal(t, "UPDATE", jsonMsg.EventType) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - data, err = encoder.newJSONMessageForDML(testCaseDelete) -======= for _, col := range testCaseUpdate.Columns { require.Contains(t, jsonMsg.Data[0], col.Name) require.Contains(t, jsonMsg.SQLType, col.Name) @@ -135,8 +114,7 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.Contains(t, jsonMsg.Old[0], col.Name) } - data, err = newJSONMessageForDML(encoder.builder, testCaseDelete, encoder.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go + data, err = encoder.newJSONMessageForDML(testCaseDelete) require.Nil(t, err) jsonMsg = &JSONMessage{} err = 
json.Unmarshal(data, jsonMsg) @@ -145,16 +123,11 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.Nil(t, jsonMsg.Old) require.Equal(t, "DELETE", jsonMsg.EventType) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - e = newJSONBatchEncoder(&common.Config{ - EnableTiDBExtension: true, - Terminator: "", -======= for _, col := range testCaseDelete.PreColumns { require.Contains(t, jsonMsg.Data[0], col.Name) } - data, err = newJSONMessageForDML(encoder.builder, testCaseDelete, &common.Config{OnlyHandleKeyColumns: true}) + data, err = encoder.newJSONMessageForDML(testCaseDelete) require.NoError(t, err) jsonMsg = &JSONMessage{} err = json.Unmarshal(data, jsonMsg) @@ -175,20 +148,14 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { } e = newJSONRowEventEncoder(&common.Config{ - EnableTiDBExtension: true, - Terminator: "", - OnlyOutputUpdatedColumns: true, ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go + EnableTiDBExtension: true, + Terminator: "", }) require.NotNil(t, e) encoder, ok = e.(*JSONBatchEncoder) require.True(t, ok) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go data, err = encoder.newJSONMessageForDML(testCaseUpdate) -======= - data, err = newJSONMessageForDML(encoder.builder, testCaseUpdate, encoder.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.Nil(t, err) withExtension := &canalJSONMessageWithTiDBExtension{} @@ -197,34 +164,13 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.NotNil(t, withExtension.Extensions) require.Equal(t, testCaseUpdate.CommitTs, withExtension.Extensions.CommitTs) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go -======= - - encoder, ok = e.(*JSONRowEventEncoder) - require.True(t, ok) - data, err = newJSONMessageForDML(encoder.builder, testCaseUpdate, encoder.config) - require.Nil(t, err) - - withExtension = &canalJSONMessageWithTiDBExtension{} - err = json.Unmarshal(data, withExtension) - require.Nil(t, err) - require.Equal(t, 0, len(withExtension.JSONMessage.Old[0])) - - require.NotNil(t, withExtension.Extensions) - require.Equal(t, testCaseUpdate.CommitTs, withExtension.Extensions.CommitTs) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go } func TestNewCanalJSONMessageFromDDL(t *testing.T) { t.Parallel() -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder()} - require.NotNil(t, encoder) -======= encoder, ok := newJSONRowEventEncoder(&common.Config{}).(*JSONRowEventEncoder) require.True(t, ok) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go message := encoder.newJSONMessageForDDL(testCaseDDL) require.NotNil(t, message) @@ -238,15 +184,10 @@ func TestNewCanalJSONMessageFromDDL(t *testing.T) { require.Equal(t, testCaseDDL.Query, msg.Query) require.Equal(t, "CREATE", msg.EventType) -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - encoder = &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: true} - require.NotNil(t, encoder) -======= encoder, ok = 
newJSONRowEventEncoder(&common.Config{ EnableTiDBExtension: true, }).(*JSONRowEventEncoder) require.True(t, ok) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go message = encoder.newJSONMessageForDDL(testCaseDDL) require.NotNil(t, message) @@ -297,18 +238,10 @@ func TestEncodeCheckpointEvent(t *testing.T) { t.Parallel() var watermark uint64 = 2333 for _, enable := range []bool{false, true} { -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: enable} -======= config := &common.Config{ EnableTiDBExtension: enable, } - encoder := &JSONRowEventEncoder{ - builder: newCanalEntryBuilder(), - config: config, - } - ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go + encoder := newJSONBatchEncoder(config).(*JSONBatchEncoder) require.NotNil(t, encoder) msg, err := encoder.EncodeCheckpointEvent(watermark) @@ -345,16 +278,7 @@ func TestEncodeCheckpointEvent(t *testing.T) { func TestCheckpointEventValueMarshal(t *testing.T) { t.Parallel() var watermark uint64 = 1024 -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - encoder := &JSONBatchEncoder{ - builder: newCanalEntryBuilder(), - enableTiDBExtension: true, -======= - encoder := &JSONRowEventEncoder{ - builder: newCanalEntryBuilder(), - config: &common.Config{EnableTiDBExtension: true}, ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go - } + encoder := newJSONBatchEncoder(&common.Config{EnableTiDBExtension: true}) require.NotNil(t, encoder) msg, err := encoder.EncodeCheckpointEvent(watermark) require.Nil(t, err) @@ -398,14 +322,10 @@ func TestCheckpointEventValueMarshal(t *testing.T) { func TestDDLEventWithExtensionValueMarshal(t *testing.T) { t.Parallel() -<<<<<<< HEAD:cdc/sink/codec/canal/canal_json_encoder_test.go - encoder := &JSONBatchEncoder{builder: newCanalEntryBuilder(), enableTiDBExtension: true} -======= encoder := &JSONRowEventEncoder{ builder: newCanalEntryBuilder(), config: &common.Config{EnableTiDBExtension: true}, } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/canal/canal_json_row_event_encoder_test.go require.NotNil(t, encoder) message := encoder.newJSONMessageForDDL(testCaseDDL) diff --git a/cdc/sink/codec/common/config.go b/cdc/sink/codec/common/config.go index cc3e0786d00..e5a1596ce0b 100644 --- a/cdc/sink/codec/common/config.go +++ b/cdc/sink/codec/common/config.go @@ -31,6 +31,8 @@ const defaultMaxBatchSize int = 16 type Config struct { Protocol config.Protocol + OnlyHandleKeyColumns bool + // control batch behavior, only for `open-protocol` and `craft` at the moment. 
MaxMessageBytes int MaxBatchSize int @@ -135,6 +137,8 @@ func (c *Config) Apply(sinkURI *url.URL, config *config.ReplicaConfig) error { } } + c.OnlyHandleKeyColumns = !config.EnableOldValue + return nil } diff --git a/cdc/sink/codec/craft/craft_encoder.go b/cdc/sink/codec/craft/craft_encoder.go index 86860ccce7c..ce6e99c17cd 100644 --- a/cdc/sink/codec/craft/craft_encoder.go +++ b/cdc/sink/codec/craft/craft_encoder.go @@ -96,11 +96,7 @@ func (e *BatchEncoder) flush() { } // NewBatchEncoder creates a new BatchEncoder. -<<<<<<< HEAD:cdc/sink/codec/craft/craft_encoder.go -func NewBatchEncoder() codec.EventBatchEncoder { -======= -func NewBatchEncoder(config *common.Config) codec.RowEventEncoder { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/craft/craft_encoder.go +func NewBatchEncoder(config *common.Config) codec.EventBatchEncoder { // 64 is a magic number that come up with these assumptions and manual benchmark. // 1. Most table will not have more than 64 columns // 2. It only worth allocating slices in batch for slices that's small enough @@ -112,16 +108,8 @@ type batchEncoderBuilder struct { } // Build a BatchEncoder -<<<<<<< HEAD:cdc/sink/codec/craft/craft_encoder.go func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { - encoder := NewBatchEncoder() - encoder.(*BatchEncoder).MaxMessageBytes = b.config.MaxMessageBytes - encoder.(*BatchEncoder).MaxBatchSize = b.config.MaxBatchSize - return encoder -======= -func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { return NewBatchEncoder(b.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/craft/craft_encoder.go } // NewBatchEncoderBuilder creates a craft batchEncoderBuilder. @@ -130,11 +118,7 @@ func NewBatchEncoderBuilder(config *common.Config) codec.EncoderBuilder { } // NewBatchEncoderWithAllocator creates a new BatchEncoder with given allocator. -<<<<<<< HEAD:cdc/sink/codec/craft/craft_encoder.go -func NewBatchEncoderWithAllocator(allocator *SliceAllocator) codec.EventBatchEncoder { -======= -func NewBatchEncoderWithAllocator(allocator *SliceAllocator, config *common.Config) codec.RowEventEncoder { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/craft/craft_encoder.go +func NewBatchEncoderWithAllocator(allocator *SliceAllocator, config *common.Config) codec.EventBatchEncoder { return &BatchEncoder{ allocator: allocator, messageBuf: make([]*common.Message, 0, 2), diff --git a/cdc/sink/codec/maxwell/maxwell_encoder.go b/cdc/sink/codec/maxwell/maxwell_encoder.go index b7954d70b06..05562867ec8 100644 --- a/cdc/sink/codec/maxwell/maxwell_encoder.go +++ b/cdc/sink/codec/maxwell/maxwell_encoder.go @@ -111,11 +111,7 @@ func (d *BatchEncoder) reset() { } // newBatchEncoder creates a new maxwell BatchEncoder. 
-<<<<<<< HEAD:cdc/sink/codec/maxwell/maxwell_encoder.go -func newBatchEncoder() codec.EventBatchEncoder { -======= -func newBatchEncoder(config *common.Config) codec.RowEventEncoder { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/maxwell/maxwell_encoder.go +func newBatchEncoder(config *common.Config) codec.EventBatchEncoder { batch := &BatchEncoder{ keyBuf: &bytes.Buffer{}, valueBuf: &bytes.Buffer{}, @@ -131,23 +127,13 @@ type batchEncoderBuilder struct { } // NewBatchEncoderBuilder creates a maxwell batchEncoderBuilder. -<<<<<<< HEAD:cdc/sink/codec/maxwell/maxwell_encoder.go -func NewBatchEncoderBuilder() codec.EncoderBuilder { - return &batchEncoderBuilder{} -} - -// Build a `maxwellBatchEncoder` -func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { - return newBatchEncoder() -======= -func NewBatchEncoderBuilder(config *common.Config) codec.RowEventEncoderBuilder { +func NewBatchEncoderBuilder(config *common.Config) codec.EncoderBuilder { return &batchEncoderBuilder{ config: config, } } // Build a `maxwellBatchEncoder` -func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { +func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { return newBatchEncoder(b.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/maxwell/maxwell_encoder.go } diff --git a/cdc/sink/codec/open/open_protocol_encoder.go b/cdc/sink/codec/open/open_protocol_encoder.go index 3a0aa7cc7b8..dac5b05b499 100644 --- a/cdc/sink/codec/open/open_protocol_encoder.go +++ b/cdc/sink/codec/open/open_protocol_encoder.go @@ -34,13 +34,7 @@ type BatchEncoder struct { callbackBuff []func() curBatchSize int -<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go - // configs - MaxMessageBytes int - MaxBatchSize int -======= config *common.Config ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go } // AppendRowChangedEvent implements the EventBatchEncoder interface @@ -55,11 +49,7 @@ func (d *BatchEncoder) AppendRowChangedEvent( if err != nil { return errors.Trace(err) } -<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go value, err := valueMsg.encode() -======= - value, err := valueMsg.encode(d.config.OnlyOutputUpdatedColumns) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go if err != nil { return errors.Trace(err) } @@ -197,17 +187,8 @@ type batchEncoderBuilder struct { } // Build a BatchEncoder -<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go func (b *batchEncoderBuilder) Build() codec.EventBatchEncoder { - encoder := NewBatchEncoder() - encoder.(*BatchEncoder).MaxMessageBytes = b.config.MaxMessageBytes - encoder.(*BatchEncoder).MaxBatchSize = b.config.MaxBatchSize - - return encoder -======= -func (b *batchEncoderBuilder) Build() codec.RowEventEncoder { return NewBatchEncoder(b.config) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go } // NewBatchEncoderBuilder creates an open-protocol batchEncoderBuilder. @@ -216,14 +197,8 @@ func NewBatchEncoderBuilder(config *common.Config) codec.EncoderBuilder { } // NewBatchEncoder creates a new BatchEncoder. 
-<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_encoder.go -func NewBatchEncoder() codec.EventBatchEncoder { - batch := &BatchEncoder{} - return batch -======= -func NewBatchEncoder(config *common.Config) codec.RowEventEncoder { +func NewBatchEncoder(config *common.Config) codec.EventBatchEncoder { return &BatchEncoder{ config: config, } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_encoder.go } diff --git a/cdc/sink/codec/open/open_protocol_message_test.go b/cdc/sink/codec/open/open_protocol_message_test.go index 4812bce2ebd..6b9b73bc20a 100644 --- a/cdc/sink/codec/open/open_protocol_message_test.go +++ b/cdc/sink/codec/open/open_protocol_message_test.go @@ -90,75 +90,6 @@ func TestVarBinaryCol(t *testing.T) { col2 := mqCol2.ToRowChangeColumn("test") require.Equal(t, col, col2) } -<<<<<<< HEAD:cdc/sink/codec/open/open_protocol_message_test.go -======= - -func TestOnlyOutputUpdatedColumn(t *testing.T) { - t.Parallel() - cases := []struct { - pre interface{} - updated interface{} - output bool - }{ - { - pre: []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}, - updated: []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}, - output: false, - }, - { - pre: uint64(1), - updated: uint64(1), - output: false, - }, - { - pre: nil, - updated: nil, - output: false, - }, - { - pre: float64(6.2), - updated: float32(6.2), - output: true, - }, - { - pre: uint64(1), - updated: int64(1), - output: true, - }, - { - pre: time.Time{}, - updated: time.Time{}, - output: false, - }, - { - pre: "time.Time{}", - updated: time.Time{}, - output: true, - }, - { - pre: "time.Time{}", - updated: "time.Time{}", - output: false, - }, - } - - for _, cs := range cases { - col := internal.Column{ - Value: cs.pre, - } - col2 := internal.Column{ - Value: cs.updated, - } - row := &messageRow{ - Update: map[string]internal.Column{"test": col2}, - PreColumns: map[string]internal.Column{"test": col}, - } - _, err := row.encode(true) - require.Nil(t, err) - _, ok := row.PreColumns["test"] - assert.Equal(t, cs.output, ok) - } -} func TestRowChanged2MsgOnlyHandleKeyColumns(t *testing.T) { t.Parallel() @@ -216,4 +147,3 @@ func TestRowChanged2MsgOnlyHandleKeyColumns(t *testing.T) { _, ok = value.Delete["a"] require.True(t, ok) } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)):pkg/sink/codec/open/open_protocol_message_test.go diff --git a/pkg/cmd/cli/cli_changefeed_create.go b/pkg/cmd/cli/cli_changefeed_create.go index 2e80fd2758d..a6aaac87325 100644 --- a/pkg/cmd/cli/cli_changefeed_create.go +++ b/pkg/cmd/cli/cli_changefeed_create.go @@ -155,28 +155,9 @@ func (o *createChangefeedOptions) completeReplicaCfg( return err } -<<<<<<< HEAD - protocol := sinkURIParsed.Query().Get(config.ProtocolKey) - if protocol != "" { - cfg.Sink.Protocol = protocol - } - for _, fp := range config.ForceEnableOldValueProtocols { - if cfg.Sink.Protocol == fp { - log.Warn("Attempting to replicate without old value enabled. 
CDC will enable old value and continue.", zap.String("protocol", cfg.Sink.Protocol)) - cfg.EnableOldValue = true - break - } - } - - if cfg.ForceReplicate { - log.Error("if use force replicate, old value feature must be enabled") - return cerror.ErrOldValueNotEnabled.GenWithStackByArgs() - } -======= err = cfg.AdjustEnableOldValueAndVerifyForceReplicate(uri) if err != nil { return err ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) } for _, rules := range cfg.Sink.DispatchRules { diff --git a/pkg/config/replica_config.go b/pkg/config/replica_config.go index abde6f49f63..e1741eb0d4d 100644 --- a/pkg/config/replica_config.go +++ b/pkg/config/replica_config.go @@ -17,6 +17,7 @@ import ( "encoding/json" "fmt" "net/url" + "strings" "time" "github.com/pingcap/errors" @@ -24,6 +25,7 @@ import ( "github.com/pingcap/tiflow/pkg/config/outdated" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/redo" + "github.com/pingcap/tiflow/pkg/sink" "go.uber.org/zap" ) @@ -259,10 +261,6 @@ func (c *ReplicaConfig) AdjustEnableOldValue(scheme, protocol string) { func (c *ReplicaConfig) AdjustEnableOldValueAndVerifyForceReplicate(sinkURI *url.URL) error { scheme := strings.ToLower(sinkURI.Scheme) protocol := sinkURI.Query().Get(ProtocolKey) - if protocol != "" { - c.Sink.Protocol = util.AddressOf(protocol) - } - protocol = util.GetOrZero(c.Sink.Protocol) c.AdjustEnableOldValue(scheme, protocol) if !c.ForceReplicate { diff --git a/pkg/config/replica_config_test.go b/pkg/config/replica_config_test.go index 59fd8225a18..4fc6319a5df 100644 --- a/pkg/config/replica_config_test.go +++ b/pkg/config/replica_config_test.go @@ -16,16 +16,11 @@ package config import ( "bytes" "encoding/json" + "net/url" "testing" "time" -<<<<<<< HEAD -======= - "github.com/aws/aws-sdk-go/aws" cerror "github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/integrity" - "github.com/pingcap/tiflow/pkg/util" ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) "github.com/stretchr/testify/require" ) @@ -108,40 +103,26 @@ func TestReplicaConfigOutDated(t *testing.T) { func TestReplicaConfigValidate(t *testing.T) { t.Parallel() conf := GetDefaultReplicaConfig() -<<<<<<< HEAD - require.Nil(t, conf.ValidateAndAdjust(nil)) -======= sinkURL, err := url.Parse("blackhole://") require.NoError(t, err) require.NoError(t, conf.ValidateAndAdjust(sinkURL)) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) // Incorrect sink configuration. 
conf = GetDefaultReplicaConfig() conf.Sink.Protocol = "canal" conf.EnableOldValue = false -<<<<<<< HEAD - require.Regexp(t, ".*canal protocol requires old value to be enabled.*", - conf.ValidateAndAdjust(nil)) -======= err = conf.ValidateAndAdjust(sinkURL) require.NoError(t, err) require.True(t, conf.EnableOldValue) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) conf = GetDefaultReplicaConfig() conf.Sink.DispatchRules = []*DispatchRule{ {Matcher: []string{"a.b"}, DispatcherRule: "d1", PartitionRule: "r1"}, } -<<<<<<< HEAD - require.Regexp(t, ".*dispatcher and partition cannot be configured both.*", - conf.ValidateAndAdjust(nil)) -======= err = conf.ValidateAndAdjust(sinkURL) require.Regexp(t, ".*dispatcher and partition cannot be configured both.*", err) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) // Correct sink configuration. conf = GetDefaultReplicaConfig() @@ -150,7 +131,7 @@ func TestReplicaConfigValidate(t *testing.T) { {Matcher: []string{"a.c"}, PartitionRule: "p1"}, {Matcher: []string{"a.d"}}, } - err := conf.ValidateAndAdjust(nil) + err = conf.ValidateAndAdjust(nil) require.Nil(t, err) rules := conf.Sink.DispatchRules require.Equal(t, "d1", rules[0].PartitionRule) diff --git a/pkg/config/sink.go b/pkg/config/sink.go index 2df9d50d2af..7fbde76f538 100644 --- a/pkg/config/sink.go +++ b/pkg/config/sink.go @@ -249,101 +249,11 @@ type ColumnSelector struct { Columns []string `toml:"columns" json:"columns"` } -<<<<<<< HEAD -func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL, enableOldValue bool) error { -======= -// CodecConfig represents a MQ codec configuration -type CodecConfig struct { - EnableTiDBExtension *bool `toml:"enable-tidb-extension" json:"enable-tidb-extension,omitempty"` - MaxBatchSize *int `toml:"max-batch-size" json:"max-batch-size,omitempty"` - AvroEnableWatermark *bool `toml:"avro-enable-watermark" json:"avro-enable-watermark"` - AvroDecimalHandlingMode *string `toml:"avro-decimal-handling-mode" json:"avro-decimal-handling-mode,omitempty"` - AvroBigintUnsignedHandlingMode *string `toml:"avro-bigint-unsigned-handling-mode" json:"avro-bigint-unsigned-handling-mode,omitempty"` -} - -// KafkaConfig represents a kafka sink configuration -type KafkaConfig struct { - PartitionNum *int32 `toml:"partition-num" json:"partition-num,omitempty"` - ReplicationFactor *int16 `toml:"replication-factor" json:"replication-factor,omitempty"` - KafkaVersion *string `toml:"kafka-version" json:"kafka-version,omitempty"` - MaxMessageBytes *int `toml:"max-message-bytes" json:"max-message-bytes,omitempty"` - Compression *string `toml:"compression" json:"compression,omitempty"` - KafkaClientID *string `toml:"kafka-client-id" json:"kafka-client-id,omitempty"` - AutoCreateTopic *bool `toml:"auto-create-topic" json:"auto-create-topic,omitempty"` - DialTimeout *string `toml:"dial-timeout" json:"dial-timeout,omitempty"` - WriteTimeout *string `toml:"write-timeout" json:"write-timeout,omitempty"` - ReadTimeout *string `toml:"read-timeout" json:"read-timeout,omitempty"` - RequiredAcks *int `toml:"required-acks" json:"required-acks,omitempty"` - SASLUser *string `toml:"sasl-user" json:"sasl-user,omitempty"` - SASLPassword *string `toml:"sasl-password" json:"sasl-password,omitempty"` - SASLMechanism *string `toml:"sasl-mechanism" json:"sasl-mechanism,omitempty"` - SASLGssAPIAuthType *string `toml:"sasl-gssapi-auth-type" 
json:"sasl-gssapi-auth-type,omitempty"` - SASLGssAPIKeytabPath *string `toml:"sasl-gssapi-keytab-path" json:"sasl-gssapi-keytab-path,omitempty"` - SASLGssAPIKerberosConfigPath *string `toml:"sasl-gssapi-kerberos-config-path" json:"sasl-gssapi-kerberos-config-path,omitempty"` - SASLGssAPIServiceName *string `toml:"sasl-gssapi-service-name" json:"sasl-gssapi-service-name,omitempty"` - SASLGssAPIUser *string `toml:"sasl-gssapi-user" json:"sasl-gssapi-user,omitempty"` - SASLGssAPIPassword *string `toml:"sasl-gssapi-password" json:"sasl-gssapi-password,omitempty"` - SASLGssAPIRealm *string `toml:"sasl-gssapi-realm" json:"sasl-gssapi-realm,omitempty"` - SASLGssAPIDisablePafxfast *bool `toml:"sasl-gssapi-disable-pafxfast" json:"sasl-gssapi-disable-pafxfast,omitempty"` - SASLOAuthClientID *string `toml:"sasl-oauth-client-id" json:"sasl-oauth-client-id,omitempty"` - SASLOAuthClientSecret *string `toml:"sasl-oauth-client-secret" json:"sasl-oauth-client-secret,omitempty"` - SASLOAuthTokenURL *string `toml:"sasl-oauth-token-url" json:"sasl-oauth-token-url,omitempty"` - SASLOAuthScopes []string `toml:"sasl-oauth-scopes" json:"sasl-oauth-scopes,omitempty"` - SASLOAuthGrantType *string `toml:"sasl-oauth-grant-type" json:"sasl-oauth-grant-type,omitempty"` - SASLOAuthAudience *string `toml:"sasl-oauth-audience" json:"sasl-oauth-audience,omitempty"` - EnableTLS *bool `toml:"enable-tls" json:"enable-tls,omitempty"` - CA *string `toml:"ca" json:"ca,omitempty"` - Cert *string `toml:"cert" json:"cert,omitempty"` - Key *string `toml:"key" json:"key,omitempty"` - InsecureSkipVerify *bool `toml:"insecure-skip-verify" json:"insecure-skip-verify,omitempty"` - CodecConfig *CodecConfig `toml:"codec-config" json:"codec-config,omitempty"` -} - -// MySQLConfig represents a MySQL sink configuration -type MySQLConfig struct { - WorkerCount *int `toml:"worker-count" json:"worker-count,omitempty"` - MaxTxnRow *int `toml:"max-txn-row" json:"max-txn-row,omitempty"` - MaxMultiUpdateRowSize *int `toml:"max-multi-update-row-size" json:"max-multi-update-row-size,omitempty"` - MaxMultiUpdateRowCount *int `toml:"max-multi-update-row" json:"max-multi-update-row,omitempty"` - TiDBTxnMode *string `toml:"tidb-txn-mode" json:"tidb-txn-mode,omitempty"` - SSLCa *string `toml:"ssl-ca" json:"ssl-ca,omitempty"` - SSLCert *string `toml:"ssl-cert" json:"ssl-cert,omitempty"` - SSLKey *string `toml:"ssl-key" json:"ssl-key,omitempty"` - TimeZone *string `toml:"time-zone" json:"time-zone,omitempty"` - WriteTimeout *string `toml:"write-timeout" json:"write-timeout,omitempty"` - ReadTimeout *string `toml:"read-timeout" json:"read-timeout,omitempty"` - Timeout *string `toml:"timeout" json:"timeout,omitempty"` - EnableBatchDML *bool `toml:"enable-batch-dml" json:"enable-batch-dml,omitempty"` - EnableMultiStatement *bool `toml:"enable-multi-statement" json:"enable-multi-statement,omitempty"` - EnableCachePreparedStatement *bool `toml:"enable-cache-prepared-statement" json:"enable-cache-prepared-statement,omitempty"` -} - -// CloudStorageConfig represents a cloud storage sink configuration -type CloudStorageConfig struct { - WorkerCount *int `toml:"worker-count" json:"worker-count,omitempty"` - FlushInterval *string `toml:"flush-interval" json:"flush-interval,omitempty"` - FileSize *int `toml:"file-size" json:"file-size,omitempty"` -} - func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL) error { ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) if err := 
s.validateAndAdjustSinkURI(sinkURI); err != nil { return err } -<<<<<<< HEAD - if !enableOldValue { - for _, protocolStr := range ForceEnableOldValueProtocols { - if protocolStr == s.Protocol { - log.Error(fmt.Sprintf("Old value is not enabled when using `%s` protocol. "+ - "Please update changefeed config", s.Protocol)) - return cerror.WrapError(cerror.ErrKafkaInvalidConfig, - errors.New(fmt.Sprintf("%s protocol requires old value to be enabled", s.Protocol))) - } - } - } -======= ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) for _, rule := range s.DispatchRules { if rule.DispatcherRule != "" && rule.PartitionRule != "" { log.Error("dispatcher and partition cannot be configured both", zap.Any("rule", rule)) diff --git a/pkg/config/sink_test.go b/pkg/config/sink_test.go index 87ca87d47ca..27477a6a8d2 100644 --- a/pkg/config/sink_test.go +++ b/pkg/config/sink_test.go @@ -20,70 +20,6 @@ import ( "github.com/stretchr/testify/require" ) -<<<<<<< HEAD -func TestValidateOldValue(t *testing.T) { - t.Parallel() - testCases := []struct { - protocol string - enableOldValue bool - expectedErr string - }{ - { - protocol: "default", - enableOldValue: false, - expectedErr: "", - }, - { - protocol: "default", - enableOldValue: true, - expectedErr: "", - }, - { - protocol: "canal-json", - enableOldValue: false, - expectedErr: ".*canal-json protocol requires old value to be enabled.*", - }, - { - protocol: "canal-json", - enableOldValue: true, - expectedErr: "", - }, - { - protocol: "canal", - enableOldValue: false, - expectedErr: ".*canal protocol requires old value to be enabled.*", - }, - { - protocol: "canal", - enableOldValue: true, - expectedErr: "", - }, - { - protocol: "maxwell", - enableOldValue: false, - expectedErr: ".*maxwell protocol requires old value to be enabled.*", - }, - { - protocol: "maxwell", - enableOldValue: true, - expectedErr: "", - }, - } - - for _, tc := range testCases { - cfg := SinkConfig{ - Protocol: tc.protocol, - } - if tc.expectedErr == "" { - require.Nil(t, cfg.validateAndAdjust(nil, tc.enableOldValue)) - } else { - require.Regexp(t, tc.expectedErr, cfg.validateAndAdjust(nil, tc.enableOldValue)) - } - } -} - -======= ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) func TestValidateTxnAtomicity(t *testing.T) { t.Parallel() testCases := []struct { @@ -159,13 +95,8 @@ func TestValidateTxnAtomicity(t *testing.T) { parsedSinkURI, err := url.Parse(tc.sinkURI) require.Nil(t, err) if tc.expectedErr == "" { -<<<<<<< HEAD - require.Nil(t, cfg.validateAndAdjust(parsedSinkURI, true)) - require.Equal(t, tc.shouldSplitTxn, cfg.TxnAtomicity.ShouldSplitTxn()) -======= require.Nil(t, cfg.validateAndAdjust(parsedSinkURI)) - require.Equal(t, tc.shouldSplitTxn, util.GetOrZero(cfg.TxnAtomicity).ShouldSplitTxn()) ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) + require.Equal(t, tc.shouldSplitTxn, cfg.TxnAtomicity.ShouldSplitTxn()) } else { require.Regexp(t, tc.expectedErr, cfg.validateAndAdjust(parsedSinkURI)) } diff --git a/pkg/version/creator_version_gate.go b/pkg/version/creator_version_gate.go index 43504f29143..fc1324d1b05 100644 --- a/pkg/version/creator_version_gate.go +++ b/pkg/version/creator_version_gate.go @@ -95,19 +95,6 @@ func (g *CreatorVersionGate) ChangefeedAcceptProtocolInMysqlSinURI() bool { creatorVersion := semver.New(SanitizeVersion(g.version)) return 
creatorVersion.LessThan(changefeedAcceptProtocolInMysqlSinURI) } -<<<<<<< HEAD -======= - -// ChangefeedInheritSchedulerConfigFromV66 determines whether to inherit -// changefeed scheduler config created by v6.6.0. -func (g *CreatorVersionGate) ChangefeedInheritSchedulerConfigFromV66() bool { - if g.version == "" { - return false - } - - creatorVersion := semver.New(SanitizeVersion(g.version)) - return creatorVersion.Major == 6 && creatorVersion.Minor == 6 -} // ChangefeedAdjustEnableOldValueByProtocol determines whether to adjust // the `enable-old-value` configuration by the using encoding protocol. @@ -119,4 +106,3 @@ func (g *CreatorVersionGate) ChangefeedAdjustEnableOldValueByProtocol() bool { creatorVersion := semver.New(SanitizeVersion(g.version)) return creatorVersion.LessThan(changefeedAdjustEnableOldValueByProtocol) } ->>>>>>> 6537ab8fbc (config(ticdc): enable-old-value always false if using avro or csv as the encoding protocol (#9079)) From d1db8e0134686badbbb4d77590887f3f3bf3f35e Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Sun, 4 Jun 2023 00:43:49 +0800 Subject: [PATCH 3/9] fix make --- cdc/model/changefeed.go | 2 - cdc/sink/codec/canal/canal_json_encoder.go | 12 +- .../codec/maxwell/maxwell_encoder_test.go | 2 +- .../canal/canal_json_txn_event_encoder.go | 121 -------- .../canal_json_txn_event_encoder_test.go | 124 -------- pkg/sink/codec/common/config.go | 287 ------------------ 6 files changed, 7 insertions(+), 541 deletions(-) delete mode 100644 pkg/sink/codec/canal/canal_json_txn_event_encoder.go delete mode 100644 pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go delete mode 100644 pkg/sink/codec/common/config.go diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index 27aa145bedd..631c915698e 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -279,8 +279,6 @@ func (info *ChangeFeedInfo) VerifyAndComplete() { if info.Config.Consistent == nil { info.Config.Consistent = defaultConfig.Consistent } - - return nil } // FixIncompatible fixes incompatible changefeed meta info. 
diff --git a/cdc/sink/codec/canal/canal_json_encoder.go b/cdc/sink/codec/canal/canal_json_encoder.go index 0a1cbf50638..c8e6053bdba 100644 --- a/cdc/sink/codec/canal/canal_json_encoder.go +++ b/cdc/sink/codec/canal/canal_json_encoder.go @@ -38,8 +38,8 @@ type JSONBatchEncoder struct { } // newJSONRowEventEncoder creates a new JSONRowEventEncoder -func newJSONRowEventEncoder(config *common.Config) codec.RowEventEncoder { - encoder := &JSONRowEventEncoder{ +func newJSONRowEventEncoder(config *common.Config) codec.EventBatchEncoder { + encoder := &JSONBatchEncoder{ builder: newCanalEntryBuilder(), messages: make([]*common.Message, 0, 1), @@ -172,7 +172,7 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt emptyColumn := true for _, col := range columns { if col != nil { - if isDelete && config.OnlyHandleKeyColumns && !col.Flag.IsHandleKey() { + if isDelete && c.config.OnlyHandleKeyColumns && !col.Flag.IsHandleKey() { continue } if emptyColumn { @@ -223,7 +223,7 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt if e.IsDelete() { out.RawString(",\"old\":null") out.RawString(",\"data\":") - if err := filling(e.PreColumns, out, config.OnlyHandleKeyColumns); err != nil { + if err := filling(e.PreColumns, out, c.config.OnlyHandleKeyColumns); err != nil { return nil, err } } else if e.IsInsert() { @@ -238,14 +238,14 @@ func (c *JSONBatchEncoder) newJSONMessageForDML(e *model.RowChangedEvent) ([]byt return nil, err } out.RawString(",\"data\":") - if err := filling(e.Columns, out); err != nil { + if err := filling(e.Columns, out, false); err != nil { return nil, err } } else { log.Panic("unreachable event type", zap.Any("event", e)) } - if config.EnableTiDBExtension { + if c.config.EnableTiDBExtension { const prefix string = ",\"_tidb\":" out.RawString(prefix) out.RawByte('{') diff --git a/cdc/sink/codec/maxwell/maxwell_encoder_test.go b/cdc/sink/codec/maxwell/maxwell_encoder_test.go index d9703b8ce52..ff03fc1f711 100644 --- a/cdc/sink/codec/maxwell/maxwell_encoder_test.go +++ b/cdc/sink/codec/maxwell/maxwell_encoder_test.go @@ -20,7 +20,7 @@ import ( timodel "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tiflow/cdc/model" - "github.com/pingcap/tiflow/pkg/sink/codec/common" + "github.com/pingcap/tiflow/cdc/sink/codec/common" "github.com/stretchr/testify/require" ) diff --git a/pkg/sink/codec/canal/canal_json_txn_event_encoder.go b/pkg/sink/codec/canal/canal_json_txn_event_encoder.go deleted file mode 100644 index 67d1055c937..00000000000 --- a/pkg/sink/codec/canal/canal_json_txn_event_encoder.go +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2023 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package canal - -import ( - "bytes" - - "github.com/pingcap/errors" - "github.com/pingcap/log" - "github.com/pingcap/tiflow/cdc/model" - "github.com/pingcap/tiflow/pkg/config" - cerror "github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/sink/codec" - "github.com/pingcap/tiflow/pkg/sink/codec/common" - "go.uber.org/zap" -) - -// JSONTxnEventEncoder encodes txn event in JSON format -type JSONTxnEventEncoder struct { - builder *canalEntryBuilder - - config *common.Config - - // the symbol separating two lines - terminator []byte - valueBuf *bytes.Buffer - batchSize int - callback func() - - // Store some fields of the txn event. - txnCommitTs uint64 - txnSchema *string - txnTable *string -} - -// AppendTxnEvent appends a txn event to the encoder. -func (j *JSONTxnEventEncoder) AppendTxnEvent( - txn *model.SingleTableTxn, - callback func(), -) error { - for _, row := range txn.Rows { - value, err := newJSONMessageForDML(j.builder, row, j.config) - if err != nil { - return errors.Trace(err) - } - length := len(value) + common.MaxRecordOverhead - // For single message that is longer than max-message-bytes, do not send it. - if length > j.config.MaxMessageBytes { - log.Warn("Single message is too large for canal-json", - zap.Int("maxMessageBytes", j.config.MaxMessageBytes), - zap.Int("length", length), - zap.Any("table", row.Table)) - return cerror.ErrMessageTooLarge.GenWithStackByArgs() - } - j.valueBuf.Write(value) - j.valueBuf.Write(j.terminator) - j.batchSize++ - } - j.callback = callback - j.txnCommitTs = txn.CommitTs - j.txnSchema = &txn.Table.Schema - j.txnTable = &txn.Table.Table - return nil -} - -// Build builds a message from the encoder and resets the encoder. -func (j *JSONTxnEventEncoder) Build() []*common.Message { - if j.batchSize == 0 { - return nil - } - - ret := common.NewMsg(config.ProtocolCanalJSON, nil, - j.valueBuf.Bytes(), j.txnCommitTs, model.MessageTypeRow, j.txnSchema, j.txnTable) - ret.SetRowsCount(j.batchSize) - ret.Callback = j.callback - j.valueBuf.Reset() - j.callback = nil - j.batchSize = 0 - j.txnCommitTs = 0 - j.txnSchema = nil - j.txnTable = nil - - return []*common.Message{ret} -} - -// newJSONTxnEventEncoder creates a new JSONTxnEventEncoder -func newJSONTxnEventEncoder(config *common.Config) codec.TxnEventEncoder { - encoder := &JSONTxnEventEncoder{ - builder: newCanalEntryBuilder(), - valueBuf: &bytes.Buffer{}, - terminator: []byte(config.Terminator), - - config: config, - } - return encoder -} - -type jsonTxnEventEncoderBuilder struct { - config *common.Config -} - -// NewJSONTxnEventEncoderBuilder creates a jsonTxnEventEncoderBuilder. -func NewJSONTxnEventEncoderBuilder(config *common.Config) codec.TxnEventEncoderBuilder { - return &jsonTxnEventEncoderBuilder{config: config} -} - -// Build a `jsonTxnEventEncoderBuilder` -func (b *jsonTxnEventEncoderBuilder) Build() codec.TxnEventEncoder { - return newJSONTxnEventEncoder(b.config) -} diff --git a/pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go b/pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go deleted file mode 100644 index 71db664d285..00000000000 --- a/pkg/sink/codec/canal/canal_json_txn_event_encoder_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2023 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package canal - -import ( - "testing" - - "github.com/pingcap/tidb/parser/mysql" - "github.com/pingcap/tiflow/cdc/model" - "github.com/pingcap/tiflow/pkg/config" - "github.com/pingcap/tiflow/pkg/sink/codec/common" - "github.com/stretchr/testify/require" -) - -func TestBuildCanalJSONTxnEventEncoder(t *testing.T) { - t.Parallel() - cfg := common.NewConfig(config.ProtocolCanalJSON) - - builder := NewJSONTxnEventEncoderBuilder(cfg) - encoder, ok := builder.Build().(*JSONTxnEventEncoder) - require.True(t, ok) - require.NotNil(t, encoder.config) -} - -func TestCanalJSONTxnEventEncoderMaxMessageBytes(t *testing.T) { - t.Parallel() - - // the size of `testEvent` after being encoded by canal-json is 200 - testEvent := &model.SingleTableTxn{ - Table: &model.TableName{Schema: "a", Table: "b"}, - Rows: []*model.RowChangedEvent{ - { - CommitTs: 1, - Table: &model.TableName{Schema: "a", Table: "b"}, - Columns: []*model.Column{{ - Name: "col1", - Type: mysql.TypeVarchar, - Value: []byte("aa"), - }}, - }, - }, - } - - // the test message length is smaller than max-message-bytes - maxMessageBytes := 300 - cfg := common.NewConfig(config.ProtocolCanalJSON).WithMaxMessageBytes(maxMessageBytes) - encoder := NewJSONTxnEventEncoderBuilder(cfg).Build() - err := encoder.AppendTxnEvent(testEvent, nil) - require.Nil(t, err) - - // the test message length is larger than max-message-bytes - cfg = cfg.WithMaxMessageBytes(100) - encoder = NewJSONTxnEventEncoderBuilder(cfg).Build() - err = encoder.AppendTxnEvent(testEvent, nil) - require.NotNil(t, err) -} - -func TestCanalJSONAppendTxnEventEncoderWithCallback(t *testing.T) { - t.Parallel() - - cfg := common.NewConfig(config.ProtocolCanalJSON) - encoder := NewJSONTxnEventEncoderBuilder(cfg).Build() - require.NotNil(t, encoder) - - count := 0 - - txn := &model.SingleTableTxn{ - Table: &model.TableName{Schema: "a", Table: "b"}, - Rows: []*model.RowChangedEvent{ - { - CommitTs: 1, - Table: &model.TableName{Schema: "a", Table: "b"}, - Columns: []*model.Column{{ - Name: "col1", - Type: mysql.TypeVarchar, - Value: []byte("aa"), - }}, - }, - { - CommitTs: 2, - Table: &model.TableName{Schema: "a", Table: "b"}, - Columns: []*model.Column{{ - Name: "col1", - Type: mysql.TypeVarchar, - Value: []byte("bb"), - }}, - }, - }, - } - - // Empty build makes sure that the callback build logic not broken. - msgs := encoder.Build() - require.Len(t, msgs, 0, "no message should be built and no panic") - - // Append the events. - callback := func() { - count++ - } - err := encoder.AppendTxnEvent(txn, callback) - require.Nil(t, err) - require.Equal(t, 0, count, "nothing should be called") - - msgs = encoder.Build() - require.Len(t, msgs, 1, "expected one message") - msgs[0].Callback() - require.Equal(t, 1, count, "expected one callback be called") - // Assert the build reset all the internal states. 
- require.Nil(t, encoder.(*JSONTxnEventEncoder).txnSchema) - require.Nil(t, encoder.(*JSONTxnEventEncoder).txnTable) - require.Nil(t, encoder.(*JSONTxnEventEncoder).callback) - require.Equal(t, 0, encoder.(*JSONTxnEventEncoder).batchSize) - require.Equal(t, 0, encoder.(*JSONTxnEventEncoder).valueBuf.Len()) -} diff --git a/pkg/sink/codec/common/config.go b/pkg/sink/codec/common/config.go deleted file mode 100644 index efe3ac64831..00000000000 --- a/pkg/sink/codec/common/config.go +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2022 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package common - -import ( - "net/http" - "net/url" - - "github.com/gin-gonic/gin/binding" - "github.com/imdario/mergo" - "github.com/pingcap/errors" - "github.com/pingcap/log" - "github.com/pingcap/tiflow/pkg/config" - cerror "github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/util" - "go.uber.org/zap" -) - -// defaultMaxBatchSize sets the default value for max-batch-size -const defaultMaxBatchSize int = 16 - -// Config use to create the encoder -type Config struct { - Protocol config.Protocol - - // control batch behavior, only for `open-protocol` and `craft` at the moment. - MaxMessageBytes int - MaxBatchSize int - - // onlyHandleKeyColumns is true, for the delete event only output the handle key columns. 
- OnlyHandleKeyColumns bool - - EnableTiDBExtension bool - EnableRowChecksum bool - - // avro only - AvroSchemaRegistry string - AvroDecimalHandlingMode string - AvroBigintUnsignedHandlingMode string - - AvroEnableWatermark bool - - // for sinking to cloud storage - Delimiter string - Quote string - NullString string - IncludeCommitTs bool - Terminator string - - // for open protocol - OnlyOutputUpdatedColumns bool -} - -// NewConfig return a Config for codec -func NewConfig(protocol config.Protocol) *Config { - return &Config{ - Protocol: protocol, - - MaxMessageBytes: config.DefaultMaxMessageBytes, - MaxBatchSize: defaultMaxBatchSize, - - EnableTiDBExtension: false, - EnableRowChecksum: false, - - AvroSchemaRegistry: "", - AvroDecimalHandlingMode: "precise", - AvroBigintUnsignedHandlingMode: "long", - AvroEnableWatermark: false, - - OnlyOutputUpdatedColumns: false, - } -} - -const ( - codecOPTEnableTiDBExtension = "enable-tidb-extension" - codecOPTAvroDecimalHandlingMode = "avro-decimal-handling-mode" - codecOPTAvroBigintUnsignedHandlingMode = "avro-bigint-unsigned-handling-mode" - codecOPTAvroSchemaRegistry = "schema-registry" - - codecOPTOnlyOutputUpdatedColumns = "only-output-updated-columns" -) - -const ( - // DecimalHandlingModeString is the string mode for decimal handling - DecimalHandlingModeString = "string" - // DecimalHandlingModePrecise is the precise mode for decimal handling - DecimalHandlingModePrecise = "precise" - // BigintUnsignedHandlingModeString is the string mode for unsigned bigint handling - BigintUnsignedHandlingModeString = "string" - // BigintUnsignedHandlingModeLong is the long mode for unsigned bigint handling - BigintUnsignedHandlingModeLong = "long" -) - -type urlConfig struct { - EnableTiDBExtension *bool `form:"enable-tidb-extension"` - MaxBatchSize *int `form:"max-batch-size"` - MaxMessageBytes *int `form:"max-message-bytes"` - AvroDecimalHandlingMode *string `form:"avro-decimal-handling-mode"` - AvroBigintUnsignedHandlingMode *string `form:"avro-bigint-unsigned-handling-mode"` - - // AvroEnableWatermark is the option for enabling watermark in avro protocol - // only used for internal testing, do not set this in the production environment since the - // confluent official consumer cannot handle watermark. 
- AvroEnableWatermark *bool `form:"avro-enable-watermark"` - - AvroSchemaRegistry string `form:"schema-registry"` - OnlyOutputUpdatedColumns *bool `form:"only-output-updated-columns"` -} - -// Apply fill the Config -func (c *Config) Apply(sinkURI *url.URL, replicaConfig *config.ReplicaConfig) error { - req := &http.Request{URL: sinkURI} - var err error - urlParameter := &urlConfig{} - if err := binding.Query.Bind(req, urlParameter); err != nil { - return cerror.WrapError(cerror.ErrMySQLInvalidConfig, err) - } - if urlParameter, err = mergeConfig(replicaConfig, urlParameter); err != nil { - return err - } - - if urlParameter.EnableTiDBExtension != nil { - c.EnableTiDBExtension = *urlParameter.EnableTiDBExtension - } - - if urlParameter.MaxBatchSize != nil { - c.MaxBatchSize = *urlParameter.MaxBatchSize - } - - if urlParameter.MaxMessageBytes != nil { - c.MaxMessageBytes = *urlParameter.MaxMessageBytes - } - - if urlParameter.AvroDecimalHandlingMode != nil && - *urlParameter.AvroDecimalHandlingMode != "" { - c.AvroDecimalHandlingMode = *urlParameter.AvroDecimalHandlingMode - } - if urlParameter.AvroBigintUnsignedHandlingMode != nil && - *urlParameter.AvroBigintUnsignedHandlingMode != "" { - c.AvroBigintUnsignedHandlingMode = *urlParameter.AvroBigintUnsignedHandlingMode - } - if urlParameter.AvroEnableWatermark != nil { - if c.EnableTiDBExtension && c.Protocol == config.ProtocolAvro { - c.AvroEnableWatermark = *urlParameter.AvroEnableWatermark - } - } - - if urlParameter.AvroSchemaRegistry != "" { - c.AvroSchemaRegistry = urlParameter.AvroSchemaRegistry - } - - if replicaConfig.Sink != nil { - c.Terminator = util.GetOrZero(replicaConfig.Sink.Terminator) - if replicaConfig.Sink.CSVConfig != nil { - c.Delimiter = replicaConfig.Sink.CSVConfig.Delimiter - c.Quote = replicaConfig.Sink.CSVConfig.Quote - c.NullString = replicaConfig.Sink.CSVConfig.NullString - c.IncludeCommitTs = replicaConfig.Sink.CSVConfig.IncludeCommitTs - } - } - if urlParameter.OnlyOutputUpdatedColumns != nil { - c.OnlyOutputUpdatedColumns = *urlParameter.OnlyOutputUpdatedColumns - } - if c.OnlyOutputUpdatedColumns && !replicaConfig.EnableOldValue { - return cerror.ErrCodecInvalidConfig.GenWithStack( - `old value must be enabled when configuration "%s" is true.`, - codecOPTOnlyOutputUpdatedColumns, - ) - } - - if replicaConfig.Integrity != nil { - c.EnableRowChecksum = replicaConfig.Integrity.Enabled() - } - - c.OnlyHandleKeyColumns = !replicaConfig.EnableOldValue - - return nil -} - -func mergeConfig( - replicaConfig *config.ReplicaConfig, - urlParameters *urlConfig, -) (*urlConfig, error) { - dest := &urlConfig{} - if replicaConfig.Sink != nil { - dest.AvroSchemaRegistry = util.GetOrZero(replicaConfig.Sink.SchemaRegistry) - dest.OnlyOutputUpdatedColumns = replicaConfig.Sink.OnlyOutputUpdatedColumns - if replicaConfig.Sink.KafkaConfig != nil { - dest.MaxMessageBytes = replicaConfig.Sink.KafkaConfig.MaxMessageBytes - if replicaConfig.Sink.KafkaConfig.CodecConfig != nil { - codecConfig := replicaConfig.Sink.KafkaConfig.CodecConfig - dest.EnableTiDBExtension = codecConfig.EnableTiDBExtension - dest.MaxBatchSize = codecConfig.MaxBatchSize - dest.AvroEnableWatermark = codecConfig.AvroEnableWatermark - dest.AvroDecimalHandlingMode = codecConfig.AvroDecimalHandlingMode - dest.AvroBigintUnsignedHandlingMode = codecConfig.AvroBigintUnsignedHandlingMode - } - } - } - if err := mergo.Merge(dest, urlParameters, mergo.WithOverride); err != nil { - return nil, err - } - return dest, nil -} - -// WithMaxMessageBytes set the `maxMessageBytes` 
-func (c *Config) WithMaxMessageBytes(bytes int) *Config { - c.MaxMessageBytes = bytes - return c -} - -// Validate the Config -func (c *Config) Validate() error { - if c.EnableTiDBExtension && - !(c.Protocol == config.ProtocolCanalJSON || c.Protocol == config.ProtocolAvro) { - log.Warn("ignore invalid config, enable-tidb-extension"+ - "only supports canal-json/avro protocol", - zap.Bool("enableTidbExtension", c.EnableTiDBExtension), - zap.String("protocol", c.Protocol.String())) - } - - if c.Protocol == config.ProtocolAvro { - if c.AvroSchemaRegistry == "" { - return cerror.ErrCodecInvalidConfig.GenWithStack( - `Avro protocol requires parameter "%s"`, - codecOPTAvroSchemaRegistry, - ) - } - - if c.AvroDecimalHandlingMode != DecimalHandlingModePrecise && - c.AvroDecimalHandlingMode != DecimalHandlingModeString { - return cerror.ErrCodecInvalidConfig.GenWithStack( - `%s value could only be "%s" or "%s"`, - codecOPTAvroDecimalHandlingMode, - DecimalHandlingModeString, - DecimalHandlingModePrecise, - ) - } - - if c.AvroBigintUnsignedHandlingMode != BigintUnsignedHandlingModeLong && - c.AvroBigintUnsignedHandlingMode != BigintUnsignedHandlingModeString { - return cerror.ErrCodecInvalidConfig.GenWithStack( - `%s value could only be "%s" or "%s"`, - codecOPTAvroBigintUnsignedHandlingMode, - BigintUnsignedHandlingModeLong, - BigintUnsignedHandlingModeString, - ) - } - - if c.EnableRowChecksum { - if !(c.EnableTiDBExtension && c.AvroDecimalHandlingMode == DecimalHandlingModeString && - c.AvroBigintUnsignedHandlingMode == BigintUnsignedHandlingModeString) { - return cerror.ErrCodecInvalidConfig.GenWithStack( - `Avro protocol with row level checksum, - should set "%s" to "%s", and set "%s" to "%s" and "%s" to "%s"`, - codecOPTEnableTiDBExtension, "true", - codecOPTAvroDecimalHandlingMode, DecimalHandlingModeString, - codecOPTAvroBigintUnsignedHandlingMode, BigintUnsignedHandlingModeString) - } - } - } - - if c.MaxMessageBytes <= 0 { - return cerror.ErrCodecInvalidConfig.Wrap( - errors.Errorf("invalid max-message-bytes %d", c.MaxMessageBytes), - ) - } - - if c.MaxBatchSize <= 0 { - return cerror.ErrCodecInvalidConfig.Wrap( - errors.Errorf("invalid max-batch-size %d", c.MaxBatchSize), - ) - } - - return nil -} From 185f83c900da06c40668c7a6809601fed1c768d9 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Sun, 4 Jun 2023 01:28:00 +0800 Subject: [PATCH 4/9] fix code. --- cdc/model/changefeed.go | 8 ++++++++ cdc/sink/codec/canal/canal_json_encoder_test.go | 6 +++--- cdc/sink/mq/mq_test.go | 2 -- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index 631c915698e..ee3033edf57 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -307,6 +307,14 @@ func (info *ChangeFeedInfo) FixIncompatible() { info.fixMemoryQuota() log.Info("Fix incompatible memory quota completed", zap.String("changefeed", info.String())) } + + if creatorVersionGate.ChangefeedAdjustEnableOldValueByProtocol() { + log.Info("Start fixing incompatible enable old value", zap.String("changefeed", info.String()), + zap.Bool("enableOldValue", info.Config.EnableOldValue)) + info.fixEnableOldValue() + log.Info("Fix incompatible enable old value completed", zap.String("changefeed", info.String()), + zap.Bool("enableOldValue", info.Config.EnableOldValue)) + } } // fixState attempts to fix state loss from upgrading the old owner to the new owner. 
diff --git a/cdc/sink/codec/canal/canal_json_encoder_test.go b/cdc/sink/codec/canal/canal_json_encoder_test.go index 871a09a91f4..f4c60c9a33f 100644 --- a/cdc/sink/codec/canal/canal_json_encoder_test.go +++ b/cdc/sink/codec/canal/canal_json_encoder_test.go @@ -169,7 +169,7 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { func TestNewCanalJSONMessageFromDDL(t *testing.T) { t.Parallel() - encoder, ok := newJSONRowEventEncoder(&common.Config{}).(*JSONRowEventEncoder) + encoder, ok := newJSONRowEventEncoder(&common.Config{}).(*JSONBatchEncoder) require.True(t, ok) message := encoder.newJSONMessageForDDL(testCaseDDL) @@ -186,7 +186,7 @@ func TestNewCanalJSONMessageFromDDL(t *testing.T) { encoder, ok = newJSONRowEventEncoder(&common.Config{ EnableTiDBExtension: true, - }).(*JSONRowEventEncoder) + }).(*JSONBatchEncoder) require.True(t, ok) message = encoder.newJSONMessageForDDL(testCaseDDL) @@ -322,7 +322,7 @@ func TestCheckpointEventValueMarshal(t *testing.T) { func TestDDLEventWithExtensionValueMarshal(t *testing.T) { t.Parallel() - encoder := &JSONRowEventEncoder{ + encoder := &JSONBatchEncoder{ builder: newCanalEntryBuilder(), config: &common.Config{EnableTiDBExtension: true}, } diff --git a/cdc/sink/mq/mq_test.go b/cdc/sink/mq/mq_test.go index 422b33bf6d1..95713e7550c 100644 --- a/cdc/sink/mq/mq_test.go +++ b/cdc/sink/mq/mq_test.go @@ -94,8 +94,6 @@ func TestKafkaSink(t *testing.T) { encoder := sink.encoderBuilder.Build() require.IsType(t, &open.BatchEncoder{}, encoder) - require.Equal(t, 1, encoder.(*open.BatchEncoder).MaxBatchSize) - require.Equal(t, 1048576, encoder.(*open.BatchEncoder).MaxMessageBytes) // mock kafka broker processes 1 row changed event tableID := model.TableID(1) From 8601d8c4bcdc1f5585c52e5d9278f237bbe5a7c7 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Sun, 4 Jun 2023 01:29:42 +0800 Subject: [PATCH 5/9] remove set non primary key columns to nil in the split update event --- cdc/processor/pipeline/sink.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cdc/processor/pipeline/sink.go b/cdc/processor/pipeline/sink.go index b0db75213f2..1827e1ac6f1 100755 --- a/cdc/processor/pipeline/sink.go +++ b/cdc/processor/pipeline/sink.go @@ -303,15 +303,6 @@ func SplitUpdateEvent(updateEvent *model.PolymorphicEvent) (*model.PolymorphicEv deleteEvent.Row = &deleteEventRow deleteEvent.RawKV = &deleteEventRowKV - deleteEvent.Row.Columns = nil - for i := range deleteEvent.Row.PreColumns { - // NOTICE: Only the handle key pre column is retained in the delete event. 
- if deleteEvent.Row.PreColumns[i] != nil && - !deleteEvent.Row.PreColumns[i].Flag.IsHandleKey() { - deleteEvent.Row.PreColumns[i] = nil - } - } - insertEvent := *updateEvent insertEventRow := *updateEvent.Row insertEventRowKV := *updateEvent.RawKV From 6067db4ebe0c7e2f89f398b58c8f741f407701f7 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Sun, 4 Jun 2023 21:24:56 +0800 Subject: [PATCH 6/9] fix make check --- cdc/sink/codec/canal/canal_encoder_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdc/sink/codec/canal/canal_encoder_test.go b/cdc/sink/codec/canal/canal_encoder_test.go index 2fe8bc06372..b07cee9eeb0 100644 --- a/cdc/sink/codec/canal/canal_encoder_test.go +++ b/cdc/sink/codec/canal/canal_encoder_test.go @@ -20,8 +20,8 @@ import ( "github.com/golang/protobuf/proto" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/cdc/sink/codec/common" "github.com/pingcap/tiflow/pkg/config" - "github.com/pingcap/tiflow/pkg/sink/codec/common" canal "github.com/pingcap/tiflow/proto/canal" "github.com/stretchr/testify/require" ) From 7269f0677c63cc0c514d305934934671164f5cf7 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Sun, 4 Jun 2023 22:29:27 +0800 Subject: [PATCH 7/9] fix unit test --- cdc/processor/pipeline/sink.go | 1 + cdc/processor/pipeline/sink_test.go | 8 ------- .../codec/canal/canal_json_encoder_test.go | 16 ++++++++++++++ pkg/cmd/util/helper_test.go | 6 ++++- pkg/config/replica_config_test.go | 22 +++++++++++-------- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/cdc/processor/pipeline/sink.go b/cdc/processor/pipeline/sink.go index 1827e1ac6f1..2241583fdf9 100755 --- a/cdc/processor/pipeline/sink.go +++ b/cdc/processor/pipeline/sink.go @@ -302,6 +302,7 @@ func SplitUpdateEvent(updateEvent *model.PolymorphicEvent) (*model.PolymorphicEv deleteEventRowKV := *updateEvent.RawKV deleteEvent.Row = &deleteEventRow deleteEvent.RawKV = &deleteEventRowKV + deleteEvent.Row.Columns = nil insertEvent := *updateEvent insertEventRow := *updateEvent.Row diff --git a/cdc/processor/pipeline/sink_test.go b/cdc/processor/pipeline/sink_test.go index 592530c3589..911465c6f2f 100644 --- a/cdc/processor/pipeline/sink_test.go +++ b/cdc/processor/pipeline/sink_test.go @@ -584,14 +584,6 @@ func TestSplitUpdateEventWhenDisableOldValue(t *testing.T) { require.Len(t, sink.Received[deleteEventIndex].Row.PreColumns, 3) nilColIndex := 0 require.Nil(t, sink.Received[deleteEventIndex].Row.PreColumns[nilColIndex]) - nonHandleKeyColIndex := 1 - handleKeyColIndex := 2 - // NOTICE: When old value disabled, we only keep the handle key pre cols. 
- require.Nil(t, sink.Received[deleteEventIndex].Row.PreColumns[nonHandleKeyColIndex]) - require.Equal(t, "col2", sink.Received[deleteEventIndex].Row.PreColumns[handleKeyColIndex].Name) - require.True(t, - sink.Received[deleteEventIndex].Row.PreColumns[handleKeyColIndex].Flag.IsHandleKey(), - ) insertEventIndex := 1 require.Len(t, sink.Received[insertEventIndex].Row.Columns, 3) diff --git a/cdc/sink/codec/canal/canal_json_encoder_test.go b/cdc/sink/codec/canal/canal_json_encoder_test.go index f4c60c9a33f..90af3287849 100644 --- a/cdc/sink/codec/canal/canal_json_encoder_test.go +++ b/cdc/sink/codec/canal/canal_json_encoder_test.go @@ -135,6 +135,22 @@ func TestNewCanalJSONMessage4DML(t *testing.T) { require.NotNil(t, jsonMsg.Data) require.Nil(t, jsonMsg.Old) + for _, col := range testCaseDelete.PreColumns { + require.Contains(t, jsonMsg.Data[0], col.Name) + require.Contains(t, jsonMsg.SQLType, col.Name) + require.Contains(t, jsonMsg.MySQLType, col.Name) + } + + encoder, ok = newJSONBatchEncoder(&common.Config{OnlyHandleKeyColumns: true}).(*JSONBatchEncoder) + require.True(t, ok) + data, err = encoder.newJSONMessageForDML(testCaseDelete) + require.NoError(t, err) + jsonMsg = &JSONMessage{} + err = json.Unmarshal(data, jsonMsg) + require.NoError(t, err) + require.NotNil(t, jsonMsg.Data) + require.Nil(t, jsonMsg.Old) + for _, col := range testCaseDelete.PreColumns { if col.Flag.IsHandleKey() { require.Contains(t, jsonMsg.Data[0], col.Name) diff --git a/pkg/cmd/util/helper_test.go b/pkg/cmd/util/helper_test.go index d96ebe9b278..44adace0e5d 100644 --- a/pkg/cmd/util/helper_test.go +++ b/pkg/cmd/util/helper_test.go @@ -183,7 +183,11 @@ func TestAndWriteExampleReplicaTOML(t *testing.T) { require.Equal(t, &config.MounterConfig{ WorkerNum: 16, }, cfg.Mounter) - err = cfg.ValidateAndAdjust(nil) + + sinkURL, err := url.Parse("kafka://127.0.0.1:9092") + require.NoError(t, err) + + err = cfg.ValidateAndAdjust(sinkURL) require.Nil(t, err) require.Equal(t, &config.SinkConfig{ EncoderConcurrency: 16, diff --git a/pkg/config/replica_config_test.go b/pkg/config/replica_config_test.go index 4fc6319a5df..e48872493c3 100644 --- a/pkg/config/replica_config_test.go +++ b/pkg/config/replica_config_test.go @@ -104,7 +104,7 @@ func TestReplicaConfigValidate(t *testing.T) { t.Parallel() conf := GetDefaultReplicaConfig() - sinkURL, err := url.Parse("blackhole://") + sinkURL, err := url.Parse("blackhole://xxx?protocol=canal") require.NoError(t, err) require.NoError(t, conf.ValidateAndAdjust(sinkURL)) @@ -131,7 +131,7 @@ func TestReplicaConfigValidate(t *testing.T) { {Matcher: []string{"a.c"}, PartitionRule: "p1"}, {Matcher: []string{"a.d"}}, } - err = conf.ValidateAndAdjust(nil) + err = conf.ValidateAndAdjust(sinkURL) require.Nil(t, err) rules := conf.Sink.DispatchRules require.Equal(t, "d1", rules[0].PartitionRule) @@ -141,12 +141,12 @@ func TestReplicaConfigValidate(t *testing.T) { // Test memory quota can be adjusted conf = GetDefaultReplicaConfig() conf.MemoryQuota = 0 - err = conf.ValidateAndAdjust(nil) + err = conf.ValidateAndAdjust(sinkURL) require.NoError(t, err) require.Equal(t, uint64(DefaultChangefeedMemoryQuota), conf.MemoryQuota) conf.MemoryQuota = uint64(1024) - err = conf.ValidateAndAdjust(nil) + err = conf.ValidateAndAdjust(sinkURL) require.NoError(t, err) require.Equal(t, uint64(1024), conf.MemoryQuota) } @@ -154,20 +154,24 @@ func TestReplicaConfigValidate(t *testing.T) { func TestValidateAndAdjust(t *testing.T) { cfg := GetDefaultReplicaConfig() require.False(t, cfg.EnableSyncPoint) - 
require.NoError(t, cfg.ValidateAndAdjust(nil)) + + sinkURL, err := url.Parse("blackhole://") + require.NoError(t, err) + + require.NoError(t, cfg.ValidateAndAdjust(sinkURL)) cfg.EnableSyncPoint = true - require.NoError(t, cfg.ValidateAndAdjust(nil)) + require.NoError(t, cfg.ValidateAndAdjust(sinkURL)) cfg.SyncPointInterval = time.Second * 29 - require.Error(t, cfg.ValidateAndAdjust(nil)) + require.Error(t, cfg.ValidateAndAdjust(sinkURL)) cfg.SyncPointInterval = time.Second * 30 cfg.SyncPointRetention = time.Minute * 10 - require.Error(t, cfg.ValidateAndAdjust(nil)) + require.Error(t, cfg.ValidateAndAdjust(sinkURL)) cfg.Sink.EncoderConcurrency = -1 - require.Error(t, cfg.ValidateAndAdjust(nil)) + require.Error(t, cfg.ValidateAndAdjust(sinkURL)) } func TestAdjustEnableOldValueAndVerifyForceReplicate(t *testing.T) { From 6ede7ea34670e136539d7f10ac858046a41f4126 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Mon, 5 Jun 2023 11:25:07 +0800 Subject: [PATCH 8/9] set retry to 20 --- tests/integration_tests/bank/case.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/bank/case.go b/tests/integration_tests/bank/case.go index a88597330dc..c10cdabc19a 100644 --- a/tests/integration_tests/bank/case.go +++ b/tests/integration_tests/bank/case.go @@ -133,7 +133,9 @@ func (s *sequenceTest) prepare(ctx context.Context, db *sql.DB, accounts, tableI for j := 0; j < batchSize; j++ { args[j] = fmt.Sprintf("(%d, 0, 0, 0)", offset+j) } - return fmt.Sprintf("INSERT IGNORE INTO accounts_seq%d (id, counter, sequence, startts) VALUES %s", tableID, strings.Join(args, ",")) + sql := fmt.Sprintf("INSERT IGNORE INTO accounts_seq%d (id, counter, sequence, startts) VALUES %s", tableID, strings.Join(args, ",")) + log.Info("batch insert sql", zap.String("sql", sql)) + return sql } prepareImpl(ctx, s, createTable, batchInsertSQLF, db, accounts, tableID, concurrency) @@ -187,7 +189,7 @@ func (*sequenceTest) verify(ctx context.Context, db *sql.DB, accounts, tableID i } return nil - }, retry.WithBackoffMaxDelay(500), retry.WithBackoffMaxDelay(120*1000), retry.WithMaxTries(10), retry.WithIsRetryableErr(cerror.IsRetryableError)) + }, retry.WithBackoffMaxDelay(500), retry.WithBackoffMaxDelay(120*1000), retry.WithMaxTries(20), retry.WithIsRetryableErr(cerror.IsRetryableError)) } // tryDropDB will drop table if data incorrect and panic error likes bad connect. From 0bfcb666f70adc7873aa420cd37375bbd2d0dd30 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Mon, 5 Jun 2023 14:06:18 +0800 Subject: [PATCH 9/9] fix canal-json it --- cdc/sink/codec/canal/canal_json_encoder.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cdc/sink/codec/canal/canal_json_encoder.go b/cdc/sink/codec/canal/canal_json_encoder.go index c8e6053bdba..f730b813035 100644 --- a/cdc/sink/codec/canal/canal_json_encoder.go +++ b/cdc/sink/codec/canal/canal_json_encoder.go @@ -328,6 +328,9 @@ func (c *JSONBatchEncoder) AppendRowChangedEvent( if err != nil { return errors.Trace(err) } + if len(c.config.Terminator) > 0 { + value = append(value, c.config.Terminator...) + } length := len(value) + common.MaxRecordOverhead // for single message that is longer than max-message-bytes, do not send it.