From adf6de56a6953248113bcbd3a2c605830f03155d Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 15 Aug 2023 21:05:00 +0800 Subject: [PATCH] lightning: make OpLevelOptional suppress the error of DoChecksum (#45486) (#45866) close pingcap/tidb#45382 --- br/pkg/lightning/restore/BUILD.bazel | 2 ++ br/pkg/lightning/restore/table_restore.go | 36 ++++++++++++++++------- br/tests/lightning_routes/config.toml | 3 ++ br/tests/lightning_routes/run.sh | 5 ++++ 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/br/pkg/lightning/restore/BUILD.bazel b/br/pkg/lightning/restore/BUILD.bazel index 2b34396fe4b68..01bf6145d86d3 100644 --- a/br/pkg/lightning/restore/BUILD.bazel +++ b/br/pkg/lightning/restore/BUILD.bazel @@ -82,7 +82,9 @@ go_library( "@com_github_tikv_pd_client//:client", "@io_etcd_go_etcd_client_v3//:client", "@org_golang_google_grpc//:grpc", + "@org_golang_google_grpc//codes", "@org_golang_google_grpc//keepalive", + "@org_golang_google_grpc//status", "@org_golang_x_exp//maps", "@org_golang_x_exp//slices", "@org_golang_x_sync//errgroup", diff --git a/br/pkg/lightning/restore/table_restore.go b/br/pkg/lightning/restore/table_restore.go index fc925c56f58cf..9f442075d6327 100644 --- a/br/pkg/lightning/restore/table_restore.go +++ b/br/pkg/lightning/restore/table_restore.go @@ -44,6 +44,8 @@ import ( "go.uber.org/multierr" "go.uber.org/zap" "golang.org/x/exp/slices" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) type TableRestore struct { @@ -843,15 +845,26 @@ func (tr *TableRestore) postProcess( var remoteChecksum *RemoteChecksum remoteChecksum, err = DoChecksum(ctx, tr.tableInfo) + failpoint.Inject("checksum-error", func() { + tr.logger.Info("failpoint checksum-error injected.") + remoteChecksum = nil + err = status.Error(codes.Unknown, "Checksum meets error.") + }) if err != nil { - return false, err + if rc.cfg.PostRestore.Checksum != config.OpLevelOptional { + return false, err + } + tr.logger.Warn("do checksum failed, will skip this error and go on", log.ShortError(err)) + err = nil } - err = tr.compareChecksum(remoteChecksum, localChecksum) - // with post restore level 'optional', we will skip checksum error - if rc.cfg.PostRestore.Checksum == config.OpLevelOptional { - if err != nil { - tr.logger.Warn("compare checksum failed, will skip this error and go on", log.ShortError(err)) - err = nil + if remoteChecksum != nil { + err = tr.compareChecksum(remoteChecksum, localChecksum) + // with post restore level 'optional', we will skip checksum error + if rc.cfg.PostRestore.Checksum == config.OpLevelOptional { + if err != nil { + tr.logger.Warn("compare checksum failed, will skip this error and go on", log.ShortError(err)) + err = nil + } } } } else { @@ -893,11 +906,12 @@ func (tr *TableRestore) postProcess( case forcePostProcess || !rc.cfg.PostRestore.PostProcessAtLast: err := tr.analyzeTable(ctx, rc.tidbGlue.GetSQLExecutor()) // witch post restore level 'optional', we will skip analyze error - if rc.cfg.PostRestore.Analyze == config.OpLevelOptional { - if err != nil { - tr.logger.Warn("analyze table failed, will skip this error and go on", log.ShortError(err)) - err = nil + if err != nil { + if rc.cfg.PostRestore.Analyze != config.OpLevelOptional { + return false, err } + tr.logger.Warn("analyze table failed, will skip this error and go on", log.ShortError(err)) + err = nil } saveCpErr := rc.saveStatusCheckpoint(ctx, tr.tableName, checkpoints.WholeTableEngineID, err, checkpoints.CheckpointStatusAnalyzed) if err = firstErr(err, saveCpErr); err != nil { diff --git a/br/tests/lightning_routes/config.toml b/br/tests/lightning_routes/config.toml index bb54609dd03b1..74913091c5916 100644 --- a/br/tests/lightning_routes/config.toml +++ b/br/tests/lightning_routes/config.toml @@ -8,3 +8,6 @@ schema-pattern = "routes_a*" table-pattern = "t*" target-schema = "routes_b" target-table = "u" + +[post-restore] +checksum = "optional" diff --git a/br/tests/lightning_routes/run.sh b/br/tests/lightning_routes/run.sh index 1db0ce2035021..501d17924fdbe 100755 --- a/br/tests/lightning_routes/run.sh +++ b/br/tests/lightning_routes/run.sh @@ -4,12 +4,17 @@ set -eux +echo "testing checksum-error..." +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/lightning/restore/checksum-error=1*return()" + run_sql 'DROP DATABASE IF EXISTS routes_a0;' run_sql 'DROP DATABASE IF EXISTS routes_a1;' run_sql 'DROP DATABASE IF EXISTS routes_b;' run_lightning +echo "test checksum-error success!" + run_sql 'SELECT count(1), sum(x) FROM routes_b.u;' check_contains 'count(1): 4' check_contains 'sum(x): 259'