Skip to content

Commit

Permalink
lightning: move invalid and dirty checkpoint's check in dataCheck. (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
3pointer authored Aug 19, 2021
1 parent 7ac5843 commit 56c943b
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 115 deletions.
37 changes: 37 additions & 0 deletions br/pkg/lightning/restore/check_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,43 @@ func (rc *Controller) CheckpointIsValid(ctx context.Context, tableInfo *mydump.M
return nil, false, nil
}

if tableCheckPoint.Status <= checkpoints.CheckpointStatusMaxInvalid {
failedStep := tableCheckPoint.Status * 10
var action strings.Builder
action.WriteString("./tidb-lightning-ctl --checkpoint-error-")
switch failedStep {
case checkpoints.CheckpointStatusAlteredAutoInc, checkpoints.CheckpointStatusAnalyzed:
action.WriteString("ignore")
default:
action.WriteString("destroy")
}
action.WriteString("='")
action.WriteString(uniqueName)
action.WriteString("' --config=...")

msgs = append(msgs, fmt.Sprintf("TiDB Lightning has failed last time. To prevent data loss, this run will stop now, "+
"%s failed in step(%s), please run command %s,"+
"You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch,"+
"For details of this failure, read the log file from the PREVIOUS run",
uniqueName, failedStep.MetricName(), action.String()))
return msgs, false, nil
}

dbInfo, ok := rc.dbInfos[tableInfo.DB]
if ok {
t, ok := dbInfo.Tables[tableInfo.Name]
if ok {
if tableCheckPoint.TableID > 0 && tableCheckPoint.TableID != t.ID {
msgs = append(msgs, fmt.Sprintf("TiDB Lightning has detected tables with illegal checkpoints. To prevent data loss, this run will stop now,"+
"please run command \"./tidb-lightning-ctl --checkpoint-remove='%s' --config=...\""+
"You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch,"+
"For details of this failure, read the log file from the PREVIOUS run",
uniqueName))
return msgs, false, nil
}
}
}

var permFromCheckpoint []int
var columns []string
for _, eng := range tableCheckPoint.Engines {
Expand Down
111 changes: 12 additions & 99 deletions br/pkg/lightning/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -734,16 +734,10 @@ func (rc *Controller) restoreSchema(ctx context.Context) error {
}
rc.dbInfos = dbInfos

if rc.cfg.App.CheckRequirements && rc.tidbGlue.OwnsSQLExecutor() {
if rc.tidbGlue.OwnsSQLExecutor() {
if err = rc.DataCheck(ctx); err != nil {
return errors.Trace(err)
}
// print check template only if check requirements is true.
fmt.Println(rc.checkTemplate.Output())
if !rc.checkTemplate.Success() {
return errors.Errorf("tidb-lightning pre-check failed." +
" Please fix the failed check(s) or set --check-requirements=false to skip checks")
}
}

// Load new checkpoints
Expand Down Expand Up @@ -1312,87 +1306,6 @@ func (rc *Controller) restoreTables(ctx context.Context) error {
}()
}

// first collect all tables where the checkpoint is invalid
allInvalidCheckpoints := make(map[string]checkpoints.CheckpointStatus)
// collect all tables whose checkpoint's tableID can't match current tableID
allDirtyCheckpoints := make(map[string]struct{})
for _, dbMeta := range rc.dbMetas {
dbInfo, ok := rc.dbInfos[dbMeta.Name]
if !ok {
return errors.Errorf("database %s not found in rc.dbInfos", dbMeta.Name)
}
for _, tableMeta := range dbMeta.Tables {
tableInfo, ok := dbInfo.Tables[tableMeta.Name]
if !ok {
return errors.Errorf("table info %s.%s not found", dbMeta.Name, tableMeta.Name)
}

tableName := common.UniqueTable(dbInfo.Name, tableInfo.Name)
cp, err := rc.checkpointsDB.Get(ctx, tableName)
if err != nil {
return errors.Trace(err)
}
if cp.Status <= checkpoints.CheckpointStatusMaxInvalid {
allInvalidCheckpoints[tableName] = cp.Status
} else if cp.TableID > 0 && cp.TableID != tableInfo.ID {
allDirtyCheckpoints[tableName] = struct{}{}
}
}
}

if len(allInvalidCheckpoints) != 0 {
logger := log.L()
logger.Error(
"TiDB Lightning has failed last time. To prevent data loss, this run will stop now. Please resolve errors first",
zap.Int("count", len(allInvalidCheckpoints)),
)

for tableName, status := range allInvalidCheckpoints {
failedStep := status * 10
var action strings.Builder
action.WriteString("./tidb-lightning-ctl --checkpoint-error-")
switch failedStep {
case checkpoints.CheckpointStatusAlteredAutoInc, checkpoints.CheckpointStatusAnalyzed:
action.WriteString("ignore")
default:
action.WriteString("destroy")
}
action.WriteString("='")
action.WriteString(tableName)
action.WriteString("' --config=...")

logger.Info("-",
zap.String("table", tableName),
zap.Uint8("status", uint8(status)),
zap.String("failedStep", failedStep.MetricName()),
zap.Stringer("recommendedAction", &action),
)
}

logger.Info("You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch")
logger.Info("For details of this failure, read the log file from the PREVIOUS run")

return errors.New("TiDB Lightning has failed last time; please resolve these errors first")
}
if len(allDirtyCheckpoints) > 0 {
logger := log.L()
logger.Error(
"TiDB Lightning has detected tables with illegal checkpoints. To prevent data mismatch, this run will stop now. Please remove these checkpoints first",
zap.Int("count", len(allDirtyCheckpoints)),
)

for tableName := range allDirtyCheckpoints {
logger.Info("-",
zap.String("table", tableName),
zap.String("recommendedAction", "./tidb-lightning-ctl --checkpoint-remove='"+tableName+"' --config=..."),
)
}

logger.Info("You may also run `./tidb-lightning-ctl --checkpoint-remove=all --config=...` to start from scratch")

return errors.New("TiDB Lightning has detected tables with illegal checkpoints; please remove these checkpoints first")
}

for _, dbMeta := range rc.dbMetas {
dbInfo := rc.dbInfos[dbMeta.Name]
for _, tableMeta := range dbMeta.Tables {
Expand Down Expand Up @@ -1811,10 +1724,11 @@ func (rc *Controller) preCheckRequirements(ctx context.Context) error {
}
}
}
if rc.cfg.App.CheckRequirements && rc.tidbGlue.OwnsSQLExecutor() {
// print check template only if check requirements is true.
if rc.tidbGlue.OwnsSQLExecutor() {
// Always print the check results, regardless of the check-requirements setting.
fmt.Print(rc.checkTemplate.Output())
if !rc.checkTemplate.Success() {
if rc.cfg.App.CheckRequirements && !rc.checkTemplate.Success() {
// if check requirements is true, return error.
if !taskExist && rc.taskMgr != nil {
rc.taskMgr.CleanupTask(ctx)
}
Expand All @@ -1827,14 +1741,12 @@ func (rc *Controller) preCheckRequirements(ctx context.Context) error {

// DataCheck checks the data schema which needs #rc.restoreSchema finished.
func (rc *Controller) DataCheck(ctx context.Context) error {
if !rc.cfg.App.CheckRequirements {
log.L().Info("skip data check due to user requirement")
return nil
}
var err error
err = rc.HasLargeCSV(rc.dbMetas)
if err != nil {
return errors.Trace(err)
if rc.cfg.App.CheckRequirements {
err = rc.HasLargeCSV(rc.dbMetas)
if err != nil {
return errors.Trace(err)
}
}
checkPointCriticalMsgs := make([]string, 0, len(rc.dbMetas))
schemaCriticalMsgs := make([]string, 0, len(rc.dbMetas))
Expand All @@ -1852,7 +1764,8 @@ func (rc *Controller) DataCheck(ctx context.Context) error {
checkPointCriticalMsgs = append(checkPointCriticalMsgs, msgs...)
}
}
if noCheckpoint && rc.cfg.TikvImporter.Backend != config.BackendTiDB {

if rc.cfg.App.CheckRequirements && noCheckpoint && rc.cfg.TikvImporter.Backend != config.BackendTiDB {
if msgs, err = rc.SchemaIsValid(ctx, tableInfo); err != nil {
return errors.Trace(err)
}
Expand Down
20 changes: 12 additions & 8 deletions br/tests/lightning_checkpoint_dirty_tableid/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ run_sql 'DROP DATABASE IF EXISTS cpdt'

export GO_FAILPOINTS=""
set +e
run_lightning --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" --config "tests/$TEST_NAME/mysql.toml" -d "tests/$TEST_NAME/data"
# Append stdout to the log file so that the grep commands below can search it.
run_lightning --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" --config "tests/$TEST_NAME/mysql.toml" -d "tests/$TEST_NAME/data" >> "$TEST_DIR/lightning-checkpoint-dirty-tableid.log"
set -e

ILLEGAL_CP_COUNT=$(grep "TiDB Lightning has detected tables with illegal checkpoints. To prevent data mismatch, this run will stop now. Please remove these checkpoints first" "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)
TABLE_SUGGEST=$(grep "./tidb-lightning-ctl --checkpoint-remove=" "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)
# Some messages are split across two lines when they are rendered in the chart.
ILLEGAL_CP_COUNT=$(grep "TiDB Lightning has detected tables with illegal checkpoints. To prevent data loss, this run will stop now." "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)
TABLE_SUGGEST=$(grep "checkpoint-remove=" "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)

[ $ILLEGAL_CP_COUNT -eq 1 ]
[ $TABLE_SUGGEST -eq 2 ]
[ $TABLE_SUGGEST -eq 1 ]

# Try again with the file checkpoints

Expand All @@ -50,11 +52,13 @@ run_sql 'DROP DATABASE IF EXISTS cpdt'

export GO_FAILPOINTS=""
set +e
run_lightning --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" --config "tests/$TEST_NAME/file.toml" -d "tests/$TEST_NAME/data"
# Append stdout to the log file so that the grep commands below can search it.
run_lightning --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" --config "tests/$TEST_NAME/file.toml" -d "tests/$TEST_NAME/data" >> "$TEST_DIR/lightning-checkpoint-dirty-tableid.log"
set -e

ILLEGAL_CP_COUNT=$(grep "TiDB Lightning has detected tables with illegal checkpoints. To prevent data mismatch, this run will stop now. Please remove these checkpoints first" "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)
TABLE_SUGGEST=$(grep "./tidb-lightning-ctl --checkpoint-remove=" "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)
# Some messages are split across two lines when they are rendered in the chart.
ILLEGAL_CP_COUNT=$(grep "TiDB Lightning has detected tables with illegal checkpoints. To prevent data loss, this run will stop now." "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)
TABLE_SUGGEST=$(grep "checkpoint-remove=" "$TEST_DIR/lightning-checkpoint-dirty-tableid.log" | wc -l)

[ $ILLEGAL_CP_COUNT -eq 1 ]
[ $TABLE_SUGGEST -eq 2 ]
[ $TABLE_SUGGEST -eq 1 ]
18 changes: 10 additions & 8 deletions br/tests/lightning_error_summary/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,18 @@ grep -Fq '[-] [table=`error_summary`.`c`] [status=checksum] [error="checksum mis
# Now check the error log when the checkpoint is not cleaned.

set +e
run_lightning --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-error-summary.log"
# Append stdout to the log file so that the grep commands below can search it.
run_lightning --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-error-summary.log" >> "$TEST_DIR/lightning-error-summary.log"
ERRORCODE=$?
set -e

[ "$ERRORCODE" -ne 0 ]

tail -20 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '["TiDB Lightning has failed last time. To prevent data loss, this run will stop now. Please resolve errors first"] [count=2]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '[-] [table=`error_summary`.`a`] [status=18] [failedStep=checksum] [recommendedAction="./tidb-lightning-ctl --checkpoint-error-destroy='"'"'`error_summary`.`a`'"'"' --config=..."]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '[-] [table=`error_summary`.`c`] [status=18] [failedStep=checksum] [recommendedAction="./tidb-lightning-ctl --checkpoint-error-destroy='"'"'`error_summary`.`c`'"'"' --config=..."]' "$TEST_DIR/lightning-error-summary.tail"
! grep -Fq '[-] [table=`error_summary`.`b`] [status=18] [failedStep=checksum]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '["You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch"]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '["For details of this failure, read the log file from the PREVIOUS run"]' "$TEST_DIR/lightning-error-summary.tail"
tail -100 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail"
grep -Fq 'TiDB Lightning has failed last time. To prevent data loss, this run will stop now' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq './tidb-lightning-ctl --checkpoint-error-destroy='"'"'`error_summary`.`a`'"'"' --config=...' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq './tidb-lightning-ctl --checkpoint-error-destroy='"'"'`error_summary`.`c`'"'"' --config=...' "$TEST_DIR/lightning-error-summary.tail"
! grep -Fq './tidb-lightning-ctl --checkpoint-error-destroy='"'"'`error_summary`.`b`'"'"' --config=...' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq 'checkpoint-error-destroy=all --config=...` to start from scratch' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq 'For details of this failure, read the log file' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq 'PREVIOUS run' "$TEST_DIR/lightning-error-summary.tail"

0 comments on commit 56c943b

Please sign in to comment.