From d9ab4da5f110a6caba6147692de8cb8f3f749dc7 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:57:14 +0100 Subject: [PATCH 01/13] plan --- br/pkg/backup/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 4cdb5673aea0d..0a91d3a3372ff 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1172,7 +1172,7 @@ func (bc *Client) fineGrainedBackup( for { select { case err := <-errCh: - // TODO: should we handle err here? + // TODO: (Ris)handle error here return errors.Trace(err) case resp, ok := <-respCh: if !ok { From 4b3535ad9a20d505a45e0510318439e2b66c0d8d Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 05:12:08 +0100 Subject: [PATCH 02/13] add retry limit --- br/pkg/backup/client.go | 34 +++++++++++++++++++-- br/tests/br_finegrained_disconnect/run.sh | 37 +++++++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 br/tests/br_finegrained_disconnect/run.sh diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 0a91d3a3372ff..c312b8d1ba48e 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1125,6 +1125,7 @@ func (bc *Client) fineGrainedBackup( }) bo := utils.AdaptTiKVBackoffer(ctx, backupFineGrainedMaxBackoff, berrors.ErrUnknown) + maxDisconnect := make(map[int]int) for { // Step1, check whether there is any incomplete range incomplete := pr.Res.GetIncompleteRange(req.StartKey, req.EndKey) @@ -1172,7 +1173,28 @@ func (bc *Client) fineGrainedBackup( for { select { case err := <-errCh: - // TODO: (Ris)handle error here + if berrors.Is(err, berrors.ErrFailedToConnect) { + storeID := 0 + if strings.Contains(err.Error(), "store") { + _, scanErr := fmt.Sscanf(err.Error(), "failed to connect to store %d", &storeID) + if scanErr != nil { + log.Warn("failed to parse store ID from error message", zap.Error(scanErr)) + } + } + + if _, ok := maxDisconnect[storeID]; !ok { + maxDisconnect[storeID] = 0 + } else { + maxDisconnect[storeID]++ + } + + if maxDisconnect[storeID] > 3 { + return errors.Annotatef(err, "failed to connect to store %d for 3 times", storeID) + } else { + break + } + } + return errors.Trace(err) case resp, ok := <-respCh: if !ok { @@ -1274,12 +1296,18 @@ func (bc *Client) handleFineGrained( storeID := targetPeer.GetStoreId() lockResolver := bc.mgr.GetLockResolver() client, err := bc.mgr.GetBackupClient(ctx, storeID) + + failpoint.Inject("connect-error", func(v failpoint.Value) { + // create a berrors.ErrFailedToConnect + err = berrors.ErrFailedToConnect + }) + if err != nil { if berrors.Is(err, berrors.ErrFailedToConnect) { // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, nil + return 20000, errors.Annotatef(err, "failed to connect to store %d", storeID) } logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID)) @@ -1318,7 +1346,7 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, nil + return 20000, errors.Annotatef(err, "failed to connect to store %d", storeID) } logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err)) return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)", diff --git a/br/tests/br_finegrained_disconnect/run.sh b/br/tests/br_finegrained_disconnect/run.sh new file mode 100644 index 0000000000000..9a1d0bd59cd0e --- /dev/null +++ b/br/tests/br_finegrained_disconnect/run.sh @@ -0,0 +1,37 @@ +#! /bin/bash + +set -eux + +. run_services + +. br_tikv_outage_util + +load + +hint_finegrained=$TEST_DIR/hint_finegrained +hint_backup_start=$TEST_DIR/hint_backup_start +hint_get_backup_client=$TEST_DIR/hint_get_backup_client + +cases=${cases:-'shutdown'} + +for failure in $cases; do + rm -f "$hint_finegrained" "$hint_backup_start" "$hint_get_backup_client" + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/hint-backup-start=1*return(\"$hint_backup_start\");\ +github.com/pingcap/tidb/br/pkg/backup/hint-fine-grained-backup=1*return(\"$hint_finegrained\");\ +github.com/pingcap/tidb/br/pkg/utils/hint-get-backup-client=1*return(\"$hint_get_backup_client\")" + + backup_dir=${TEST_DIR:?}/"backup{test:${TEST_NAME}|with:${failure}}" + rm -rf "${backup_dir:?}" + # Add ratelimit for backup task, otherwise the backup task will finishes too quickly. + run_br backup full -s local://"$backup_dir" --concurrency 1 --ratelimit 3 & + backup_pid=$! + single_point_fault $failure + wait $backup_pid + + # case 'shutdown' need to restart services + stop_services + start_services + + + check +done \ No newline at end of file From b2168bf443df46f64aee621a02861eace2b1112f Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 05:25:02 +0100 Subject: [PATCH 03/13] no need for integration test --- br/tests/br_finegrained_disconnect/run.sh | 37 ----------------------- 1 file changed, 37 deletions(-) delete mode 100644 br/tests/br_finegrained_disconnect/run.sh diff --git a/br/tests/br_finegrained_disconnect/run.sh b/br/tests/br_finegrained_disconnect/run.sh deleted file mode 100644 index 9a1d0bd59cd0e..0000000000000 --- a/br/tests/br_finegrained_disconnect/run.sh +++ /dev/null @@ -1,37 +0,0 @@ -#! /bin/bash - -set -eux - -. run_services - -. br_tikv_outage_util - -load - -hint_finegrained=$TEST_DIR/hint_finegrained -hint_backup_start=$TEST_DIR/hint_backup_start -hint_get_backup_client=$TEST_DIR/hint_get_backup_client - -cases=${cases:-'shutdown'} - -for failure in $cases; do - rm -f "$hint_finegrained" "$hint_backup_start" "$hint_get_backup_client" - export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/hint-backup-start=1*return(\"$hint_backup_start\");\ -github.com/pingcap/tidb/br/pkg/backup/hint-fine-grained-backup=1*return(\"$hint_finegrained\");\ -github.com/pingcap/tidb/br/pkg/utils/hint-get-backup-client=1*return(\"$hint_get_backup_client\")" - - backup_dir=${TEST_DIR:?}/"backup{test:${TEST_NAME}|with:${failure}}" - rm -rf "${backup_dir:?}" - # Add ratelimit for backup task, otherwise the backup task will finishes too quickly. - run_br backup full -s local://"$backup_dir" --concurrency 1 --ratelimit 3 & - backup_pid=$! - single_point_fault $failure - wait $backup_pid - - # case 'shutdown' need to restart services - stop_services - start_services - - - check -done \ No newline at end of file From 99720dc593d2547e646dc9a154fc97f03f6a11c6 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 08:35:07 +0100 Subject: [PATCH 04/13] less message --- br/pkg/backup/client.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index c312b8d1ba48e..bc81d0042bc4d 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1175,10 +1175,10 @@ func (bc *Client) fineGrainedBackup( case err := <-errCh: if berrors.Is(err, berrors.ErrFailedToConnect) { storeID := 0 - if strings.Contains(err.Error(), "store") { + if strings.Contains(err.Error(), "failed to connect to store") { _, scanErr := fmt.Sscanf(err.Error(), "failed to connect to store %d", &storeID) if scanErr != nil { - log.Warn("failed to parse store ID from error message", zap.Error(scanErr)) + log.Warn("failed to parse store ID", zap.Error(scanErr)) } } @@ -1297,17 +1297,12 @@ func (bc *Client) handleFineGrained( lockResolver := bc.mgr.GetLockResolver() client, err := bc.mgr.GetBackupClient(ctx, storeID) - failpoint.Inject("connect-error", func(v failpoint.Value) { - // create a berrors.ErrFailedToConnect - err = berrors.ErrFailedToConnect - }) - if err != nil { if berrors.Is(err, berrors.ErrFailedToConnect) { // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, errors.Annotatef(err, "failed to connect to store %d", storeID) + return 20000, errors.WithMessage(err, fmt.Sprintf("failed to connect to store %d", storeID)) } logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID)) @@ -1346,7 +1341,7 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, errors.Annotatef(err, "failed to connect to store %d", storeID) + return 20000, errors.WithMessage(err, fmt.Sprintf("failed to connect to store %d", storeID)) } logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err)) return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)", From 04888afcce332fe988387c70019e59bddcf9cf0a Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:24:59 +0100 Subject: [PATCH 05/13] normalize --- br/pkg/backup/client.go | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index bc81d0042bc4d..fa435f0462c5d 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -77,6 +77,20 @@ type Checksum struct { // ProgressUnit represents the unit of progress. type ProgressUnit string +type StoreBasedErr struct{ + storeID uint64 + message string + err error +} + +func (e *StoreBasedErr) Error() string { + return fmt.Sprintf("Store ID '%d': %v", e.storeID, e.err.Error()) +} + +func (e *StoreBasedErr) Unwrap() error { + return e.err +} + const ( // backupFineGrainedMaxBackoff is 1 hour. // given it begins the fine-grained backup, there must be some problems in the cluster. @@ -1125,7 +1139,7 @@ func (bc *Client) fineGrainedBackup( }) bo := utils.AdaptTiKVBackoffer(ctx, backupFineGrainedMaxBackoff, berrors.ErrUnknown) - maxDisconnect := make(map[int]int) + maxDisconnect := make(map[uint64]uint) for { // Step1, check whether there is any incomplete range incomplete := pr.Res.GetIncompleteRange(req.StartKey, req.EndKey) @@ -1174,12 +1188,11 @@ func (bc *Client) fineGrainedBackup( select { case err := <-errCh: if berrors.Is(err, berrors.ErrFailedToConnect) { - storeID := 0 - if strings.Contains(err.Error(), "failed to connect to store") { - _, scanErr := fmt.Sscanf(err.Error(), "failed to connect to store %d", &storeID) - if scanErr != nil { - log.Warn("failed to parse store ID", zap.Error(scanErr)) - } + var storeID uint64 + if storeErr, ok := err.(*StoreBasedErr); ok { + storeID = storeErr.storeID + } else { + break } if _, ok := maxDisconnect[storeID]; !ok { @@ -1302,7 +1315,11 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, errors.WithMessage(err, fmt.Sprintf("failed to connect to store %d", storeID)) + return 20000, &StoreBasedErr{ + storeID: storeID, + message: "failed to connect to store", + err: err, + } } logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID)) @@ -1341,7 +1358,11 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, errors.WithMessage(err, fmt.Sprintf("failed to connect to store %d", storeID)) + return 20000, &StoreBasedErr{ + storeID: storeID, + message: "failed to connect to store", + err: err, + } } logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err)) return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)", From 05c1942d91122d9d686072c6bcc5dcb3b19bb653 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:32:00 +0100 Subject: [PATCH 06/13] lint --- br/pkg/backup/client.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index fa435f0462c5d..6f01f317f2c2f 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -77,7 +77,7 @@ type Checksum struct { // ProgressUnit represents the unit of progress. type ProgressUnit string -type StoreBasedErr struct{ +type StoreBasedErr struct { storeID uint64 message string err error @@ -1189,8 +1189,10 @@ func (bc *Client) fineGrainedBackup( case err := <-errCh: if berrors.Is(err, berrors.ErrFailedToConnect) { var storeID uint64 + var message string if storeErr, ok := err.(*StoreBasedErr); ok { storeID = storeErr.storeID + message = storeErr.message } else { break } @@ -1202,12 +1204,12 @@ func (bc *Client) fineGrainedBackup( } if maxDisconnect[storeID] > 3 { - return errors.Annotatef(err, "failed to connect to store %d for 3 times", storeID) + return errors.Annotatef(err, "Store ID %d: %s", storeID, message) } else { break } } - + return errors.Trace(err) case resp, ok := <-respCh: if !ok { @@ -1319,7 +1321,7 @@ func (bc *Client) handleFineGrained( storeID: storeID, message: "failed to connect to store", err: err, - } + } } logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID)) @@ -1362,7 +1364,7 @@ func (bc *Client) handleFineGrained( storeID: storeID, message: "failed to connect to store", err: err, - } + } } logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err)) return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)", From 7b35a45ab8596832c73226d47f9a7b55e89d119b Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:53:12 +0100 Subject: [PATCH 07/13] fix --- br/pkg/backup/client.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 6f01f317f2c2f..47c56271badaa 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1194,23 +1194,16 @@ func (bc *Client) fineGrainedBackup( storeID = storeErr.storeID message = storeErr.message } else { - break - } - - if _, ok := maxDisconnect[storeID]; !ok { - maxDisconnect[storeID] = 0 - } else { - maxDisconnect[storeID]++ + return errors.Trace(err) } + maxDisconnect[storeID]++ if maxDisconnect[storeID] > 3 { return errors.Annotatef(err, "Store ID %d: %s", storeID, message) - } else { - break } + } else { + return errors.Trace(err) } - - return errors.Trace(err) case resp, ok := <-respCh: if !ok { // Finished. From 89afa50cc709bd2f51c064d46cabd20b43d52a17 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:31:37 +0100 Subject: [PATCH 08/13] improve readability --- br/pkg/backup/client.go | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 47c56271badaa..ea51331100ba9 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1187,23 +1187,20 @@ func (bc *Client) fineGrainedBackup( for { select { case err := <-errCh: - if berrors.Is(err, berrors.ErrFailedToConnect) { - var storeID uint64 - var message string - if storeErr, ok := err.(*StoreBasedErr); ok { - storeID = storeErr.storeID - message = storeErr.message - } else { - return errors.Trace(err) - } - - maxDisconnect[storeID]++ - if maxDisconnect[storeID] > 3 { - return errors.Annotatef(err, "Store ID %d: %s", storeID, message) - } - } else { + if !berrors.Is(err, berrors.ErrFailedToConnect) { return errors.Trace(err) } + storeErr, ok := err.(*StoreBasedErr) + if !ok { + return errors.Trace(err) + } + + storeID := storeErr.storeID + message := storeErr.message + maxDisconnect[storeID]++ + if maxDisconnect[storeID] > 3 { + return errors.Annotatef(err, "Store ID %d: %s", storeID, message) + } case resp, ok := <-respCh: if !ok { // Finished. From 6274e510fccfed1c33be663efed1433728f17b65 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:44:49 +0100 Subject: [PATCH 09/13] lint --- br/pkg/backup/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index ea51331100ba9..e348d6a33d9d6 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1194,7 +1194,7 @@ func (bc *Client) fineGrainedBackup( if !ok { return errors.Trace(err) } - + storeID := storeErr.storeID message := storeErr.message maxDisconnect[storeID]++ From 47e8d12fd9aa48f9638e747a523e6876ec635ab0 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Wed, 27 Nov 2024 03:48:46 +0100 Subject: [PATCH 10/13] improvement --- br/pkg/backup/client.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index e348d6a33d9d6..236c827a91b1e 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -79,7 +79,6 @@ type ProgressUnit string type StoreBasedErr struct { storeID uint64 - message string err error } @@ -1196,10 +1195,9 @@ func (bc *Client) fineGrainedBackup( } storeID := storeErr.storeID - message := storeErr.message maxDisconnect[storeID]++ - if maxDisconnect[storeID] > 3 { - return errors.Annotatef(err, "Store ID %d: %s", storeID, message) + if maxDisconnect[storeID] > backupRetryTimes { + return errors.Annotatef(err, "Failed to connect to store %d more than %d times", storeID, backupRetryTimes) } case resp, ok := <-respCh: if !ok { @@ -1309,7 +1307,6 @@ func (bc *Client) handleFineGrained( logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) return 20000, &StoreBasedErr{ storeID: storeID, - message: "failed to connect to store", err: err, } } @@ -1352,7 +1349,6 @@ func (bc *Client) handleFineGrained( logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) return 20000, &StoreBasedErr{ storeID: storeID, - message: "failed to connect to store", err: err, } } From dcd412c6030c20ed001b871683bc1fbf3c9018d9 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Fri, 29 Nov 2024 07:52:37 +0100 Subject: [PATCH 11/13] add const --- br/pkg/backup/client.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 236c827a91b1e..8fa06b77a8e6b 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -96,6 +96,7 @@ const ( // We need to be more patient. backupFineGrainedMaxBackoff = 3600000 backupRetryTimes = 5 + disconnectRetryTimeout = 20000 // RangeUnit represents the progress updated counter when a range finished. RangeUnit ProgressUnit = "range" // RegionUnit represents the progress updated counter when a region finished. @@ -1305,7 +1306,7 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, &StoreBasedErr{ + return disconnectRetryTimeout, &StoreBasedErr{ storeID: storeID, err: err, } @@ -1347,7 +1348,7 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, &StoreBasedErr{ + return disconnectRetryTimeout, &StoreBasedErr{ storeID: storeID, err: err, } From c2cb60bc5ca1d4571fd71ce4ad773c2af298a572 Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Wed, 4 Dec 2024 08:09:54 +0100 Subject: [PATCH 12/13] add integration test --- br/pkg/backup/client.go | 6 +++ br/tests/br_fine_grained_disconnect/run.sh | 46 ++++++++++++++++++++ br/tests/br_fine_grained_disconnect/workload | 12 +++++ 3 files changed, 64 insertions(+) create mode 100755 br/tests/br_fine_grained_disconnect/run.sh create mode 100644 br/tests/br_fine_grained_disconnect/workload diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 8fa06b77a8e6b..06b5e78b26726 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -1301,6 +1301,12 @@ func (bc *Client) handleFineGrained( lockResolver := bc.mgr.GetLockResolver() client, err := bc.mgr.GetBackupClient(ctx, storeID) + // inject a disconnect failpoint + failpoint.Inject("disconnect", func(_ failpoint.Value) { + logutil.CL(ctx).Warn("This is a injected disconnection error") + err = berrors.ErrFailedToConnect + }) + if err != nil { if berrors.Is(err, berrors.ErrFailedToConnect) { // When the leader store is died, diff --git a/br/tests/br_fine_grained_disconnect/run.sh b/br/tests/br_fine_grained_disconnect/run.sh new file mode 100755 index 0000000000000..90f08ea157bae --- /dev/null +++ b/br/tests/br_fine_grained_disconnect/run.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# +# Copyright 2022 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu +DB="$TEST_NAME" +TABLE="usertable" +DB_COUNT=3 +CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +function create_db_with_table(){ + for i in $(seq $DB_COUNT); do + run_sql "CREATE DATABASE $DB${i};" + go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB${i} + done +} + +function drop_db(){ + for i in $(seq $DB_COUNT); do + run_sql "DROP DATABASE $DB${i};" + done +} + +# Create dbs with table +create_db_with_table + +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/noop-backup=100*return(1)" +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/disconnect=100" +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/${CRYPTER_METHOD}_file" + + + +# Drop dbs finally +drop_db diff --git a/br/tests/br_fine_grained_disconnect/workload b/br/tests/br_fine_grained_disconnect/workload new file mode 100644 index 0000000000000..448ca3c1a477f --- /dev/null +++ b/br/tests/br_fine_grained_disconnect/workload @@ -0,0 +1,12 @@ +recordcount=1000 +operationcount=0 +workload=core + +readallfields=true + +readproportion=0 +updateproportion=0 +scanproportion=0 +insertproportion=0 + +requestdistribution=uniform \ No newline at end of file From 38d6e324e02cebeaff8bb259898711fdd0e1821f Mon Sep 17 00:00:00 2001 From: RidRisR <79858083+RidRisR@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:26:08 +0100 Subject: [PATCH 13/13] add redirect --- br/tests/br_fine_grained_disconnect/run.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/br/tests/br_fine_grained_disconnect/run.sh b/br/tests/br_fine_grained_disconnect/run.sh index 90f08ea157bae..bdbeaed651716 100755 --- a/br/tests/br_fine_grained_disconnect/run.sh +++ b/br/tests/br_fine_grained_disconnect/run.sh @@ -38,9 +38,12 @@ create_db_with_table export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/noop-backup=100*return(1)" export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/disconnect=100" -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/${CRYPTER_METHOD}_file" +output=$(run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/${CRYPTER_METHOD}_file" 2>&1) +if ! echo "$output" | grep -q "Failed to connect to store"; then + exit 1 +fi # Drop dbs finally drop_db