Skip to content

Commit

Permalink
Slack 19.0 backport 16997 (#568)
Browse files Browse the repository at this point in the history
* PRS and ERS don't promote replicas taking backups (vitessio#16997)

Signed-off-by: Eduardo J. Ortega U <[email protected]>
  • Loading branch information
ejortegau authored Nov 26, 2024
1 parent 508c86d commit 6f406db
Show file tree
Hide file tree
Showing 19 changed files with 1,199 additions and 631 deletions.
195 changes: 108 additions & 87 deletions go/vt/proto/replicationdata/replicationdata.pb.go

Large diffs are not rendered by default.

74 changes: 72 additions & 2 deletions go/vt/proto/replicationdata/replicationdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

959 changes: 490 additions & 469 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata.pb.go

Large diffs are not rendered by default.

72 changes: 70 additions & 2 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ type TabletManagerClient struct {
EventJitter time.Duration
ErrorAfter time.Duration
}
// TabletsBackupState records whether each tablet is currently running a backup - keyed by tablet alias.
TabletsBackupState map[string]bool
// keyed by tablet alias.
ChangeTabletTypeResult map[string]error
ChangeTabletTypeDelays map[string]time.Duration
Expand Down Expand Up @@ -918,6 +920,9 @@ func (fake *TabletManagerClient) ReplicationStatus(ctx context.Context, tablet *
}

if result, ok := fake.ReplicationStatusResults[key]; ok {
if _, ok = fake.TabletsBackupState[key]; ok {
result.Position.BackupRunning = fake.TabletsBackupState[key]
}
return result.Position, result.Error
}

Expand Down
24 changes: 19 additions & 5 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
// 2. Remove the tablets with the MustNot promote rule
// 3. Remove cross-cell tablets if PreventCrossCellPromotion is specified
// Our final primary candidate MUST belong to this list of valid candidates
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, prevPrimary, opts)
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, stoppedReplicationSnapshot.tabletsBackupState, prevPrimary, opts)
if err != nil {
return err
}
Expand Down Expand Up @@ -740,9 +740,12 @@ func (erp *EmergencyReparenter) promoteNewPrimary(
return nil
}

// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any constraint failures and can make forward progress on being promoted
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any
// constraint failures and can make forward progress on being promoted. Candidates that are taking a backup are set
// aside and returned only when no other valid candidate remains.
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackupState map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
var restrictedValidTablets []*topodatapb.Tablet
var notPreferredValidTablets []*topodatapb.Tablet
for _, tablet := range validTablets {
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
// Remove tablets which have MustNot promote rule since they must never be promoted
Expand All @@ -769,7 +772,18 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
}
continue
}
restrictedValidTablets = append(restrictedValidTablets, tablet)
// Put candidates that are running a backup in a separate list
backingUp, ok := tabletsBackupState[tabletAliasStr]
if ok && backingUp {
erp.logger.Infof("Setting %s in list of valid candidates taking a backup", tabletAliasStr)
notPreferredValidTablets = append(notPreferredValidTablets, tablet)
} else {
restrictedValidTablets = append(restrictedValidTablets, tablet)
}
}
if len(restrictedValidTablets) > 0 {
return restrictedValidTablets, nil
}
return restrictedValidTablets, nil

return notPreferredValidTablets, nil
}
Loading

0 comments on commit 6f406db

Please sign in to comment.