Merge pull request #88488 from msbutler/backport22.1-87312
release-22.1: backupccl: elide spans from backups that were subsequently reintroduced
msbutler authored Sep 30, 2022
2 parents a87b3d0 + 4ee23ee commit 568c2df
Showing 15 changed files with 1,359 additions and 119 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/backupccl/BUILD.bazel
@@ -272,6 +272,7 @@ go_test(
"//pkg/util/protoutil",
"//pkg/util/randutil",
"//pkg/util/retry",
"//pkg/util/span",
"//pkg/util/stop",
"//pkg/util/syncutil",
"//pkg/util/timeutil",
6 changes: 6 additions & 0 deletions pkg/ccl/backupccl/backup.proto
@@ -80,6 +80,12 @@ message BackupManifest {
// here are covered in the interval (0, startTime], which, in conjunction with
// the coverage from (startTime, endTime] implied for all spans in Spans,
// results in coverage from [0, endTime] for these spans.
//
// The first set of spans in this field contains new spans that did not
// exist in the previous backup (a new index, for example), while the remaining
// spans are re-introduced spans, which need to be backed up again from (0,
// startTime] because a non-MVCC operation may have occurred on this span. See
// getReintroducedSpans() for more information.
repeated roachpb.Span introduced_spans = 15 [(gogoproto.nullable) = false];

repeated DescriptorRevision descriptor_changes = 16 [(gogoproto.nullable) = false];
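To make the ordering rule in the new comment concrete, here is a minimal, self-contained Go sketch. The `span` type and `buildIntroducedSpans` helper are hypothetical stand-ins for roachpb.Span and the planner's actual logic, not code from this patch:

```go
// Hypothetical illustration of the introduced_spans layout described above.
package main

import "fmt"

// span is a simplified stand-in for roachpb.Span.
type span struct{ key, endKey string }

// buildIntroducedSpans concatenates the two groups in the documented order:
// spans that did not exist in the previous backup first, then spans that
// must be re-backed up from (0, startTime] after a possible non-MVCC write.
func buildIntroducedSpans(newSpans, reintroduced []span) []span {
	out := make([]span, 0, len(newSpans)+len(reintroduced))
	out = append(out, newSpans...)
	return append(out, reintroduced...)
}

func main() {
	introduced := buildIntroducedSpans(
		[]span{{"/Table/106/2", "/Table/106/3"}}, // e.g. a newly created index
		[]span{{"/Table/104", "/Table/105"}},     // e.g. a table that was offline
	)
	fmt.Println(introduced)
}
```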
51 changes: 46 additions & 5 deletions pkg/ccl/backupccl/backup_planning.go
@@ -1420,8 +1420,27 @@ func getReintroducedSpans(
) ([]roachpb.Span, error) {
reintroducedTables := make(map[descpb.ID]struct{})

// First, create a map that indicates which tables from the previous backup
// were offline when that backup was taken. To create this map, we must
// iterate over two fields in the _last_ backup's manifest:
//
// 1. manifest.Descriptors contains a list of descriptors _explicitly_
// included in the backup, gathered at backup startTime. This includes all
// public descriptors, and in the case of cluster backups, offline
// descriptors.
//
// 2. manifest.DescriptorChanges contains a list of descriptor changes tracked
// in the backup. While investigating #88042, it was discovered that during
// revision history database and table.* backups, a table can get included in
// manifest.DescriptorChanges when it transitions from an online to offline
// state, causing its spans to get backed up. However, if this table was
// offline when the backup began, it's excluded from manifest.Descriptors.
// Therefore, to find all descriptors covered in the backup that were offline
// at backup time, we must find all tables in manifest.DescriptorChanges whose
// last change brought the table offline.
offlineInLastBackup := make(map[descpb.ID]struct{})
lastBackup := prevBackups[len(prevBackups)-1]

for _, desc := range lastBackup.Descriptors {
// TODO(pbardea): Also check that lastWriteTime is set once those are
// populated on the table descriptor.
@@ -1430,6 +1449,29 @@
}
}

// Gather all the descriptors that were offline at the endTime of the last
// backup, according to the backup's descriptor changes. If the last descriptor
// change in the previous backup interval put the table offline, then that
// table was offline at the endTime of the last backup.
latestTableDescChangeInLastBackup := make(map[descpb.ID]*descpb.TableDescriptor)
for _, rev := range lastBackup.DescriptorChanges {
if table, _, _, _ := descpb.FromDescriptor(rev.Desc); table != nil {
if trackedRev, ok := latestTableDescChangeInLastBackup[table.GetID()]; !ok {
latestTableDescChangeInLastBackup[table.GetID()] = table
} else if trackedRev.Version < table.Version {
latestTableDescChangeInLastBackup[table.GetID()] = table
}
}
}

if knobs := execCfg.BackupRestoreTestingKnobs; knobs == nil || !knobs.SkipDescriptorChangeIntroduction {
for _, table := range latestTableDescChangeInLastBackup {
if table.Offline() {
offlineInLastBackup[table.GetID()] = struct{}{}
}
}
}

// If the table was offline in the last backup, but becomes PUBLIC, then it
// needs to be re-included since we may have missed non-transactional writes.
tablesToReinclude := make([]catalog.TableDescriptor, 0)
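The hunk above keeps only the highest-version revision of each table before checking whether that revision left the table offline. A standalone sketch of the pattern, with a hypothetical `tableRev` type in place of descpb.TableDescriptor:

```go
// Standalone sketch of the "latest revision wins" scan; tableRev is a
// hypothetical stand-in for descpb.TableDescriptor.
package main

import "fmt"

type tableRev struct {
	id      int
	version int
	offline bool
}

// offlineAtEndOfBackup returns the IDs of tables whose highest-version
// revision among the backup's descriptor changes left them offline.
func offlineAtEndOfBackup(revs []tableRev) map[int]struct{} {
	latest := make(map[int]tableRev)
	for _, rev := range revs {
		if cur, ok := latest[rev.id]; !ok || cur.version < rev.version {
			latest[rev.id] = rev
		}
	}
	offline := make(map[int]struct{})
	for id, rev := range latest {
		if rev.offline {
			offline[id] = struct{}{}
		}
	}
	return offline
}

func main() {
	revs := []tableRev{
		{id: 1, version: 3, offline: false},
		{id: 1, version: 4, offline: true}, // the last change took table 1 offline
		{id: 2, version: 7, offline: false},
	}
	fmt.Println(offlineAtEndOfBackup(revs)) // map[1:{}]
}
```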
@@ -1444,6 +1486,7 @@
// table may have been OFFLINE at the time of the last backup, and OFFLINE at
// the time of the current backup, but may have been PUBLIC at some time in
// between.

for _, rev := range revs {
rawTable, _, _, _ := descpb.FromDescriptor(rev.Desc)
if rawTable == nil {
@@ -1468,7 +1511,6 @@
allRevs = append(allRevs, rev)
}
}

tableSpans, err := spansForAllTableIndexes(execCfg, tablesToReinclude, allRevs)
if err != nil {
return nil, err
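With offlineInLastBackup in hand, the reinclusion decision reduces to a membership check: a table that was offline at the end of the last backup gets its spans re-introduced, because non-MVCC writes to it would be invisible to an incremental backup of (startTime, endTime]. A simplified sketch under that reading, with plain IDs in place of descriptors and an illustrative `spansToReintroduce` helper:

```go
// Hypothetical sketch of the reinclusion decision from getReintroducedSpans.
package main

import "fmt"

func spansToReintroduce(currentTables []int, offlineInLastBackup map[int]struct{}) []int {
	var reintroduce []int
	for _, id := range currentTables {
		if _, wasOffline := offlineInLastBackup[id]; wasOffline {
			reintroduce = append(reintroduce, id) // back up from time zero again
		}
	}
	return reintroduce
}

func main() {
	offline := map[int]struct{}{1: {}}
	fmt.Println(spansToReintroduce([]int{1, 2, 3}, offline)) // [1]
}
```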
@@ -1830,8 +1872,7 @@ func getBackupDetailAndManifest(
}
}

var newSpans roachpb.Spans

var newSpans, reIntroducedSpans roachpb.Spans
var priorIDs map[descpb.ID]descpb.ID

var revs []BackupManifest_DescriptorRevision
@@ -1912,11 +1953,11 @@

newSpans = filterSpans(spans, prevBackups[len(prevBackups)-1].Spans)

tableSpans, err := getReintroducedSpans(ctx, execCfg, prevBackups, tables, revs, endTime)
reIntroducedSpans, err = getReintroducedSpans(ctx, execCfg, prevBackups, tables, revs, endTime)
if err != nil {
return jobspb.BackupDetails{}, BackupManifest{}, err
}
newSpans = append(newSpans, tableSpans...)
newSpans = append(newSpans, reIntroducedSpans...)
}

// if CompleteDbs is lost by a 1.x node, FormatDescriptorTrackingVersion
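The last hunk composes the manifest's introduced spans: filterSpans drops the spans already covered by the previous backup, and the re-introduced spans are appended after the genuinely new ones, matching the ordering documented in backup.proto. A crude, self-contained sketch in which spans are plain strings and coverage is exact-match; the real filterSpans does proper key-span arithmetic:

```go
// Simplified model of the newSpans composition in getBackupDetailAndManifest.
package main

import "fmt"

// filterSpans here is an exact-match stand-in for the real span filtering.
func filterSpans(spans, covered []string) []string {
	coveredSet := make(map[string]struct{}, len(covered))
	for _, s := range covered {
		coveredSet[s] = struct{}{}
	}
	var out []string
	for _, s := range spans {
		if _, ok := coveredSet[s]; !ok {
			out = append(out, s) // not covered by the previous backup
		}
	}
	return out
}

func main() {
	spans := []string{"/Table/104", "/Table/105", "/Table/106"}
	prevSpans := []string{"/Table/104", "/Table/105"}
	reIntroducedSpans := []string{"/Table/105"} // went offline, then public again

	newSpans := filterSpans(spans, prevSpans)         // brand-new spans first
	newSpans = append(newSpans, reIntroducedSpans...) // then re-introduced ones
	fmt.Println(newSpans)                             // [/Table/106 /Table/105]
}
```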
10 changes: 7 additions & 3 deletions pkg/ccl/backupccl/bench_covering_test.go
@@ -10,10 +10,12 @@ package backupccl

import (
"context"
fmt "fmt"
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/stretchr/testify/require"
)

func BenchmarkCoverageChecks(b *testing.B) {
@@ -44,7 +46,6 @@ func BenchmarkCoverageChecks(b *testing.B) {

func BenchmarkRestoreEntryCover(b *testing.B) {
r, _ := randutil.NewTestRand()

for _, numBackups := range []int{1, 2, 24, 24 * 4} {
b.Run(fmt.Sprintf("numBackups=%d", numBackups), func(b *testing.B) {
for _, baseFiles := range []int{0, 100, 10000} {
@@ -58,7 +59,10 @@
if err := checkCoverage(ctx, backups[numBackups-1].Spans, backups); err != nil {
b.Fatal(err)
}
cov := makeSimpleImportSpans(backups[numBackups-1].Spans, backups, nil, nil)
introducedSpanFrontier, err := createIntroducedSpanFrontier(backups, hlc.Timestamp{})
require.NoError(b, err)

cov := makeSimpleImportSpans(backups[numBackups-1].Spans, backups, nil, introducedSpanFrontier, nil)
b.ReportMetric(float64(len(cov)), "coverSize")
}
})
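The benchmark now builds an introduced-span frontier before computing the cover. Conceptually, such a frontier records, for each span, the latest timestamp at which the span was (re-)introduced, so restore can disregard older backups' data for it. A map-based toy version of that idea, not the real pkg/util/span Frontier API:

```go
// Hypothetical, map-keyed stand-in for an introduced-span frontier.
package main

import "fmt"

type frontier map[string]int64 // span -> latest introduction time (simplified)

// forward advances the frontier for span to ts, if ts is newer.
func (f frontier) forward(span string, ts int64) {
	if f[span] < ts {
		f[span] = ts
	}
}

func main() {
	f := make(frontier)
	f.forward("/Table/104", 10) // introduced in the backup ending at t=10
	f.forward("/Table/104", 30) // re-introduced at t=30; the frontier advances
	f.forward("/Table/104", 20) // stale timestamp; the frontier stays at 30
	fmt.Println(f["/Table/104"]) // 30
}
```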
(diff for the remaining 11 changed files not shown)
