diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 7e4d2cda1809..6d51d8e5db23 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -121,6 +121,11 @@ func makeTableSpan(tableID uint32) roachpb.Span { return roachpb.Span{Key: k, EndKey: k.PrefixEnd()} } +func makeIndexSpan(tableID uint32, indexID uint32) roachpb.Span { + k := keys.SystemSQLCodec.IndexPrefix(tableID, indexID) + return roachpb.Span{Key: k, EndKey: k.PrefixEnd()} +} + func TestBackupRestoreStatementResult(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/ccl/backupccl/bench_covering_test.go b/pkg/ccl/backupccl/bench_covering_test.go index 0aacc7b777e1..cb8f5c8455a3 100644 --- a/pkg/ccl/backupccl/bench_covering_test.go +++ b/pkg/ccl/backupccl/bench_covering_test.go @@ -13,7 +13,10 @@ import ( fmt "fmt" "testing" + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/randutil" + "github.com/stretchr/testify/require" ) func BenchmarkCoverageChecks(b *testing.B) { @@ -53,12 +56,20 @@ func BenchmarkRestoreEntryCover(b *testing.B) { b.Run(fmt.Sprintf("numSpans=%d", numSpans), func(b *testing.B) { ctx := context.Background() backups := MockBackupChain(numBackups, numSpans, baseFiles, r) + latestIntrosByIndex, err := findLatestIntroFromManifests(backups, keys.SystemSQLCodec, hlc.Timestamp{}) + require.NoError(b, err) + latestIntrosBySpan, err := findLatestIntroBySpan(backups[numBackups-1].Spans, keys.SystemSQLCodec, latestIntrosByIndex) + require.NoError(b, err) b.ResetTimer() for i := 0; i < b.N; i++ { if err := checkCoverage(ctx, backups[numBackups-1].Spans, backups); err != nil { b.Fatal(err) } - cov := makeSimpleImportSpans(backups[numBackups-1].Spans, backups, nil, nil, 0) + restoreData := restorationDataBase{ + spans: backups[numBackups-1].Spans, + latestIntros: latestIntrosBySpan, + } + cov := makeSimpleImportSpans(&restoreData, backups, nil, nil, 0) b.ReportMetric(float64(len(cov)), "coverSize") } }) diff --git a/pkg/ccl/backupccl/restoration_data.go b/pkg/ccl/backupccl/restoration_data.go index 1bb319e7f6d5..b15060808346 100644 --- a/pkg/ccl/backupccl/restoration_data.go +++ b/pkg/ccl/backupccl/restoration_data.go @@ -13,7 +13,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" ) // restorationData specifies the data that is to be restored in a restoration flow. @@ -25,6 +27,10 @@ type restorationData interface { // getSpans returns the data spans that we're restoring into this cluster. getSpans() []roachpb.Span + // getLastIntros returns the end time of the last backup that reintroduced + // span i. + getLatestIntros() []hlc.Timestamp + // getSystemTables returns nil for non-cluster restores. It returns the // descriptors of the temporary system tables that should be restored into the // real table descriptors. The data for these temporary tables should be @@ -66,6 +72,10 @@ func (*mainRestorationData) isMainBundle() bool { return true } type restorationDataBase struct { // spans is the spans included in this bundle. spans []roachpb.Span + + // latestIntros is the last time each span was introduced. + latestIntros []hlc.Timestamp + // rekeys maps old table IDs to their new table descriptor. tableRekeys []execinfrapb.TableRekey // tenantRekeys maps tenants being restored to their new ID. @@ -105,6 +115,11 @@ func (b *restorationDataBase) getSpans() []roachpb.Span { return b.spans } +// getLastReIntros implements restorationData. +func (b *restorationDataBase) getLatestIntros() []hlc.Timestamp { + return b.latestIntros +} + // getSystemTables implements restorationData. func (b *restorationDataBase) getSystemTables() []catalog.TableDescriptor { return b.systemTables @@ -114,6 +129,7 @@ func (b *restorationDataBase) getSystemTables() []catalog.TableDescriptor { func (b *restorationDataBase) addTenant(fromTenantID, toTenantID roachpb.TenantID) { prefix := keys.MakeTenantPrefix(fromTenantID) b.spans = append(b.spans, roachpb.Span{Key: prefix, EndKey: prefix.PrefixEnd()}) + b.latestIntros = append(b.latestIntros, hlc.Timestamp{}) b.tenantRekeys = append(b.tenantRekeys, execinfrapb.TenantRekey{ OldID: fromTenantID, NewID: toTenantID, @@ -148,3 +164,21 @@ func checkForMigratedData(details jobspb.RestoreDetails, dataToRestore restorati return false } + +// findLatestIntroBySpan finds the latest intro time for the inputted spans. +// This function assumes that each span's start and end key belong to the same +// index. +func findLatestIntroBySpan( + spans roachpb.Spans, codec keys.SQLCodec, latestIntros map[tableAndIndex]hlc.Timestamp, +) ([]hlc.Timestamp, error) { + latestIntrosBySpan := make([]hlc.Timestamp, len(spans)) + for i, sp := range spans { + _, tablePrefix, indexPrefix, err := codec.DecodeIndexPrefix(sp.Key) + if err != nil { + return nil, err + } + introKey := tableAndIndex{descpb.ID(tablePrefix), descpb.IndexID(indexPrefix)} + latestIntrosBySpan[i] = latestIntros[introKey] + } + return latestIntrosBySpan, nil +} diff --git a/pkg/ccl/backupccl/restore_job.go b/pkg/ccl/backupccl/restore_job.go index 7ee2343ec8e2..1cb5d023867e 100644 --- a/pkg/ccl/backupccl/restore_job.go +++ b/pkg/ccl/backupccl/restore_job.go @@ -274,9 +274,8 @@ func restore( // which are grouped by keyrange. highWaterMark := job.Progress().Details.(*jobspb.Progress_Restore).Restore.HighWater - importSpans := makeSimpleImportSpans(dataToRestore.getSpans(), backupManifests, backupLocalityMap, - highWaterMark, targetRestoreSpanSize) - + importSpans := makeSimpleImportSpans(dataToRestore, backupManifests, + backupLocalityMap, highWaterMark, targetRestoreSpanSize) if len(importSpans) == 0 { // There are no files to restore. return emptyRowCount, nil @@ -651,6 +650,28 @@ func spansForAllRestoreTableIndexes( return spans } +// findLatestIntroFromManifests finds the endtime of the latest incremental +// backup that introduced each backed up index, as of restore time. +func findLatestIntroFromManifests( + manifests []backuppb.BackupManifest, codec keys.SQLCodec, asOf hlc.Timestamp, +) (map[tableAndIndex]hlc.Timestamp, error) { + latestIntro := make(map[tableAndIndex]hlc.Timestamp) + for _, b := range manifests { + if !asOf.IsEmpty() && asOf.Less(b.StartTime) { + break + } + for _, sp := range b.IntroducedSpans { + _, tablePrefix, indexPrefix, err := codec.DecodeIndexPrefix(sp.Key) + if err != nil { + return nil, err + } + introKey := tableAndIndex{descpb.ID(tablePrefix), descpb.IndexID(indexPrefix)} + latestIntro[introKey] = b.EndTime + } + } + return latestIntro, nil +} + func shouldPreRestore(table *tabledesc.Mutable) bool { if table.GetParentID() != keys.SystemDatabaseID { return false @@ -695,6 +716,7 @@ func createImportingDescriptors( p sql.JobExecContext, backupCodec keys.SQLCodec, sqlDescs []catalog.Descriptor, + latestIntrosByIndex map[tableAndIndex]hlc.Timestamp, r *restoreResumer, ) ( dataToPreRestore *restorationDataBase, @@ -1240,11 +1262,21 @@ func createImportingDescriptors( pkIDs[roachpb.BulkOpSummaryID(uint64(tbl.GetID()), uint64(tbl.GetPrimaryIndexID()))] = true } + preRestoreLatestIntros, err := findLatestIntroBySpan(preRestoreSpans, backupCodec, latestIntrosByIndex) + if err != nil { + return nil, nil, nil, err + } dataToPreRestore = &restorationDataBase{ spans: preRestoreSpans, tableRekeys: rekeys, tenantRekeys: tenantRekeys, pkIDs: pkIDs, + latestIntros: preRestoreLatestIntros, + } + + postRestoreLatestIntros, err := findLatestIntroBySpan(postRestoreSpans, backupCodec, latestIntrosByIndex) + if err != nil { + return nil, nil, nil, err } trackedRestore = &mainRestorationData{ @@ -1253,6 +1285,7 @@ func createImportingDescriptors( tableRekeys: rekeys, tenantRekeys: tenantRekeys, pkIDs: pkIDs, + latestIntros: postRestoreLatestIntros, }, } @@ -1264,6 +1297,12 @@ func createImportingDescriptors( if details.VerifyData { trackedRestore.restorationDataBase.spans = verifySpans trackedRestore.restorationDataBase.validateOnly = true + verifySpansLatestIntros, err := findLatestIntroBySpan(verifySpans, backupCodec, + latestIntrosByIndex) + if err != nil { + return nil, nil, nil, err + } + trackedRestore.latestIntros = verifySpansLatestIntros // Before the main (validation) flow, during a cluster level restore, // we still need to restore system tables that do NOT get restored in the dataToPreRestore @@ -1275,6 +1314,7 @@ func createImportingDescriptors( preValidation.spans = postRestoreSpans preValidation.tableRekeys = rekeys preValidation.pkIDs = pkIDs + preValidation.latestIntros = postRestoreLatestIntros } if tempSystemDBID != descpb.InvalidID { @@ -1432,9 +1472,12 @@ func (r *restoreResumer) doResume(ctx context.Context, execCtx interface{}) erro if err != nil { return err } - + latestIntrosByIndex, err := findLatestIntroFromManifests(backupManifests, backupCodec, details.EndTime) + if err != nil { + return err + } preData, preValidateData, mainData, err := createImportingDescriptors(ctx, p, backupCodec, - sqlDescs, r) + sqlDescs, latestIntrosByIndex, r) if err != nil { return err } diff --git a/pkg/ccl/backupccl/restore_span_covering.go b/pkg/ccl/backupccl/restore_span_covering.go index d65cef3d0ed0..2923de51d3da 100644 --- a/pkg/ccl/backupccl/restore_span_covering.go +++ b/pkg/ccl/backupccl/restore_span_covering.go @@ -75,7 +75,7 @@ const targetRestoreSpanSize = 384 << 20 // if its current data size plus that of the new span is less than the target // size. func makeSimpleImportSpans( - requiredSpans []roachpb.Span, + data restorationData, backups []backuppb.BackupManifest, backupLocalityMap map[int]storeByLocalityKV, lowWaterMark roachpb.Key, @@ -88,9 +88,10 @@ func makeSimpleImportSpans( for i := range backups { sort.Sort(backupinfo.BackupFileDescriptors(backups[i].Files)) } - var cover []execinfrapb.RestoreSpanEntry - for _, span := range requiredSpans { + spans := data.getSpans() + latestIntros := data.getLatestIntros() + for spanIdx, span := range spans { if span.EndKey.Compare(lowWaterMark) < 0 { continue } @@ -99,8 +100,14 @@ func makeSimpleImportSpans( } spanCoverStart := len(cover) - for layer := range backups { + if backups[layer].EndTime.Less(latestIntros[spanIdx]) { + // Don't use this backup to cover this span if the span was reintroduced + // after the backup's endTime. In this case, this backup may + // have invalid data, and further, a subsequent backup will contain all of + // this span's data. + continue + } covPos := spanCoverStart // lastCovSpanSize is the size of files added to the right-most span of diff --git a/pkg/ccl/backupccl/restore_span_covering_test.go b/pkg/ccl/backupccl/restore_span_covering_test.go index bea046ee8950..bd10af7dc402 100644 --- a/pkg/ccl/backupccl/restore_span_covering_test.go +++ b/pkg/ccl/backupccl/restore_span_covering_test.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/ccl/backupccl/backuppb" "github.com/cockroachdb/cockroach/pkg/cloud/cloudpb" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" "github.com/cockroachdb/cockroach/pkg/util/encoding" @@ -36,13 +37,13 @@ func MockBackupChain(length, spans, baseFiles int, r *rand.Rand) []backuppb.Back for i := range backups { backups[i].Spans = make(roachpb.Spans, spans) for j := range backups[i].Spans { - backups[i].Spans[j] = makeTableSpan(uint32(100 + j + (i / 3))) + backups[i].Spans[j] = makeIndexSpan(uint32(100+j), 1) } backups[i].EndTime = ts.Add(time.Minute.Nanoseconds()*int64(i), 0) if i > 0 { backups[i].StartTime = backups[i-1].EndTime if i%3 == 0 { - backups[i].IntroducedSpans = roachpb.Spans{backups[i].Spans[spans-1]} + backups[i].IntroducedSpans = roachpb.Spans{backups[i].Spans[r.Intn(spans)]} } } @@ -84,15 +85,20 @@ func MockBackupChain(length, spans, baseFiles int, r *rand.Rand) []backuppb.Back // thus sensitive to ordering; the coverage correctness check however is not. func checkRestoreCovering( backups []backuppb.BackupManifest, - spans roachpb.Spans, + data restorationData, cov []execinfrapb.RestoreSpanEntry, merged bool, ) error { var expectedPartitions int required := make(map[string]*roachpb.SpanGroup) - for _, s := range spans { + spans := data.getSpans() + latestIntros := data.getLatestIntros() + for sIdx, s := range spans { var last roachpb.Key for _, b := range backups { + if b.EndTime.Less(latestIntros[sIdx]) { + continue + } for _, f := range b.Files { if sp := s.Intersect(f.Span); sp.Valid() { if required[f.Path] == nil { @@ -150,15 +156,22 @@ func TestRestoreEntryCoverExample(t *testing.T) { {Files: []backuppb.BackupManifest_File{f("a", "h", "6"), f("j", "k", "7")}}, {Files: []backuppb.BackupManifest_File{f("h", "i", "8"), f("l", "m", "9")}}, } + latestIntros := []hlc.Timestamp{{WallTime: 3}, {}, {}} - // Pretend every span has 1MB. for i := range backups { + backups[i].StartTime = hlc.Timestamp{WallTime: int64(i)} + backups[i].EndTime = hlc.Timestamp{WallTime: int64(i + 1)} + for j := range backups[i].Files { + // Pretend every span has 1MB. backups[i].Files[j].EntryCounts.DataSize = 1 << 20 } } - - cover := makeSimpleImportSpans(spans, backups, nil, nil, noSpanTargetSize) + restoreData := restorationDataBase{ + spans: spans, + latestIntros: make([]hlc.Timestamp, len(spans)), + } + cover := makeSimpleImportSpans(&restoreData, backups, nil, nil, noSpanTargetSize) require.Equal(t, []execinfrapb.RestoreSpanEntry{ {Span: sp("a", "c"), Files: paths("1", "4", "6")}, {Span: sp("c", "e"), Files: paths("2", "4", "6")}, @@ -167,13 +180,21 @@ func TestRestoreEntryCoverExample(t *testing.T) { {Span: sp("l", "m"), Files: paths("9")}, }, cover) - coverSized := makeSimpleImportSpans(spans, backups, nil, nil, 2<<20) + coverSized := makeSimpleImportSpans(&restoreData, backups, nil, nil, 2<<20) require.Equal(t, []execinfrapb.RestoreSpanEntry{ {Span: sp("a", "f"), Files: paths("1", "2", "4", "6")}, {Span: sp("f", "i"), Files: paths("3", "5", "6", "8")}, {Span: sp("l", "m"), Files: paths("9")}, }, coverSized) + restoreData.latestIntros = latestIntros + coverTimeFiltering := makeSimpleImportSpans(&restoreData, backups, nil, nil, 2<<20) + require.Equal(t, []execinfrapb.RestoreSpanEntry{ + {Span: sp("a", "f"), Files: paths("6")}, + {Span: sp("f", "i"), Files: paths("3", "5", "6", "8")}, + {Span: sp("l", "m"), Files: paths("9")}, + }, coverTimeFiltering) + } func TestRestoreEntryCover(t *testing.T) { @@ -184,10 +205,20 @@ func TestRestoreEntryCover(t *testing.T) { for _, spans := range []int{1, 2, 3, 5, 9, 11, 12} { for _, files := range []int{0, 1, 2, 3, 4, 10, 12, 50} { backups := MockBackupChain(numBackups, spans, files, r) + latestIntrosByIndex, err := findLatestIntroFromManifests(backups, keys.SystemSQLCodec, hlc.Timestamp{}) + require.NoError(t, err) + latestIntrosBySpan, err := findLatestIntroBySpan(backups[numBackups-1].Spans, keys.SystemSQLCodec, latestIntrosByIndex) + require.NoError(t, err) + for _, target := range []int64{0, 1, 4, 100, 1000} { t.Run(fmt.Sprintf("numBackups=%d, numSpans=%d, numFiles=%d, merge=%d", numBackups, spans, files, target), func(t *testing.T) { - cover := makeSimpleImportSpans(backups[numBackups-1].Spans, backups, nil, nil, target<<20) - if err := checkRestoreCovering(backups, backups[numBackups-1].Spans, cover, target != noSpanTargetSize); err != nil { + restoreData := restorationDataBase{ + spans: backups[numBackups-1].Spans, + latestIntros: latestIntrosBySpan, + } + cover := makeSimpleImportSpans(&restoreData, backups, nil, nil, + target<<20) + if err := checkRestoreCovering(backups, &restoreData, cover, target != noSpanTargetSize); err != nil { t.Fatal(err) } }) diff --git a/pkg/ccl/backupccl/targets.go b/pkg/ccl/backupccl/targets.go index 472e8067d05d..e51edf4cbf5a 100644 --- a/pkg/ccl/backupccl/targets.go +++ b/pkg/ccl/backupccl/targets.go @@ -489,15 +489,17 @@ func MakeBackupTableEntry( if err := checkCoverage(ctx, []roachpb.Span{tablePrimaryIndexSpan}, backupManifests); err != nil { return BackupTableEntry{}, errors.Wrapf(err, "making spans for table %s", fullyQualifiedTableName) } - + restoreData := restorationDataBase{ + spans: []roachpb.Span{tablePrimaryIndexSpan}, + latestIntros: make([]hlc.Timestamp, 1), + } entry := makeSimpleImportSpans( - []roachpb.Span{tablePrimaryIndexSpan}, + &restoreData, backupManifests, nil, /*backupLocalityInfo*/ roachpb.Key{}, /*lowWaterMark*/ targetRestoreSpanSize, ) - lastSchemaChangeTime := findLastSchemaChangeTime(backupManifests, tbDesc, endTime) backupTableEntry := BackupTableEntry{ diff --git a/pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback b/pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback new file mode 100644 index 000000000000..f2b3dd9837ce --- /dev/null +++ b/pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback @@ -0,0 +1,339 @@ +# test that we properly fully backup an offline span when it can be non-mvcc +# +# TODO(msbutler): waiting for https://github.com/cockroachdb/cockroach/pull/86689 to land +# Part 1 - ensure clear range induces full reintroduction of spans +# - begin import jobs and pause it +# - run inc backup - verify inc has captured the data +# - roll it back it back non-mvcc +# - run an inc backup and ensure we reintroduce the table spans + +new-server name=s1 +---- + +########### +# Case 1: an incremental backup captures a non-mvcc rollback +########### + +exec-sql +CREATE DATABASE d; +USE d; +CREATE TABLE foo (i INT PRIMARY KEY, s STRING); +CREATE TABLE foofoo (i INT PRIMARY KEY, s STRING); +INSERT INTO foofoo VALUES (10, 'x0'); +CREATE TABLE baz (i INT PRIMARY KEY, s STRING); +INSERT INTO baz VALUES (1, 'x'),(2,'y'),(3,'z'); +---- + +exec-sql +SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest'; +---- + + +exec-sql +SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = false; +---- + + +exec-sql +EXPORT INTO CSV 'nodelocal://0/export1/' FROM SELECT * FROM baz; +---- + + +# Pause the import job, in order to back up the importing data. +import expect-pausepoint tag=a +IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv') +---- +job paused at pausepoint + + +import expect-pausepoint tag=aa +IMPORT INTO foofoo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv') +---- +job paused at pausepoint + + +# Ensure table, database, and cluster full backups capture importing rows. +exec-sql +BACKUP INTO 'nodelocal://0/cluster/'; +---- + + +exec-sql +BACKUP DATABASE d INTO 'nodelocal://0/database/'; +---- + +exec-sql +BACKUP TABLE d.* INTO 'nodelocal://0/table/'; +---- + + +exec-sql +SET CLUSTER SETTING jobs.debug.pausepoints = ''; +---- + + +# Resume the job so the next set of incremental backups observes that tables are back online +job cancel=a +---- + +job cancel=aa +---- + +job tag=a wait-for-state=cancelled +---- + + +job tag=aa wait-for-state=cancelled +---- + + +# Ensure incremental backups backup the newly online spans from ts=0, as the +# import was rolled back via non-mvcc clear range. So, backup 0 rows from foo +# (it was empty pre-import), and 1 row from foo (had 1 row pre-import); +exec-sql +BACKUP INTO LATEST IN 'nodelocal://0/cluster/'; +---- + +exec-sql +BACKUP DATABASE d INTO LATEST IN 'nodelocal://0/database/'; +---- + + +exec-sql +BACKUP TABLE d.* INTO LATEST IN 'nodelocal://0/table/'; +---- + +query-sql +SELECT + database_name, object_name, object_type, rows, backup_type +FROM + [SHOW BACKUP FROM LATEST IN 'nodelocal://0/cluster/'] +WHERE + object_name = 'foo' or object_name = 'foofoo' +ORDER BY + start_time, database_name; +---- +d foo table 3 full +d foofoo table 4 full +d foo table 0 incremental +d foofoo table 1 incremental + +query-sql +SELECT + database_name, object_name, object_type, rows, backup_type +FROM + [SHOW BACKUP FROM LATEST IN 'nodelocal://0/database/'] +WHERE + object_name = 'foo' or object_name = 'foofoo' +ORDER BY + start_time, database_name; +---- +d foo table 3 full +d foofoo table 4 full +d foo table 0 incremental +d foofoo table 1 incremental + + +query-sql +SELECT + database_name, object_name, object_type, rows, backup_type +FROM + [SHOW BACKUP FROM LATEST IN 'nodelocal://0/table/'] +WHERE + object_name = 'foo' or object_name = 'foofoo' +ORDER BY + start_time, database_name; +---- +d foo table 3 full +d foofoo table 4 full +d foo table 0 incremental +d foofoo table 1 incremental + + +query-sql +SELECT count(*) FROM d.foo; +---- +0 + + +query-sql +SELECT count(*) FROM d.foofoo; +---- +1 + + +# To verify the incremental backed up the pre-import state table, restore d and ensure all tables +# are in their pre-import state. + +exec-sql +RESTORE DATABASE d FROM LATEST IN 'nodelocal://0/database/' with new_db_name=d2; +---- + + +query-sql +SELECT count(*) FROM d2.foo; +---- +0 + + +query-sql +SELECT count(*) FROM d2.foofoo; +---- +1 + +########### +# Case 2: an incremental backup captures an mvcc rollback +########### + +exec-sql +DROP DATABASE d2; +CREATE TABLE foo2 (i INT PRIMARY KEY, s STRING); +CREATE TABLE foofoo2 (i INT PRIMARY KEY, s STRING); +INSERT INTO foofoo2 VALUES (10, 'x0'); +---- + +exec-sql +SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest'; +---- + + +exec-sql +SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = true; +---- + +# Pause the import job, in order to back up the importing data. +import expect-pausepoint tag=b +IMPORT INTO foo2 (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv') +---- +job paused at pausepoint + + +import expect-pausepoint tag=bb +IMPORT INTO foofoo2 (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv') +---- +job paused at pausepoint + + +# Ensure table, database, and cluster full backups capture importing rows. +exec-sql +BACKUP INTO 'nodelocal://0/cluster/'; +---- + + +exec-sql +BACKUP DATABASE d INTO 'nodelocal://0/database/'; +---- + +exec-sql +BACKUP TABLE d.* INTO 'nodelocal://0/table/'; +---- + + +exec-sql +SET CLUSTER SETTING jobs.debug.pausepoints = ''; +---- + + +# Resume the job so the next set of incremental backups observes that tables are back online +job cancel=b +---- + +job cancel=bb +---- + +job tag=b wait-for-state=cancelled +---- + + +job tag=bb wait-for-state=cancelled +---- + +# These incremental backups will back up all mvcc history from foo2 and foofoo2 because the +# tables returned online. For each table, this means: +# - foofoo2 will have 7 rows: +# - 1 row from before the import +# - 3 rows from the import +# - 3 delete tombstones from the import rollback +# - foo2: will have 3 rows: +# - 3 rows from the import +# - Note because foo2 had no data pre import, an mvcc range tombstone will delete the imported data. +# The incremental backup will capture this range tombstone, however, SHOW BACKUP currently will +# not record this range as a "row" in the backup. + +exec-sql +BACKUP INTO LATEST IN 'nodelocal://0/cluster/'; +---- + +exec-sql +BACKUP DATABASE d INTO LATEST IN 'nodelocal://0/database/'; +---- + + +exec-sql +BACKUP TABLE d.* INTO LATEST IN 'nodelocal://0/table/'; +---- + +query-sql +SELECT + database_name, object_name, object_type, rows, backup_type +FROM + [SHOW BACKUP FROM LATEST IN 'nodelocal://0/cluster/'] +WHERE + object_name = 'foo2' or object_name = 'foofoo2' +ORDER BY + start_time, database_name; +---- +d foo2 table 3 full +d foofoo2 table 4 full +d foo2 table 3 incremental +d foofoo2 table 7 incremental + +query-sql +SELECT + database_name, object_name, object_type, rows, backup_type +FROM + [SHOW BACKUP FROM LATEST IN 'nodelocal://0/database/'] +WHERE + object_name = 'foo2' or object_name = 'foofoo2' +ORDER BY + start_time, database_name; +---- +d foo2 table 3 full +d foofoo2 table 4 full +d foo2 table 3 incremental +d foofoo2 table 7 incremental + + +query-sql +SELECT + database_name, object_name, object_type, rows, backup_type +FROM + [SHOW BACKUP FROM LATEST IN 'nodelocal://0/table/'] +WHERE + object_name = 'foo2' or object_name = 'foofoo2' +ORDER BY + start_time, database_name; +---- +d foo2 table 3 full +d foofoo2 table 4 full +d foo2 table 3 incremental +d foofoo2 table 7 incremental + +# To verify the incremental backup captured the tombstones, restore d and ensure all tables +# are in their pre-import state. + +exec-sql +RESTORE DATABASE d FROM LATEST IN 'nodelocal://0/database/' with new_db_name=d2; +---- + + +query-sql +SELECT count(*) FROM d2.foo2; +---- +0 + + +query-sql +SELECT count(*) FROM d2.foofoo2; +---- +1