From f3180be6b82472d02c370a24dc8f753d08289755 Mon Sep 17 00:00:00 2001
From: Michael Butler
Date: Wed, 31 Aug 2022 19:51:32 -0500
Subject: [PATCH] backupccl: elide spans from backups that were subsequently
 reintroduced

Currently RESTORE may restore invalid backup data from a backed up table
that underwent an IMPORT rollback. See #87305 for a detailed explanation.
This patch ensures that RESTORE elides older backup data that was deleted
via a non-MVCC operation. Because incremental backups always reintroduce
spans that may have undergone a non-MVCC operation, restore can identify
restoring spans with potentially corrupt data in the backup chain and, for
a restore to any system time, ingest only the spans' reintroduced data,
without the corrupt data.

Here's the basic implementation in RESTORE:
- For each index we want to restore:
  - identify the last time, l, the index was re-introduced, using the
    manifests
  - don't restore the index using a backup if backup.EndTime < l

This implementation rests on the following assumption: the input spans for
each restoration flow (created in createImportingDescriptors) and the
restoreSpanEntries (created by makeSimpleImportSpans) do not span across
multiple SQL indexes. Given this assumption, makeSimpleImportSpans skips
adding files from a backup for a given input span if that span was
reintroduced in a subsequent backup. (An illustrative sketch of this
filtering appears after the diff.)

Release justification: bug fix

Release note: none
---
 pkg/ccl/backupccl/backup_test.go              |   2 +-
 pkg/ccl/backupccl/bench_covering_test.go      |   7 +-
 pkg/ccl/backupccl/restoration_data.go         |  34 ++
 pkg/ccl/backupccl/restore_job.go              |  53 ++-
 pkg/ccl/backupccl/restore_span_covering.go    |  15 +-
 .../backupccl/restore_span_covering_test.go   |  49 ++-
 pkg/ccl/backupccl/targets.go                  |   8 +-
 .../in-progress-import-rollback               | 333 ++++++++++++++++++
 8 files changed, 478 insertions(+), 23 deletions(-)
 create mode 100644 pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback

diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go
index dde171b6f341..9d6ab8599b25 100644
--- a/pkg/ccl/backupccl/backup_test.go
+++ b/pkg/ccl/backupccl/backup_test.go
@@ -118,7 +118,7 @@ func init() {
 }
 
 func makeTableSpan(tableID uint32) roachpb.Span {
-	k := keys.SystemSQLCodec.TablePrefix(tableID)
+	k := keys.SystemSQLCodec.IndexPrefix(tableID, 1)
 	return roachpb.Span{Key: k, EndKey: k.PrefixEnd()}
 }
 
diff --git a/pkg/ccl/backupccl/bench_covering_test.go b/pkg/ccl/backupccl/bench_covering_test.go
index 0aacc7b777e1..fe6bc7194c83 100644
--- a/pkg/ccl/backupccl/bench_covering_test.go
+++ b/pkg/ccl/backupccl/bench_covering_test.go
@@ -13,6 +13,7 @@ import (
 	fmt "fmt"
 	"testing"
 
+	"github.com/cockroachdb/cockroach/pkg/util/hlc"
 	"github.com/cockroachdb/cockroach/pkg/util/randutil"
 )
 
@@ -58,7 +59,11 @@ func BenchmarkRestoreEntryCover(b *testing.B) {
 			if err := checkCoverage(ctx, backups[numBackups-1].Spans, backups); err != nil {
 				b.Fatal(err)
 			}
-			cov := makeSimpleImportSpans(backups[numBackups-1].Spans, backups, nil, nil, 0)
+			restoreData := restorationDataBase{
+				spans:        backups[numBackups-1].Spans,
+				latestIntros: make([]hlc.Timestamp, len(backups[numBackups-1].Spans)),
+			}
+			cov := makeSimpleImportSpans(&restoreData, backups, nil, nil, 0)
 			b.ReportMetric(float64(len(cov)), "coverSize")
 		}
 	})
diff --git a/pkg/ccl/backupccl/restoration_data.go b/pkg/ccl/backupccl/restoration_data.go
index 1bb319e7f6d5..b15060808346 100644
--- a/pkg/ccl/backupccl/restoration_data.go
+++ b/pkg/ccl/backupccl/restoration_data.go
@@ -13,7 +13,9 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" ) // restorationData specifies the data that is to be restored in a restoration flow. @@ -25,6 +27,10 @@ type restorationData interface { // getSpans returns the data spans that we're restoring into this cluster. getSpans() []roachpb.Span + // getLastIntros returns the end time of the last backup that reintroduced + // span i. + getLatestIntros() []hlc.Timestamp + // getSystemTables returns nil for non-cluster restores. It returns the // descriptors of the temporary system tables that should be restored into the // real table descriptors. The data for these temporary tables should be @@ -66,6 +72,10 @@ func (*mainRestorationData) isMainBundle() bool { return true } type restorationDataBase struct { // spans is the spans included in this bundle. spans []roachpb.Span + + // latestIntros is the last time each span was introduced. + latestIntros []hlc.Timestamp + // rekeys maps old table IDs to their new table descriptor. tableRekeys []execinfrapb.TableRekey // tenantRekeys maps tenants being restored to their new ID. @@ -105,6 +115,11 @@ func (b *restorationDataBase) getSpans() []roachpb.Span { return b.spans } +// getLastReIntros implements restorationData. +func (b *restorationDataBase) getLatestIntros() []hlc.Timestamp { + return b.latestIntros +} + // getSystemTables implements restorationData. func (b *restorationDataBase) getSystemTables() []catalog.TableDescriptor { return b.systemTables @@ -114,6 +129,7 @@ func (b *restorationDataBase) getSystemTables() []catalog.TableDescriptor { func (b *restorationDataBase) addTenant(fromTenantID, toTenantID roachpb.TenantID) { prefix := keys.MakeTenantPrefix(fromTenantID) b.spans = append(b.spans, roachpb.Span{Key: prefix, EndKey: prefix.PrefixEnd()}) + b.latestIntros = append(b.latestIntros, hlc.Timestamp{}) b.tenantRekeys = append(b.tenantRekeys, execinfrapb.TenantRekey{ OldID: fromTenantID, NewID: toTenantID, @@ -148,3 +164,21 @@ func checkForMigratedData(details jobspb.RestoreDetails, dataToRestore restorati return false } + +// findLatestIntroBySpan finds the latest intro time for the inputted spans. +// This function assumes that each span's start and end key belong to the same +// index. +func findLatestIntroBySpan( + spans roachpb.Spans, codec keys.SQLCodec, latestIntros map[tableAndIndex]hlc.Timestamp, +) ([]hlc.Timestamp, error) { + latestIntrosBySpan := make([]hlc.Timestamp, len(spans)) + for i, sp := range spans { + _, tablePrefix, indexPrefix, err := codec.DecodeIndexPrefix(sp.Key) + if err != nil { + return nil, err + } + introKey := tableAndIndex{descpb.ID(tablePrefix), descpb.IndexID(indexPrefix)} + latestIntrosBySpan[i] = latestIntros[introKey] + } + return latestIntrosBySpan, nil +} diff --git a/pkg/ccl/backupccl/restore_job.go b/pkg/ccl/backupccl/restore_job.go index c9ad3567361b..70b0642c93c8 100644 --- a/pkg/ccl/backupccl/restore_job.go +++ b/pkg/ccl/backupccl/restore_job.go @@ -274,9 +274,8 @@ func restore( // which are grouped by keyrange. 
 	highWaterMark := job.Progress().Details.(*jobspb.Progress_Restore).Restore.HighWater
 
-	importSpans := makeSimpleImportSpans(dataToRestore.getSpans(), backupManifests, backupLocalityMap,
-		highWaterMark, targetRestoreSpanSize)
-
+	importSpans := makeSimpleImportSpans(dataToRestore, backupManifests,
+		backupLocalityMap, highWaterMark, targetRestoreSpanSize)
 	if len(importSpans) == 0 {
 		// There are no files to restore.
 		return emptyRowCount, nil
@@ -651,6 +650,28 @@ func spansForAllRestoreTableIndexes(
 	return spans
 }
 
+// findLatestIntroFromManifests finds the end time of the latest incremental
+// backup that introduced each backed-up index, as of restore time.
+func findLatestIntroFromManifests(
+	manifests []backuppb.BackupManifest, codec keys.SQLCodec, asOf hlc.Timestamp,
+) (map[tableAndIndex]hlc.Timestamp, error) {
+	latestIntro := make(map[tableAndIndex]hlc.Timestamp)
+	for _, b := range manifests {
+		if !asOf.IsEmpty() && asOf.Less(b.StartTime) {
+			break
+		}
+		for _, sp := range b.IntroducedSpans {
+			_, tablePrefix, indexPrefix, err := codec.DecodeIndexPrefix(sp.Key)
+			if err != nil {
+				return nil, err
+			}
+			introKey := tableAndIndex{descpb.ID(tablePrefix), descpb.IndexID(indexPrefix)}
+			latestIntro[introKey] = b.EndTime
+		}
+	}
+	return latestIntro, nil
+}
+
 func shouldPreRestore(table *tabledesc.Mutable) bool {
 	if table.GetParentID() != keys.SystemDatabaseID {
 		return false
@@ -695,6 +716,7 @@ func createImportingDescriptors(
 	p sql.JobExecContext,
 	backupCodec keys.SQLCodec,
 	sqlDescs []catalog.Descriptor,
+	latestIntrosByIndex map[tableAndIndex]hlc.Timestamp,
 	r *restoreResumer,
 ) (
 	dataToPreRestore *restorationDataBase,
@@ -1240,11 +1262,21 @@ func createImportingDescriptors(
 		pkIDs[roachpb.BulkOpSummaryID(uint64(tbl.GetID()), uint64(tbl.GetPrimaryIndexID()))] = true
 	}
 
+	preRestoreLatestIntros, err := findLatestIntroBySpan(preRestoreSpans, backupCodec, latestIntrosByIndex)
+	if err != nil {
+		return nil, nil, nil, err
+	}
 	dataToPreRestore = &restorationDataBase{
 		spans:        preRestoreSpans,
 		tableRekeys:  rekeys,
 		tenantRekeys: tenantRekeys,
 		pkIDs:        pkIDs,
+		latestIntros: preRestoreLatestIntros,
+	}
+
+	postRestoreLatestIntros, err := findLatestIntroBySpan(postRestoreSpans, backupCodec, latestIntrosByIndex)
+	if err != nil {
+		return nil, nil, nil, err
 	}
 
 	trackedRestore = &mainRestorationData{
@@ -1253,6 +1285,7 @@ func createImportingDescriptors(
 			tableRekeys:  rekeys,
 			tenantRekeys: tenantRekeys,
 			pkIDs:        pkIDs,
+			latestIntros: postRestoreLatestIntros,
 		},
 	}
 
@@ -1264,6 +1297,12 @@ func createImportingDescriptors(
 	if details.VerifyData {
 		trackedRestore.restorationDataBase.spans = verifySpans
 		trackedRestore.restorationDataBase.validateOnly = true
+		verifySpansLatestIntros, err := findLatestIntroBySpan(verifySpans, backupCodec,
+			latestIntrosByIndex)
+		if err != nil {
+			return nil, nil, nil, err
+		}
+		trackedRestore.latestIntros = verifySpansLatestIntros
 
 		// Before the main (validation) flow, during a cluster level restore,
 		// we still need to restore system tables that do NOT get restored in the dataToPreRestore
@@ -1275,6 +1314,7 @@ func createImportingDescriptors(
 		preValidation.spans = postRestoreSpans
 		preValidation.tableRekeys = rekeys
 		preValidation.pkIDs = pkIDs
+		preValidation.latestIntros = postRestoreLatestIntros
 	}
 
 	if tempSystemDBID != descpb.InvalidID {
@@ -1432,9 +1472,12 @@ func (r *restoreResumer) doResume(ctx context.Context, execCtx interface{}) erro
 	if err != nil {
 		return err
 	}
-
+	latestIntrosByIndex, err := findLatestIntroFromManifests(backupManifests, backupCodec, details.EndTime)
+	if err != nil {
+		return err
+	}
 	preData, preValidateData, mainData, err := createImportingDescriptors(ctx, p, backupCodec,
-		sqlDescs, r)
+		sqlDescs, latestIntrosByIndex, r)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/ccl/backupccl/restore_span_covering.go b/pkg/ccl/backupccl/restore_span_covering.go
index b47cc37e2777..6da6c7732942 100644
--- a/pkg/ccl/backupccl/restore_span_covering.go
+++ b/pkg/ccl/backupccl/restore_span_covering.go
@@ -72,7 +72,7 @@ const targetRestoreSpanSize = 384 << 20
 // if its current data size plus that of the new span is less than the target
 // size.
 func makeSimpleImportSpans(
-	requiredSpans []roachpb.Span,
+	data restorationData,
 	backups []backuppb.BackupManifest,
 	backupLocalityMap map[int]storeByLocalityKV,
 	lowWaterMark roachpb.Key,
@@ -85,9 +85,10 @@ func makeSimpleImportSpans(
 	for i := range backups {
 		sort.Sort(backupinfo.BackupFileDescriptors(backups[i].Files))
 	}
-
 	var cover []execinfrapb.RestoreSpanEntry
-	for _, span := range requiredSpans {
+	spans := data.getSpans()
+	latestIntros := data.getLatestIntros()
+	for spanIdx, span := range spans {
 		if span.EndKey.Compare(lowWaterMark) < 0 {
 			continue
 		}
@@ -96,8 +97,14 @@ func makeSimpleImportSpans(
 		}
 
 		spanCoverStart := len(cover)
-
 		for layer := range backups {
+			if backups[layer].EndTime.Less(latestIntros[spanIdx]) {
+				// Don't use this backup to cover this span if the span was reintroduced
+				// after the backup's endTime. In that case this backup may contain
+				// invalid data for the span, and a subsequent backup will contain all
+				// of the span's data anyway.
+				continue
+			}
 			covPos := spanCoverStart
 
 			// lastCovSpanSize is the size of files added to the right-most span of
diff --git a/pkg/ccl/backupccl/restore_span_covering_test.go b/pkg/ccl/backupccl/restore_span_covering_test.go
index bea046ee8950..932929b4ba87 100644
--- a/pkg/ccl/backupccl/restore_span_covering_test.go
+++ b/pkg/ccl/backupccl/restore_span_covering_test.go
@@ -16,6 +16,7 @@ import (
 
 	"github.com/cockroachdb/cockroach/pkg/ccl/backupccl/backuppb"
 	"github.com/cockroachdb/cockroach/pkg/cloud/cloudpb"
+	"github.com/cockroachdb/cockroach/pkg/keys"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
 	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
 	"github.com/cockroachdb/cockroach/pkg/util/encoding"
@@ -42,7 +43,7 @@ func MockBackupChain(length, spans, baseFiles int, r *rand.Rand) []backuppb.Back
 		if i > 0 {
 			backups[i].StartTime = backups[i-1].EndTime
 			if i%3 == 0 {
-				backups[i].IntroducedSpans = roachpb.Spans{backups[i].Spans[spans-1]}
+				backups[i].IntroducedSpans = roachpb.Spans{backups[i].Spans[r.Intn(spans)]}
 			}
 		}
 
@@ -84,15 +85,20 @@ func MockBackupChain(length, spans, baseFiles int, r *rand.Rand) []backuppb.Back
 // thus sensitive to ordering; the coverage correctness check, however, is not.
func checkRestoreCovering( backups []backuppb.BackupManifest, - spans roachpb.Spans, + data restorationData, cov []execinfrapb.RestoreSpanEntry, merged bool, ) error { var expectedPartitions int required := make(map[string]*roachpb.SpanGroup) - for _, s := range spans { + spans := data.getSpans() + latestIntros := data.getLatestIntros() + for sIdx, s := range spans { var last roachpb.Key for _, b := range backups { + if b.EndTime.Less(latestIntros[sIdx]) { + continue + } for _, f := range b.Files { if sp := s.Intersect(f.Span); sp.Valid() { if required[f.Path] == nil { @@ -150,15 +156,22 @@ func TestRestoreEntryCoverExample(t *testing.T) { {Files: []backuppb.BackupManifest_File{f("a", "h", "6"), f("j", "k", "7")}}, {Files: []backuppb.BackupManifest_File{f("h", "i", "8"), f("l", "m", "9")}}, } + latestIntros := []hlc.Timestamp{{WallTime: 3}, {}, {}} - // Pretend every span has 1MB. for i := range backups { + backups[i].StartTime = hlc.Timestamp{WallTime: int64(i)} + backups[i].EndTime = hlc.Timestamp{WallTime: int64(i + 1)} + for j := range backups[i].Files { + // Pretend every span has 1MB. backups[i].Files[j].EntryCounts.DataSize = 1 << 20 } } - - cover := makeSimpleImportSpans(spans, backups, nil, nil, noSpanTargetSize) + restoreData := restorationDataBase{ + spans: spans, + latestIntros: make([]hlc.Timestamp, len(spans)), + } + cover := makeSimpleImportSpans(&restoreData, backups, nil, nil, noSpanTargetSize) require.Equal(t, []execinfrapb.RestoreSpanEntry{ {Span: sp("a", "c"), Files: paths("1", "4", "6")}, {Span: sp("c", "e"), Files: paths("2", "4", "6")}, @@ -167,13 +180,21 @@ func TestRestoreEntryCoverExample(t *testing.T) { {Span: sp("l", "m"), Files: paths("9")}, }, cover) - coverSized := makeSimpleImportSpans(spans, backups, nil, nil, 2<<20) + coverSized := makeSimpleImportSpans(&restoreData, backups, nil, nil, 2<<20) require.Equal(t, []execinfrapb.RestoreSpanEntry{ {Span: sp("a", "f"), Files: paths("1", "2", "4", "6")}, {Span: sp("f", "i"), Files: paths("3", "5", "6", "8")}, {Span: sp("l", "m"), Files: paths("9")}, }, coverSized) + restoreData.latestIntros = latestIntros + coverTimeFiltering := makeSimpleImportSpans(&restoreData, backups, nil, nil, 2<<20) + require.Equal(t, []execinfrapb.RestoreSpanEntry{ + {Span: sp("a", "f"), Files: paths("6")}, + {Span: sp("f", "i"), Files: paths("3", "5", "6", "8")}, + {Span: sp("l", "m"), Files: paths("9")}, + }, coverTimeFiltering) + } func TestRestoreEntryCover(t *testing.T) { @@ -184,10 +205,20 @@ func TestRestoreEntryCover(t *testing.T) { for _, spans := range []int{1, 2, 3, 5, 9, 11, 12} { for _, files := range []int{0, 1, 2, 3, 4, 10, 12, 50} { backups := MockBackupChain(numBackups, spans, files, r) + latestIntrosByIndex, err := findLatestIntroFromManifests(backups, keys.SystemSQLCodec, hlc.Timestamp{}) + require.NoError(t, err) + latestIntrosBySpan, err := findLatestIntroBySpan(backups[numBackups-1].Spans, keys.SystemSQLCodec, latestIntrosByIndex) + require.NoError(t, err) + for _, target := range []int64{0, 1, 4, 100, 1000} { t.Run(fmt.Sprintf("numBackups=%d, numSpans=%d, numFiles=%d, merge=%d", numBackups, spans, files, target), func(t *testing.T) { - cover := makeSimpleImportSpans(backups[numBackups-1].Spans, backups, nil, nil, target<<20) - if err := checkRestoreCovering(backups, backups[numBackups-1].Spans, cover, target != noSpanTargetSize); err != nil { + restoreData := restorationDataBase{ + spans: backups[numBackups-1].Spans, + latestIntros: latestIntrosBySpan, + } + cover := makeSimpleImportSpans(&restoreData, backups, nil, 
+					nil, target<<20)
+				if err := checkRestoreCovering(backups, &restoreData, cover, target != noSpanTargetSize); err != nil {
 					t.Fatal(err)
 				}
 			})
diff --git a/pkg/ccl/backupccl/targets.go b/pkg/ccl/backupccl/targets.go
index 61bb2f1b9744..1284331a20c9 100644
--- a/pkg/ccl/backupccl/targets.go
+++ b/pkg/ccl/backupccl/targets.go
@@ -489,15 +489,17 @@ func MakeBackupTableEntry(
 	if err := checkCoverage(ctx, []roachpb.Span{tablePrimaryIndexSpan}, backupManifests); err != nil {
 		return BackupTableEntry{}, errors.Wrapf(err, "making spans for table %s", fullyQualifiedTableName)
 	}
-
+	restoreData := restorationDataBase{
+		spans:        []roachpb.Span{tablePrimaryIndexSpan},
+		latestIntros: make([]hlc.Timestamp, 1),
+	}
 	entry := makeSimpleImportSpans(
-		[]roachpb.Span{tablePrimaryIndexSpan},
+		&restoreData,
 		backupManifests,
 		nil,           /*backupLocalityInfo*/
 		roachpb.Key{}, /*lowWaterMark*/
 		targetRestoreSpanSize,
 	)
-
 	lastSchemaChangeTime := findLastSchemaChangeTime(backupManifests, tbDesc, endTime)
 
 	backupTableEntry := BackupTableEntry{
diff --git a/pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback b/pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback
new file mode 100644
index 000000000000..8264039021b9
--- /dev/null
+++ b/pkg/ccl/backupccl/testdata/backup-restore/in-progress-import-rollback
@@ -0,0 +1,333 @@
+# test that we properly fully back up an offline span that may have been
+# rolled back via a non-mvcc operation
+#
+# TODO(msbutler): waiting for https://github.com/cockroachdb/cockroach/pull/86689 to land
+# Part 1 - ensure clear range induces full reintroduction of spans
+# - begin import jobs and pause them
+# - run inc backup - verify inc has captured the data
+# - roll the imports back non-mvcc
+# - run an inc backup and ensure we reintroduce the table spans

+new-server name=s1
+----
+
+###########
+# Case 1: an incremental backup captures a non-mvcc rollback
+###########
+
+exec-sql
+CREATE DATABASE d;
+USE d;
+CREATE TABLE foo (i INT PRIMARY KEY, s STRING);
+CREATE TABLE foofoo (i INT PRIMARY KEY, s STRING);
+INSERT INTO foofoo VALUES (10, 'x0');
+CREATE TABLE baz (i INT PRIMARY KEY, s STRING);
+INSERT INTO baz VALUES (1, 'x'),(2,'y'),(3,'z');
+----
+
+exec-sql
+SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
+----
+
+
+exec-sql
+SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = false;
+----
+
+
+exec-sql
+EXPORT INTO CSV 'nodelocal://0/export1/' FROM SELECT * FROM baz;
+----
+
+
+# Pause the import jobs, in order to back up the importing data.
+import expect-pausepoint tag=a
+IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
+----
+job paused at pausepoint
+
+
+import expect-pausepoint tag=aa
+IMPORT INTO foofoo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
+----
+job paused at pausepoint
+
+
+# Ensure table, database, and cluster full backups capture importing rows.
+exec-sql
+BACKUP INTO 'nodelocal://0/cluster/';
+----
+
+
+exec-sql
+BACKUP DATABASE d INTO 'nodelocal://0/database/';
+----
+
+exec-sql
+BACKUP TABLE d.* INTO 'nodelocal://0/table/';
+----
+
+
+exec-sql
+SET CLUSTER SETTING jobs.debug.pausepoints = '';
+----
+
+
+# Cancel the jobs so the imports roll back and the next set of incremental
+# backups observes that the tables are back online.
+job cancel=a
+----
+
+job cancel=aa
+----
+
+job tag=a wait-for-state=cancelled
+----
+
+
+job tag=aa wait-for-state=cancelled
+----
+
+
+# Ensure incremental backups back up the newly online spans from ts=0, as the
+# import was rolled back via non-mvcc clear range.
+# So, back up 0 rows from foo (it was empty pre-import) and 1 row from foofoo
+# (it had 1 row pre-import).
+exec-sql
+BACKUP INTO LATEST IN 'nodelocal://0/cluster/';
+----
+
+exec-sql
+BACKUP DATABASE d INTO LATEST IN 'nodelocal://0/database/';
+----
+
+
+exec-sql
+BACKUP TABLE d.* INTO LATEST IN 'nodelocal://0/table/';
+----
+
+query-sql
+SELECT
+  database_name, object_name, object_type, rows, backup_type
+FROM
+  [SHOW BACKUP FROM LATEST IN 'nodelocal://0/cluster/']
+WHERE
+  object_name = 'foo' or object_name = 'foofoo'
+ORDER BY
+  start_time, database_name;
+----
+d foo table 3 full
+d foofoo table 4 full
+d foo table 0 incremental
+d foofoo table 1 incremental
+
+query-sql
+SELECT
+  database_name, object_name, object_type, rows, backup_type
+FROM
+  [SHOW BACKUP FROM LATEST IN 'nodelocal://0/database/']
+WHERE
+  object_name = 'foo' or object_name = 'foofoo'
+ORDER BY
+  start_time, database_name;
+----
+d foo table 3 full
+d foofoo table 4 full
+d foo table 0 incremental
+d foofoo table 1 incremental
+
+
+query-sql
+SELECT
+  database_name, object_name, object_type, rows, backup_type
+FROM
+  [SHOW BACKUP FROM LATEST IN 'nodelocal://0/table/']
+WHERE
+  object_name = 'foo' or object_name = 'foofoo'
+ORDER BY
+  start_time, database_name;
+----
+d foo table 3 full
+d foofoo table 4 full
+d foo table 0 incremental
+d foofoo table 1 incremental
+
+
+query-sql
+SELECT count(*) FROM d.foo;
+----
+0
+
+
+query-sql
+SELECT count(*) FROM d.foofoo;
+----
+1
+
+
+# To verify that the incremental backed up the pre-import state of the tables,
+# restore d and ensure all tables are in their pre-import state.
+
+exec-sql
+RESTORE DATABASE d FROM LATEST IN 'nodelocal://0/database/' with new_db_name=d2;
+----
+
+
+query-sql
+SELECT count(*) FROM d2.foo;
+----
+0
+
+
+query-sql
+SELECT count(*) FROM d2.foofoo;
+----
+1
+
+###########
+# Case 2: an incremental backup captures an mvcc rollback
+###########
+
+exec-sql
+DROP DATABASE d2;
+CREATE TABLE foo2 (i INT PRIMARY KEY, s STRING);
+CREATE TABLE foofoo2 (i INT PRIMARY KEY, s STRING);
+INSERT INTO foofoo2 VALUES (10, 'x0');
+----
+
+exec-sql
+SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
+----
+
+
+exec-sql
+SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = true;
+----
+
+# Pause the import jobs, in order to back up the importing data.
+import expect-pausepoint tag=b
+IMPORT INTO foo2 (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
+----
+job paused at pausepoint
+
+
+import expect-pausepoint tag=bb
+IMPORT INTO foofoo2 (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
+----
+job paused at pausepoint
+
+
+# Ensure table, database, and cluster full backups capture importing rows.
+exec-sql
+BACKUP INTO 'nodelocal://0/cluster/';
+----
+
+
+exec-sql
+BACKUP DATABASE d INTO 'nodelocal://0/database/';
+----
+
+exec-sql
+BACKUP TABLE d.* INTO 'nodelocal://0/table/';
+----
+
+
+exec-sql
+SET CLUSTER SETTING jobs.debug.pausepoints = '';
+----
+
+
+# Cancel the jobs so the imports roll back and the next set of incremental
+# backups observes that the tables are back online.
+job cancel=b
+----
+
+job cancel=bb
+----
+
+job tag=b wait-for-state=cancelled
+----
+
+
+job tag=bb wait-for-state=cancelled
+----
+
+# Ensure incremental backups back up the mvcc delete tombstones from the
+# import rollbacks.
+# NOTE: because foo2 had no data pre-import, an mvcc range tombstone will
+# delete the imported data. The incremental backup will capture this range
+# tombstone; however, SHOW BACKUP currently will not record this range as a
+# "row" in the backup. By contrast,
+# foofoo2 will get deleted with point tombstones, which will show up as
+# "rows" in SHOW BACKUP.
+
+exec-sql
+BACKUP INTO LATEST IN 'nodelocal://0/cluster/';
+----
+
+exec-sql
+BACKUP DATABASE d INTO LATEST IN 'nodelocal://0/database/';
+----
+
+
+exec-sql
+BACKUP TABLE d.* INTO LATEST IN 'nodelocal://0/table/';
+----
+
+query-sql
+SELECT
+  database_name, object_name, object_type, rows, backup_type
+FROM
+  [SHOW BACKUP FROM LATEST IN 'nodelocal://0/cluster/']
+WHERE
+  object_name = 'foo2' or object_name = 'foofoo2'
+ORDER BY
+  start_time, database_name;
+----
+d foo2 table 3 full
+d foofoo2 table 4 full
+d foo2 table 3 incremental
+d foofoo2 table 7 incremental
+
+query-sql
+SELECT
+  database_name, object_name, object_type, rows, backup_type
+FROM
+  [SHOW BACKUP FROM LATEST IN 'nodelocal://0/database/']
+WHERE
+  object_name = 'foo2' or object_name = 'foofoo2'
+ORDER BY
+  start_time, database_name;
+----
+d foo2 table 3 full
+d foofoo2 table 4 full
+d foo2 table 3 incremental
+d foofoo2 table 7 incremental
+
+
+query-sql
+SELECT
+  database_name, object_name, object_type, rows, backup_type
+FROM
+  [SHOW BACKUP FROM LATEST IN 'nodelocal://0/table/']
+WHERE
+  object_name = 'foo2' or object_name = 'foofoo2'
+ORDER BY
+  start_time, database_name;
+----
+d foo2 table 3 full
+d foofoo2 table 4 full
+d foo2 table 3 incremental
+d foofoo2 table 7 incremental
+
+# To verify that the incremental backup captured the tombstones, restore d and
+# ensure all tables are in their pre-import state.
+
+exec-sql
+RESTORE DATABASE d FROM LATEST IN 'nodelocal://0/database/' with new_db_name=d2;
+----
+
+
+query-sql
+SELECT count(*) FROM d2.foo2;
+----
+0
+
+
+query-sql
+SELECT count(*) FROM d2.foofoo2;
+----
+1
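--
For readers who want the mechanism at a glance, here is a minimal,
self-contained Go sketch of the elision rule this patch adds. It is not part
of the patch: the manifest struct, string index keys, and int64 times below
are simplified stand-ins for backuppb.BackupManifest, tableAndIndex, and
hlc.Timestamp. It shows the two steps: record the end time of the last layer
that reintroduced each index (as findLatestIntroFromManifests does), then,
when building a span's cover, skip any layer that ended before that span's
latest reintroduction (as the new check in makeSimpleImportSpans does).

package main

import "fmt"

// manifest stands in for backuppb.BackupManifest: endTime is the layer's
// backup end time, introduced lists the index keys the layer reintroduced,
// and files maps each index key to the file paths backing it in this layer.
type manifest struct {
	endTime    int64
	introduced []string
	files      map[string][]string
}

// latestIntros walks the backup chain in order and records, per index, the
// end time of the last layer that reintroduced it; later layers overwrite
// earlier entries.
func latestIntros(chain []manifest) map[string]int64 {
	latest := make(map[string]int64)
	for _, m := range chain {
		for _, idx := range m.introduced {
			latest[idx] = m.endTime
		}
	}
	return latest
}

// coverFiles collects the files that cover idx, skipping any layer that
// ended before idx's latest reintroduction: such a layer may hold data that
// a non-MVCC rollback later removed, and the reintroducing layer re-backed
// up the index from time zero anyway.
func coverFiles(chain []manifest, idx string, latest map[string]int64) []string {
	var out []string
	for _, m := range chain {
		if m.endTime < latest[idx] {
			continue // elide potentially invalid pre-rollback data
		}
		out = append(out, m.files[idx]...)
	}
	return out
}

func main() {
	chain := []manifest{
		// Full backup taken while an import into foo's index 1 is offline.
		{endTime: 1, files: map[string][]string{"foo@1": {"f1"}}},
		// Incremental taken after the import was rolled back non-MVCC: the
		// index is reintroduced, i.e. backed up from time zero.
		{endTime: 2, introduced: []string{"foo@1"}, files: map[string][]string{"foo@1": {"f2"}}},
		// Ordinary incremental.
		{endTime: 3, files: map[string][]string{"foo@1": {"f3"}}},
	}
	latest := latestIntros(chain)
	fmt.Println(coverFiles(chain, "foo@1", latest)) // [f2 f3]: f1 is elided
}

Running the sketch prints [f2 f3]: the full backup's file f1 is elided
because the index was reintroduced at time 2, which is exactly the behavior
the restore_span_covering.go change enforces per restoring span.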