Skip to content

Commit

Permalink
Merge #64285
Browse files Browse the repository at this point in the history
64285: cliccl/debug_backup.go: add `--with-revisions` flag to allow debugging revisions of data r=pbardea a=Elliebababa

This patch adds `--with-revisions` on `debug export` to allow users
to export revisions of table data. If `--with-revisions` is specified,
revisions of data are returned to users, with an extra column
displaying the revision time of that record.

Note: 
Schema changes make it hard to display columns correctly at different timestamps in one shot,
so this feature:
- does not display schema changes across revisions.
- displays only the data changes made since the last schema change.

That is, `--with-revisions` only displays *part of* the revision history instead of *all* of it.
We mitigate this by printing a message saying `DETECTED SCHEMA CHANGE AT t1, ONLY SHOWING UPDATES IN RANGE [t1,t2]`.
Then users are able to take exploratory steps to trace back to revision history of data by repeatedly running 
`cockroach debug backup export <backup_url> --table=<table> --with-revisions --up-to=t1-1`

----------------------------------------

Example usage:
```
$cockroach debug backup export <backup_url> --table=<table> --with-revisions 
Data changes happened between t1 and t2 :
DETECTED SCHEMA CHANGE AT t1, ONLY SHOWING UPDATES IN RANGE [t1,t2]
1,null,2021-04-22 18:12:47.685284 +0000 UTC
2,'3rd update',2021-04-22 18:13:41.27284 +0000 UTC
2,'2nd update',2021-04-22 18:13:34.718665 +0000 UTC
2,'1st update',2021-04-22 18:13:28.966741 +0000 UTC
2,null,2021-04-22 18:12:50.053996 +0000 UTC
```
```
$cockroach debug backup export <backup_url> --table=<table> --with-revisions --up-to='2021-04-22 18:13:40'
DETECTED SCHEMA CHANGE AT t1, ONLY SHOWING UPDATES IN RANGE [t1,2021-04-22 18:13:40]
1,null,2021-04-22 18:12:47.685284 +0000 UTC
2,'2nd update',2021-04-22 18:13:34.718665 +0000 UTC
2,'1st update',2021-04-22 18:13:28.966741 +0000 UTC
2,null,2021-04-22 18:12:50.053996 +0000 UTC
```
*t1 is the time of the last schema change before the time specified by --up-to*
*t2 is the endtime of backup*

--------------------

Release note (cli change): 
This is an experimental/beta feature of the backup debug tool to
allow users to export revisions of data from a backup. We add
`--with-revisions` on `debug export` to allow users to export
revisions of table data. If `--with-revisions` is specified, revisions
of data are returned to users, with an extra column displaying the 
revision time of that record.

Co-authored-by: elliebababa <[email protected]>
  • Loading branch information
craig[bot] and Elliebababa committed May 6, 2021
2 parents 42bc239 + 75f22bc commit 2b84142
Show file tree
Hide file tree
Showing 6 changed files with 300 additions and 41 deletions.
37 changes: 34 additions & 3 deletions pkg/ccl/backupccl/targets.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ package backupccl

import (
"context"
"reflect"
"sort"
"strings"

Expand Down Expand Up @@ -491,9 +492,10 @@ type EntryFiles []roachpb.ImportRequest_File
// from backup manifests.
// exported to cliccl for exporting data directly from backup sst.
type BackupTableEntry struct {
Desc catalog.TableDescriptor
Span roachpb.Span
Files []EntryFiles
Desc catalog.TableDescriptor
Span roachpb.Span
Files []EntryFiles
// LastSchemaChangeTime is populated by MakeBackupTableEntry with the
// result of findLastSchemaChangeTime: the timestamp of the oldest
// descriptor revision before the requested end time whose public columns
// still match Desc, or the end time itself when no such revision exists.
LastSchemaChangeTime hlc.Timestamp
}

// MakeBackupTableEntry looks up the descriptor of fullyQualifiedTableName
Expand Down Expand Up @@ -568,10 +570,13 @@ func MakeBackupTableEntry(
return BackupTableEntry{}, errors.Wrapf(err, "making spans for table %s", fullyQualifiedTableName)
}

lastSchemaChangeTime := findLastSchemaChangeTime(backupManifests, tbDesc, endTime)

backupTableEntry := BackupTableEntry{
tbDesc,
tablePrimaryIndexSpan,
make([]EntryFiles, 0),
lastSchemaChangeTime,
}

for _, e := range entry {
Expand All @@ -580,3 +585,29 @@ func MakeBackupTableEntry(

return backupTableEntry, nil
}

// findLastSchemaChangeTime walks the descriptor revisions recorded in
// backupManifests from the last manifest to the first (and, within each
// manifest, from the last revision to the first) and returns the timestamp of
// the oldest revision of tbDesc strictly before endTime that can be reached
// without crossing a revision whose public columns differ from tbDesc's.
//
// If no revision of tbDesc before endTime is found, or the very first one
// visited already has different public columns, endTime is returned.
//
// A revision that carries no descriptor, or whose descriptor cannot be
// interpreted as a table, cannot be compared against tbDesc; it is treated as
// a schema boundary rather than dereferenced, which previously would have
// panicked.
//
// NOTE(review): the backwards walk presumes manifests and their
// DescriptorChanges are stored in ascending time order — confirm against the
// code that writes them.
func findLastSchemaChangeTime(
	backupManifests []BackupManifest, tbDesc catalog.TableDescriptor, endTime hlc.Timestamp,
) hlc.Timestamp {
	lastSchemaChangeTime := endTime
	for i := len(backupManifests) - 1; i >= 0; i-- {
		changes := backupManifests[i].DescriptorChanges
		for j := len(changes) - 1; j >= 0; j-- {
			rev := changes[j]

			// Only revisions of this table that are strictly older than
			// endTime are relevant.
			if endTime.LessEq(rev.Time) || rev.ID != tbDesc.GetID() {
				continue
			}

			// NOTE(review): a nil Desc presumably marks a dropped descriptor —
			// confirm. Either way there is nothing to compare columns against.
			if rev.Desc == nil {
				return lastSchemaChangeTime
			}

			d := catalogkv.NewBuilder(rev.Desc).BuildExistingMutable()
			revDesc, err := catalog.AsTableDescriptor(d)
			if err != nil {
				// Not a table descriptor: stop here instead of calling
				// methods on a nil revDesc.
				return lastSchemaChangeTime
			}
			if !reflect.DeepEqual(revDesc.PublicColumns(), tbDesc.PublicColumns()) {
				return lastSchemaChangeTime
			}
			lastSchemaChangeTime = rev.Time
		}
	}
	return lastSchemaChangeTime
}
1 change: 1 addition & 0 deletions pkg/ccl/cliccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ go_library(
"//pkg/server",
"//pkg/settings/cluster",
"//pkg/sql/catalog",
"//pkg/sql/catalog/colinfo",
"//pkg/sql/catalog/descpb",
"//pkg/sql/catalog/tabledesc",
"//pkg/sql/row",
Expand Down
73 changes: 66 additions & 7 deletions pkg/ccl/cliccl/debug_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/sql/row"
Expand All @@ -54,6 +55,10 @@ import (
"github.com/spf13/cobra"
)

const (
// backupOptRevisionHistory is the backup option name quoted in the hint
// returned when the revisions flag is used on a backup whose first
// manifest's MVCCFilter is not MVCCFilter_All.
backupOptRevisionHistory = "revision_history"
)

type key struct {
rawByte []byte
typ string
Expand Down Expand Up @@ -115,6 +120,7 @@ var debugBackupArgs struct {
nullas string
maxRows int
startKey key
withRevisions bool

rowCount int
}
Expand All @@ -132,6 +138,7 @@ func setDebugContextDefault() {
debugBackupArgs.maxRows = 0
debugBackupArgs.startKey = key{}
debugBackupArgs.rowCount = 0
debugBackupArgs.withRevisions = false
}

func init() {
Expand Down Expand Up @@ -232,7 +239,18 @@ func init() {
cliflags.StartKey.Name,
cliflags.StartKey.Usage())

cli.DebugCmd.AddCommand(backupCmds)
exportDataCmd.Flags().BoolVar(
&debugBackupArgs.withRevisions,
cliflags.ExportRevisions.Name,
false, /*value*/
cliflags.ExportRevisions.Usage())

exportDataCmd.Flags().StringVarP(
&debugBackupArgs.readTime,
cliflags.ExportRevisionsUpTo.Name,
cliflags.ExportRevisionsUpTo.Shorthand,
"", /*value*/
cliflags.ExportRevisionsUpTo.Usage())

backupSubCmds := []*cobra.Command{
showCmd,
Expand All @@ -245,6 +263,7 @@ func init() {
backupCmds.AddCommand(cmd)
cmd.Flags().AddFlagSet(backupFlags)
}
cli.DebugCmd.AddCommand(backupCmds)
}

func newBlobFactory(ctx context.Context, dialing roachpb.NodeID) (blobs.BlobClient, error) {
Expand Down Expand Up @@ -403,7 +422,14 @@ func runExportDataCmd(cmd *cobra.Command, args []string) error {
manifests = append(manifests, manifest)
}

endTime, err := evalAsOfTimestamp(debugBackupArgs.readTime)
if debugBackupArgs.withRevisions && manifests[0].MVCCFilter != backupccl.MVCCFilter_All {
return errors.WithHintf(
errors.Newf("invalid flag: %s", cliflags.ExportRevisions.Name),
"requires backup created with %q", backupOptRevisionHistory,
)
}

endTime, err := evalAsOfTimestamp(debugBackupArgs.readTime, manifests)
if err != nil {
return errors.Wrapf(err, "eval as of timestamp %s", debugBackupArgs.readTime)
}
Expand All @@ -427,9 +453,11 @@ func runExportDataCmd(cmd *cobra.Command, args []string) error {
return nil
}

func evalAsOfTimestamp(readTime string) (hlc.Timestamp, error) {
func evalAsOfTimestamp(
readTime string, manifests []backupccl.BackupManifest,
) (hlc.Timestamp, error) {
if readTime == "" {
return hlc.Timestamp{}, nil
return manifests[len(manifests)-1].EndTime, nil
}
var err error
// Attempt to parse as timestamp.
Expand Down Expand Up @@ -468,8 +496,14 @@ func showData(
}
defer rf.Close(ctx)

if debugBackupArgs.withRevisions {
startT := entry.LastSchemaChangeTime.GoTime().UTC()
endT := endTime.GoTime().UTC()
fmt.Fprintf(os.Stderr, "DETECTED SCHEMA CHANGE AT %s, ONLY SHOWING UPDATES IN RANGE [%s, %s]\n", startT, startT, endT)
}

for _, files := range entry.Files {
if err := processEntryFiles(ctx, rf, files, entry.Span, endTime, writer); err != nil {
if err := processEntryFiles(ctx, rf, files, entry.Span, entry.LastSchemaChangeTime, endTime, writer); err != nil {
return err
}
if debugBackupArgs.maxRows != 0 && debugBackupArgs.rowCount >= debugBackupArgs.maxRows {
Expand Down Expand Up @@ -531,17 +565,31 @@ func makeRowFetcher(
) (row.Fetcher, error) {
var colIdxMap catalog.TableColMap
var valNeededForCol util.FastIntSet
colDescs := make([]catalog.Column, len(entry.Desc.PublicColumns()))
for i, col := range entry.Desc.PublicColumns() {
colIdxMap.Set(col.GetID(), i)
valNeededForCol.Add(i)
colDescs[i] = col
}

if debugBackupArgs.withRevisions {
newIndex := len(entry.Desc.PublicColumns())
newCol, err := entry.Desc.FindColumnWithName(colinfo.MVCCTimestampColumnName)
if err != nil {
return row.Fetcher{}, errors.Wrapf(err, "get mvcc timestamp column")
}
colIdxMap.Set(newCol.GetID(), newIndex)
valNeededForCol.Add(newIndex)
colDescs = append(colDescs, newCol)
}

table := row.FetcherTableArgs{
Spans: []roachpb.Span{entry.Span},
Desc: entry.Desc,
Index: entry.Desc.GetPrimaryIndex(),
ColIdxMap: colIdxMap,
IsSecondaryIndex: false,
Cols: entry.Desc.PublicColumns(),
Cols: colDescs,
ValNeededForCol: valNeededForCol,
}

Expand All @@ -567,6 +615,7 @@ func processEntryFiles(
rf row.Fetcher,
files backupccl.EntryFiles,
span roachpb.Span,
startTime hlc.Timestamp,
endTime hlc.Timestamp,
writer *csv.Writer,
) (err error) {
Expand All @@ -592,7 +641,7 @@ func processEntryFiles(
startKeyMVCC.Key = roachpb.Key(debugBackupArgs.startKey.rawByte)
}
}
kvFetcher := row.MakeBackupSSTKVFetcher(startKeyMVCC, endKeyMVCC, iter, endTime)
kvFetcher := row.MakeBackupSSTKVFetcher(startKeyMVCC, endKeyMVCC, iter, startTime, endTime, debugBackupArgs.withRevisions)

if err := rf.StartScanFrom(ctx, &kvFetcher); err != nil {
return errors.Wrapf(err, "row fetcher starts scan")
Expand All @@ -608,6 +657,16 @@ func processEntryFiles(
}
rowDisplay := make([]string, datums.Len())
for i, datum := range datums {

if debugBackupArgs.withRevisions && i == datums.Len()-1 {
approx, err := tree.DecimalToInexactDTimestamp(datum.(*tree.DDecimal))
if err != nil {
return errors.Wrapf(err, "convert datum %s to mvcc timestamp", datum)
}
rowDisplay[i] = approx.UTC().String()
break
}

if datum == tree.DNull {
rowDisplay[i] = debugBackupArgs.nullas
} else {
Expand Down
Loading

0 comments on commit 2b84142

Please sign in to comment.