Skip to content

Commit

Permalink
Merge #67497
Browse files Browse the repository at this point in the history
67497: backupccl: stop splitting at invalid split keys r=pbardea a=pbardea

Restore could perform invalid splits during the split and scatter phase.
This commit ensures that splits are always performed at safe keys.

Fixes #67488.

Release note (bug fix): Fix a bug where restores of data with multiple
column families could be split illegally (within a single SQL row). This
could result in temporary data unavailability until the ranges on either
side of the invalid split are merged.

Co-authored-by: Paul Bardea <[email protected]>
  • Loading branch information
craig[bot] and pbardea committed Jul 27, 2021
2 parents d8ba460 + 5bea8fe commit 1375a40
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
10 changes: 10 additions & 0 deletions pkg/ccl/backupccl/split_and_scatter_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ func (s dbSplitAndScatterer) split(
if err != nil {
return err
}
if splitAt, err := keys.EnsureSafeSplitKey(newSplitKey); err != nil {
// Ignore the error, not all keys are table keys.
} else if len(splitAt) != 0 {
newSplitKey = splitAt
}
log.VEventf(ctx, 1, "presplitting new key %+v", newSplitKey)
if err := s.db.AdminSplit(ctx, newSplitKey, expirationTime); err != nil {
return errors.Wrapf(err, "splitting key %s", newSplitKey)
Expand All @@ -107,6 +112,11 @@ func (s dbSplitAndScatterer) scatter(
if err != nil {
return 0, err
}
if scatterAt, err := keys.EnsureSafeSplitKey(newScatterKey); err != nil {
// Ignore the error, not all keys are table keys.
} else if len(scatterAt) != 0 {
newScatterKey = scatterAt
}

log.VEventf(ctx, 1, "scattering new key %+v", newScatterKey)
req := &roachpb.AdminScatterRequest{
Expand Down
29 changes: 29 additions & 0 deletions pkg/ccl/backupccl/testdata/backup-restore/column-families
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
new-server name=s1 localities=us-east-1,us-west-1,us-west-2,eu-central-1
----

exec-sql
CREATE DATABASE orig;
USE orig;
CREATE TABLE cfs (a INT PRIMARY KEY, b STRING, c STRING, d STRING, FAMILY (b), FAMILY (c));
INSERT INTO cfs SELECT x, repeat('abc', 100), repeat('abc', 100) FROM generate_series(0, 3) AS x;
ALTER TABLE cfs SPLIT AT SELECT a FROM cfs;
-- Use a tiny SST target size so that backup output files can end mid-row.
SET CLUSTER SETTING kv.bulk_sst.target_size = '1';
BACKUP cfs TO 'nodelocal://1/foo';
CREATE DATABASE r1;
RESTORE cfs FROM 'nodelocal://1/foo' WITH into_db='r1';
----

query-sql
SELECT max(length(start_key)) FROM [SHOW RANGES FROM TABLE orig.cfs];
----
2

# Regression test for #67488.
# This can return up to 6 if RESTORE improperly splits the ranges in the middle
# of a SQL row, since column family keys are longer. E.g. keys are expected to
# be of the form '/1' (row with PK 1), not '/3/1/1' (row with PK 3 and CF 1).
query-sql
SELECT max(length(start_key)) FROM [SHOW RANGES FROM TABLE r1.cfs];
----
2

0 comments on commit 1375a40

Please sign in to comment.