From b49ba6c069d38527c57729a43d607a28c7b0eb29 Mon Sep 17 00:00:00 2001 From: adityamaru Date: Thu, 27 Oct 2022 10:40:04 -0400 Subject: [PATCH 1/2] batcheval: refactor MVCCExportToSST to accept a storage.Writer This is a refactor only change that pulls out the logic in `MVCCExportToSST` into `mvccExportToWriter` that accepts a `storage.Writer` interface. This will allow us to pass in a `FingerprintWriter` in a future commit. Informs: #89336 Release note: None --- pkg/kv/kvserver/batcheval/cmd_export.go | 9 ---- pkg/storage/mvcc.go | 61 +++++++++++++++---------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/pkg/kv/kvserver/batcheval/cmd_export.go b/pkg/kv/kvserver/batcheval/cmd_export.go index fa5d1a567b7f..8a4c491c76dd 100644 --- a/pkg/kv/kvserver/batcheval/cmd_export.go +++ b/pkg/kv/kvserver/batcheval/cmd_export.go @@ -26,7 +26,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/errors" - "github.com/gogo/protobuf/types" ) // SSTTargetSizeSetting is the cluster setting name for the @@ -105,14 +104,6 @@ func evalExport( ctx, evalExportSpan := tracing.ChildSpan(ctx, "evalExport") defer evalExportSpan.Finish() - var evalExportTrace types.StringValue - if cArgs.EvalCtx.NodeID() == h.GatewayNodeID { - evalExportTrace.Value = fmt.Sprintf("evaluating Export on gateway node %d", cArgs.EvalCtx.NodeID()) - } else { - evalExportTrace.Value = fmt.Sprintf("evaluating Export on remote node %d", cArgs.EvalCtx.NodeID()) - } - evalExportSpan.RecordStructured(&evalExportTrace) - // Table's marked to be excluded from backup are expected to be configured // with a short GC TTL. Additionally, backup excludes such table's from being // protected from GC when writing ProtectedTimestamp records. The diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index ad62efe0768e..522e3dd29dd5 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -5767,8 +5767,33 @@ func MVCCIsSpanEmpty( } // MVCCExportToSST exports changes to the keyrange [StartKey, EndKey) over the -// interval (StartTS, EndTS] as a Pebble SST. See MVCCExportOptions for options. -// StartTS may be zero. +// interval (StartTS, EndTS] as a Pebble SST. See mvccExportToWriter for more +// details. +func MVCCExportToSST( + ctx context.Context, cs *cluster.Settings, reader Reader, opts MVCCExportOptions, dest io.Writer, +) (roachpb.BulkOpSummary, MVCCKey, error) { + ctx, span := tracing.ChildSpan(ctx, "storage.MVCCExportToSST") + defer span.Finish() + sstWriter := MakeBackupSSTWriter(ctx, cs, dest) + defer sstWriter.Close() + + summary, resumeKey, err := mvccExportToWriter(ctx, reader, opts, &sstWriter) + if err != nil { + return roachpb.BulkOpSummary{}, MVCCKey{}, err + } + + if summary.DataSize == 0 { + // If no records were added to the sstable, skip completing it and return a + // nil slice – the export code will discard it anyway (based on 0 DataSize). + return roachpb.BulkOpSummary{}, MVCCKey{}, nil + } + + return summary, resumeKey, sstWriter.Finish() +} + +// mvccExportToWriter exports changes to the keyrange [StartKey, EndKey) over +// the interval (StartTS, EndTS] to the passed in writer. See MVCCExportOptions +// for options. StartTS may be zero. // // This comes in two principal flavors: all revisions or latest revision only. // In all-revisions mode, exports everything matching the span and time bounds, @@ -5788,16 +5813,14 @@ func MVCCIsSpanEmpty( // intents outside are ignored. 
// // Returns an export summary and a resume key that allows resuming the export if -// it reached a limit. Data is written to dest as it is collected. If an error -// is returned then dest contents are undefined. -func MVCCExportToSST( - ctx context.Context, cs *cluster.Settings, reader Reader, opts MVCCExportOptions, dest io.Writer, +// it reached a limit. Data is written to the writer as it is collected. If an +// error is returned then the writer's contents are undefined. It is the +// responsibility of the caller to Finish() / Close() the passed in writer. +func mvccExportToWriter( + ctx context.Context, reader Reader, opts MVCCExportOptions, writer Writer, ) (roachpb.BulkOpSummary, MVCCKey, error) { - var span *tracing.Span - ctx, span = tracing.ChildSpan(ctx, "storage.MVCCExportToSST") + ctx, span := tracing.ChildSpan(ctx, "storage.mvccExportToWriter") defer span.Finish() - sstWriter := MakeBackupSSTWriter(ctx, cs, dest) - defer sstWriter.Close() // If we're not exporting all revisions then we can mask point keys below any // MVCC range tombstones, since we don't care about them. @@ -5936,7 +5959,7 @@ func MVCCExportToSST( } // Export only the inner roachpb.Value, not the MVCCValue header. rawValue := mvccValue.Value.RawBytes - if err := sstWriter.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), rawValue); err != nil { + if err := writer.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), rawValue); err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, err } } @@ -6047,11 +6070,11 @@ func MVCCExportToSST( if unsafeKey.Timestamp.IsEmpty() { // This should never be an intent since the incremental iterator returns // an error when encountering intents. - if err := sstWriter.PutUnversioned(unsafeKey.Key, unsafeValue); err != nil { + if err := writer.PutUnversioned(unsafeKey.Key, unsafeValue); err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, errors.Wrapf(err, "adding key %s", unsafeKey) } } else { - if err := sstWriter.PutRawMVCC(unsafeKey, unsafeValue); err != nil { + if err := writer.PutRawMVCC(unsafeKey, unsafeValue); err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, errors.Wrapf(err, "adding key %s", unsafeKey) } } @@ -6109,23 +6132,13 @@ func MVCCExportToSST( } // Export only the inner roachpb.Value, not the MVCCValue header. rawValue := mvccValue.Value.RawBytes - if err := sstWriter.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), rawValue); err != nil { + if err := writer.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), rawValue); err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, err } } rows.BulkOpSummary.DataSize += rangeKeysSize } - if rows.BulkOpSummary.DataSize == 0 { - // If no records were added to the sstable, skip completing it and return a - // nil slice – the export code will discard it anyway (based on 0 DataSize). - return roachpb.BulkOpSummary{}, MVCCKey{}, nil - } - - if err := sstWriter.Finish(); err != nil { - return roachpb.BulkOpSummary{}, MVCCKey{}, err - } - return rows.BulkOpSummary, resumeKey, nil } From b0c957dcc99f8c1ba2b22aa8e8676aeb8e9a585a Mon Sep 17 00:00:00 2001 From: adityamaru Date: Fri, 28 Oct 2022 11:56:07 -0400 Subject: [PATCH 2/2] storage: add MVCCExportFingerprint method and fingerprintWriter This change introduces a fingerprintWriter that hashes every key/timestamp and value for point keys, and combines their hashes via a XOR into a running aggregate. Range keys are not fingerprinted but instead written to a pebble SST that is returned to the caller. 
This is because range keys do not have a stable, discrete identity and so it is up to the caller to define a deterministic fingerprinting scheme across all returned range keys. The fingerprintWriter is used by `MVCCExportFingerprint` that exports a fingerprint for point keys in the keyrange [StartKey, EndKey) over the interval (StartTS, EndTS]. The export logic used by `MVCCExportFingerprint` is the same that drives `MVCCExportToSST`. The former writes to a fingerprintWriter while the latter writes to an sstWriter. Currently, this method only support using an `fnv64` hasher to fingerprint each KV. This change does not wire `MVCCExportFingerprint` to ExportRequest command evaluation. This will be done as a followup. Informs: #89336 Release note: None --- pkg/storage/BUILD.bazel | 1 + pkg/storage/fingerprint_writer.go | 146 +++++ pkg/storage/mvcc.go | 65 +- pkg/storage/mvcc_history_test.go | 32 +- pkg/storage/mvcc_key.go | 17 + pkg/storage/mvcc_key_test.go | 9 + pkg/storage/mvcc_test.go | 345 ++++++++++ pkg/storage/sst_writer.go | 1 + .../mvcc_histories/export_fingerprint | 605 ++++++++++++++++++ 9 files changed, 1212 insertions(+), 9 deletions(-) create mode 100644 pkg/storage/fingerprint_writer.go create mode 100644 pkg/storage/testdata/mvcc_histories/export_fingerprint diff --git a/pkg/storage/BUILD.bazel b/pkg/storage/BUILD.bazel index 47bb6db5dc2a..2b2737ef4571 100644 --- a/pkg/storage/BUILD.bazel +++ b/pkg/storage/BUILD.bazel @@ -13,6 +13,7 @@ go_library( "doc.go", "engine.go", "engine_key.go", + "fingerprint_writer.go", "in_mem.go", "intent_interleaving_iter.go", "intent_reader_writer.go", diff --git a/pkg/storage/fingerprint_writer.go b/pkg/storage/fingerprint_writer.go new file mode 100644 index 000000000000..e52c4d33e295 --- /dev/null +++ b/pkg/storage/fingerprint_writer.go @@ -0,0 +1,146 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package storage + +import ( + "context" + "hash" + "io" + + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/errors" +) + +// fingerprintWriter hashes every key/timestamp and value for point keys, and +// combines their hashes via a XOR into a running aggregate. +// +// Range keys are not fingerprinted but instead written to a pebble SST that is +// returned to the caller. This is because range keys do not have a stable, +// discrete identity and so it is up to the caller to define a deterministic +// fingerprinting scheme across all returned range keys. +// +// The caller must Finish() and Close() the fingerprintWriter to finalize the +// writes to the underlying pebble SST. +type fingerprintWriter struct { + hasher hash.Hash64 + timestampBuf []byte + + sstWriter *SSTWriter + xorAgg *uintXorAggregate +} + +// makeFingerprintWriter creates a new fingerprintWriter. +func makeFingerprintWriter( + ctx context.Context, hasher hash.Hash64, cs *cluster.Settings, f io.Writer, +) fingerprintWriter { + // TODO(adityamaru,dt): Once + // https://github.com/cockroachdb/cockroach/issues/90450 has been addressed we + // should write to a kvBuf instead of a Backup SST writer. 
+ sstWriter := MakeBackupSSTWriter(ctx, cs, f) + return fingerprintWriter{ + sstWriter: &sstWriter, + hasher: hasher, + xorAgg: &uintXorAggregate{}, + } +} + +type uintXorAggregate struct { + sum uint64 +} + +// add inserts one value into the running xor. +func (a *uintXorAggregate) add(x uint64) { + a.sum = a.sum ^ x +} + +// result returns the xor. +func (a *uintXorAggregate) result() uint64 { + return a.sum +} + +// Finish finalizes the underlying SSTWriter, and returns the aggregated +// fingerprint for point keys. +func (f *fingerprintWriter) Finish() (uint64, error) { + // If no records were added to the sstable, skip completing it. + if f.sstWriter.DataSize != 0 { + if err := f.sstWriter.Finish(); err != nil { + return 0, err + } + } + return f.xorAgg.result(), nil +} + +// Close finishes and frees memory and other resources. Close is idempotent. +func (f *fingerprintWriter) Close() { + if f.sstWriter == nil { + return + } + f.sstWriter.Close() + f.hasher.Reset() + f.xorAgg = nil + f.sstWriter = nil +} + +var _ ExportWriter = &fingerprintWriter{} + +// PutRawMVCCRangeKey implements the Writer interface. +func (f *fingerprintWriter) PutRawMVCCRangeKey(key MVCCRangeKey, bytes []byte) error { + // We do not fingerprint range keys, instead, we write them to a Pebble SST. + // This is because range keys do not have a stable, discrete identity and so + // it is up to the caller to define a deterministic fingerprinting scheme + // across all returned range keys. + return f.sstWriter.PutRawMVCCRangeKey(key, bytes) +} + +// PutRawMVCC implements the Writer interface. +func (f *fingerprintWriter) PutRawMVCC(key MVCCKey, value []byte) error { + defer f.hasher.Reset() + + // Hash the key/timestamp and value of the RawMVCC. + if err := f.hash(key.Key); err != nil { + return err + } + f.timestampBuf = EncodeMVCCTimestampToBuf(f.timestampBuf, key.Timestamp) + if err := f.hash(f.timestampBuf); err != nil { + return err + } + if err := f.hash(value); err != nil { + return err + } + f.xorAgg.add(f.hasher.Sum64()) + return nil +} + +// PutUnversioned implements the Writer interface. +func (f *fingerprintWriter) PutUnversioned(key roachpb.Key, value []byte) error { + defer f.hasher.Reset() + + // Hash the key and value in the absence of a timestamp. + if err := f.hash(key); err != nil { + return err + } + if err := f.hash(value); err != nil { + return err + } + + f.xorAgg.add(f.hasher.Sum64()) + return nil +} + +func (f *fingerprintWriter) hash(data []byte) error { + if _, err := f.hasher.Write(data); err != nil { + return errors.NewAssertionErrorWithWrappedErrf(err, + `"It never returns an error." -- https://golang.org/pkg/hash: %T`, f) + } + + return nil +} diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 522e3dd29dd5..57d8825a3ed0 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -14,6 +14,7 @@ import ( "bytes" "context" "fmt" + "hash/fnv" "io" "math" "runtime" @@ -5766,6 +5767,35 @@ func MVCCIsSpanEmpty( return !valid, nil } +// MVCCExportFingerprint exports a fingerprint for point keys in the keyrange +// [StartKey, EndKey) over the interval (StartTS, EndTS]. Each key/timestamp and +// value is hashed using a fnv64 hasher, and combined into a running aggregate +// via a XOR. On completion of the export this aggregate is returned as the +// fingerprint. +// +// Range keys are not fingerprinted but instead written to a pebble SST that is +// returned to the caller. 
This is because range keys do not have a stable, +// discrete identity and so it is up to the caller to define a deterministic +// fingerprinting scheme across all returned range keys. +func MVCCExportFingerprint( + ctx context.Context, cs *cluster.Settings, reader Reader, opts MVCCExportOptions, dest io.Writer, +) (roachpb.BulkOpSummary, MVCCKey, uint64, error) { + ctx, span := tracing.ChildSpan(ctx, "storage.MVCCExportToSST") + defer span.Finish() + + hasher := fnv.New64() + fingerprintWriter := makeFingerprintWriter(ctx, hasher, cs, dest) + defer fingerprintWriter.Close() + + summary, resumeKey, err := mvccExportToWriter(ctx, reader, opts, &fingerprintWriter) + if err != nil { + return roachpb.BulkOpSummary{}, MVCCKey{}, 0, err + } + + fingerprint, err := fingerprintWriter.Finish() + return summary, resumeKey, fingerprint, err +} + // MVCCExportToSST exports changes to the keyrange [StartKey, EndKey) over the // interval (StartTS, EndTS] as a Pebble SST. See mvccExportToWriter for more // details. @@ -5791,6 +5821,36 @@ func MVCCExportToSST( return summary, resumeKey, sstWriter.Finish() } +// ExportWriter is a trimmed down version of the Writer interface. It contains +// only those methods used during ExportRequest command evaluation. +type ExportWriter interface { + // PutRawMVCCRangeKey writes an MVCC range key with the provided encoded + // MVCCValue. It will replace any overlapping range keys at the given + // timestamp (even partial overlap). Only MVCC range tombstones, i.e. an empty + // value, are currently allowed (other kinds will need additional handling in + // MVCC APIs and elsewhere, e.g. stats and GC). It can be used to avoid + // decoding and immediately re-encoding an MVCCValue, but should generally be + // avoided due to the lack of type safety. + // + // It is safe to modify the contents of the arguments after PutRawMVCCRangeKey + // returns. + PutRawMVCCRangeKey(MVCCRangeKey, []byte) error + // PutRawMVCC sets the given key to the encoded MVCCValue. It requires that + // the timestamp is non-empty (see {PutUnversioned,PutIntent} if the timestamp + // is empty). It can be used to avoid decoding and immediately re-encoding an + // MVCCValue, but should generally be avoided due to the lack of type safety. + // + // It is safe to modify the contents of the arguments after PutRawMVCC + // returns. + PutRawMVCC(key MVCCKey, value []byte) error + // PutUnversioned sets the given key to the value provided. It is for use + // with inline metadata (not intents) and other unversioned keys (like + // Range-ID local keys). + // + // It is safe to modify the contents of the arguments after Put returns. + PutUnversioned(key roachpb.Key, value []byte) error +} + // mvccExportToWriter exports changes to the keyrange [StartKey, EndKey) over // the interval (StartTS, EndTS] to the passed in writer. See MVCCExportOptions // for options. StartTS may be zero. @@ -5817,11 +5877,8 @@ func MVCCExportToSST( // error is returned then the writer's contents are undefined. It is the // responsibility of the caller to Finish() / Close() the passed in writer. 
func mvccExportToWriter( - ctx context.Context, reader Reader, opts MVCCExportOptions, writer Writer, + ctx context.Context, reader Reader, opts MVCCExportOptions, writer ExportWriter, ) (roachpb.BulkOpSummary, MVCCKey, error) { - ctx, span := tracing.ChildSpan(ctx, "storage.mvccExportToWriter") - defer span.Finish() - // If we're not exporting all revisions then we can mask point keys below any // MVCC range tombstones, since we don't care about them. var rangeKeyMasking hlc.Timestamp diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index ca8789bdb9c8..ec6a1dd365e9 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -97,7 +97,7 @@ var ( // put_rangekey ts=[,] [localTs=[,]] k= end= // get [t=] [ts=[,]] [resolve [status=]] k= [inconsistent] [skipLocked] [tombstones] [failOnMoreRecent] [localUncertaintyLimit=[,]] [globalUncertaintyLimit=[,]] // scan [t=] [ts=[,]] [resolve [status=]] k= [end=] [inconsistent] [skipLocked] [tombstones] [reverse] [failOnMoreRecent] [localUncertaintyLimit=[,]] [globalUncertaintyLimit=[,]] [max=] [targetbytes=] [allowEmpty] -// export [k=] [end=] [ts=[,]] [kTs=[,]] [startTs=[,]] [maxIntents=] [allRevisions] [targetSize=] [maxSize=] [stopMidKey] +// export [k=] [end=] [ts=[,]] [kTs=[,]] [startTs=[,]] [maxIntents=] [allRevisions] [targetSize=] [maxSize=] [stopMidKey] [fingerprint] // // iter_new [k=] [end=] [prefix] [kind=key|keyAndIntents] [types=pointsOnly|pointsWithRanges|pointsAndRanges|rangesOnly] [pointSynthesis] [maskBelow=[,]] // iter_new_incremental [k=] [end=] [startTs=[,]] [endTs=[,]] [types=pointsOnly|pointsWithRanges|pointsAndRanges|rangesOnly] [maskBelow=[,]] [intents=error|aggregate|emit] @@ -1331,22 +1331,44 @@ func cmdExport(e *evalCtx) error { if e.hasArg("maxSize") { e.scanArg("maxSize", &opts.MaxSize) } + var shouldFingerprint bool + if e.hasArg("fingerprint") { + shouldFingerprint = true + } r := e.newReader() defer r.Close() sstFile := &storage.MemFile{} - summary, resume, err := storage.MVCCExportToSST(e.ctx, e.st, r, opts, sstFile) - if err != nil { - return err + + var summary roachpb.BulkOpSummary + var resume storage.MVCCKey + var fingerprint uint64 + var err error + if shouldFingerprint { + summary, resume, fingerprint, err = storage.MVCCExportFingerprint(e.ctx, e.st, r, opts, sstFile) + if err != nil { + return err + } + e.results.buf.Printf("export: %s", &summary) + e.results.buf.Print(" fingerprint=true") + } else { + summary, resume, err = storage.MVCCExportToSST(e.ctx, e.st, r, opts, sstFile) + if err != nil { + return err + } + e.results.buf.Printf("export: %s", &summary) } - e.results.buf.Printf("export: %s", &summary) if resume.Key != nil { e.results.buf.Printf(" resume=%s", resume) } e.results.buf.Printf("\n") + if shouldFingerprint { + e.results.buf.Printf("fingerprint: %d\n", fingerprint) + } + iter, err := storage.NewMemSSTIterator(sstFile.Bytes(), false /* verify */, storage.IterOptions{ KeyTypes: storage.IterKeyTypePointsAndRanges, UpperBound: keys.MaxKey, diff --git a/pkg/storage/mvcc_key.go b/pkg/storage/mvcc_key.go index 8254c8995d23..8aa3e29ccc3a 100644 --- a/pkg/storage/mvcc_key.go +++ b/pkg/storage/mvcc_key.go @@ -258,6 +258,23 @@ func encodeMVCCTimestampSuffixToBuf(buf []byte, ts hlc.Timestamp) []byte { return buf } +// EncodeMVCCTimestampToBuf encodes an MVCC timestamp into its Pebble +// representation, excluding the length suffix and sentinel byte, reusing the +// given byte slice if it has sufficient capacity. 
+func EncodeMVCCTimestampToBuf(buf []byte, ts hlc.Timestamp) []byte { + tsLen := encodedMVCCTimestampLength(ts) + if tsLen == 0 { + return buf[:0] + } + if cap(buf) < tsLen { + buf = make([]byte, tsLen) + } else { + buf = buf[:tsLen] + } + encodeMVCCTimestampToBuf(buf, ts) + return buf +} + // encodeMVCCTimestampToBuf encodes an MVCC timestamp into its Pebble // representation, excluding the length suffix and sentinel byte. The target // buffer must have the correct size, and the timestamp must not be empty. diff --git a/pkg/storage/mvcc_key_test.go b/pkg/storage/mvcc_key_test.go index f9357a8a219b..f23c6fe7933b 100644 --- a/pkg/storage/mvcc_key_test.go +++ b/pkg/storage/mvcc_key_test.go @@ -179,6 +179,8 @@ func TestEncodeDecodeMVCCKeyAndTimestampWithLength(t *testing.T) { "logical and synthetic": {"foo", hlc.Timestamp{Logical: 65535, Synthetic: true}, "666f6f0000000000000000000000ffff010e"}, "all": {"foo", hlc.Timestamp{WallTime: 1643550788737652545, Logical: 65535, Synthetic: true}, "666f6f0016cf10bc050557410000ffff010e"}, } + + buf := []byte{} for name, tc := range testcases { t.Run(name, func(t *testing.T) { @@ -234,6 +236,13 @@ func TestEncodeDecodeMVCCKeyAndTimestampWithLength(t *testing.T) { decodedTS, err = decodeMVCCTimestamp(encodedTS) require.NoError(t, err) require.Equal(t, tc.ts, decodedTS) + + buf = EncodeMVCCTimestampToBuf(buf, tc.ts) + if expectTS == nil { + require.Empty(t, buf) + } else { + require.Equal(t, expectTS, buf) + } }) } } diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index a148d1821fba..05a63fb86356 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -14,6 +14,7 @@ import ( "bytes" "context" "fmt" + "hash/fnv" "math" "math/rand" "reflect" @@ -6255,6 +6256,350 @@ func TestMVCCExportToSSTSErrorsOnLargeKV(t *testing.T) { require.ErrorAs(t, err, &expectedErr) } +// TestMVCCExportFingerprint verifies that MVCCExportFingerprint correctly +// fingerprints point keys in a given key and time interval, and returns the +// range keys in a pebble SST. +// +// This test uses a `fingerprintOracle` to verify that the fingerprint generated +// by `MVCCExportFingerprint` is what we would get if we iterated over an SST +// with all keys and computed our own fingerprint. +func TestMVCCExportFingerprint(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + st := cluster.MakeTestingClusterSettings() + + fingerprint := func(opts MVCCExportOptions, engine Engine) (uint64, []byte, roachpb.BulkOpSummary, MVCCKey) { + dest := &MemFile{} + var err error + res, resumeKey, fingerprint, err := MVCCExportFingerprint( + ctx, st, engine, opts, dest) + require.NoError(t, err) + return fingerprint, dest.Data(), res, resumeKey + } + + // verifyFingerprintAgainstOracle uses the `fingerprintOracle` to compute a + // fingerprint over the same key and time interval, and ensure our fingerprint + // and range keys match up with that generated by the oracle. + verifyFingerprintAgainstOracle := func( + actualFingerprint uint64, + actualRangekeys []MVCCRangeKeyStack, + opts MVCCExportOptions, + engine Engine) { + oracle := makeFingerprintOracle(st, engine, opts) + expectedFingerprint, expectedRangeKeys := oracle.getFingerprintAndRangeKeys(ctx, t) + require.Equal(t, expectedFingerprint, actualFingerprint) + require.Equal(t, expectedRangeKeys, actualRangekeys) + } + + engine := createTestPebbleEngine() + defer engine.Close() + + kvSize := int64(16) + rangeKeySize := int64(10) + + // Insert some point keys. 
+ // + // 2000 value3 value4 + // + // 1000 value1 value2 + // + // 1 2 3 + var testData = []testValue{ + value(key(1), "value1", ts(1000)), + value(key(2), "value2", ts(1000)), + value(key(2), "value3", ts(2000)), + value(key(3), "value4", ts(2000)), + } + require.NoError(t, fillInData(ctx, engine, testData)) + + // Insert range keys. + // + // 3000 [--- r2 ---) + // + // 2000 value3 value4 [--- r1 ---) + // + // 1000 value1 value2 + // + // 1 2 3 4 5 + require.NoError(t, engine.PutRawMVCCRangeKey(MVCCRangeKey{ + StartKey: key(4), + EndKey: key(5), + Timestamp: ts(2000), + }, []byte{})) + require.NoError(t, engine.PutRawMVCCRangeKey(MVCCRangeKey{ + StartKey: key(1), + EndKey: key(2), + Timestamp: ts(3000), + }, []byte{})) + + testutils.RunTrueAndFalse(t, "allRevisions", func(t *testing.T, allRevisions bool) { + t.Run("no-key-or-ts-bounds", func(t *testing.T) { + opts := MVCCExportOptions{ + StartKey: MVCCKey{Key: key(1)}, + EndKey: keys.MaxKey, + StartTS: hlc.Timestamp{}, + EndTS: hlc.Timestamp{WallTime: 9999}, + ExportAllRevisions: allRevisions, + } + fingerprint, rangeKeySST, summary, resumeKey := fingerprint(opts, engine) + require.Empty(t, resumeKey) + rangeKeys := getRangeKeys(t, rangeKeySST) + if allRevisions { + require.Equal(t, kvSize*4+rangeKeySize*2, summary.DataSize) + require.Equal(t, 2, len(rangeKeys)) + } else { + require.Equal(t, kvSize*2, summary.DataSize) + // StartTime is empty so we don't read rangekeys when not exporting all + // revisions. + require.Empty(t, rangeKeys) + } + verifyFingerprintAgainstOracle(fingerprint, rangeKeys, opts, engine) + }) + + t.Run("key-bounds", func(t *testing.T) { + opts := MVCCExportOptions{ + StartKey: MVCCKey{Key: key(1)}, + EndKey: key(2).Next(), + StartTS: hlc.Timestamp{}, + EndTS: hlc.Timestamp{WallTime: 9999}, + ExportAllRevisions: allRevisions, + } + fingerprint, rangeKeySST, summary, resumeKey := fingerprint(opts, engine) + require.Empty(t, resumeKey) + rangeKeys := getRangeKeys(t, rangeKeySST) + if allRevisions { + require.Equal(t, kvSize*3+rangeKeySize, summary.DataSize) + require.Equal(t, 1, len(rangeKeys)) + } else { + // Rangekey masks the point key 1@1000, so we only see 2@2000. + require.Equal(t, kvSize*1, summary.DataSize) + // StartTime is empty, so we don't read rangekeys when not exporting all + // revisions. 
+ require.Empty(t, rangeKeys) + } + verifyFingerprintAgainstOracle(fingerprint, getRangeKeys(t, rangeKeySST), opts, engine) + }) + + t.Run("outside-point-key-bounds", func(t *testing.T) { + opts := MVCCExportOptions{ + StartKey: MVCCKey{Key: key(3).Next()}, + EndKey: keys.MaxKey, + StartTS: hlc.Timestamp{}, + EndTS: hlc.Timestamp{WallTime: 9999}, + ExportAllRevisions: allRevisions, + } + fingerprint, rangeKeySST, summary, resumeKey := fingerprint(opts, engine) + require.Empty(t, resumeKey) + rangeKeys := getRangeKeys(t, rangeKeySST) + require.Equal(t, uint64(0), fingerprint) + if allRevisions { + require.Equal(t, rangeKeySize, summary.DataSize) + require.Len(t, rangeKeys, 1) + } else { + require.Equal(t, int64(0), summary.DataSize) + require.Empty(t, rangeKeys) + } + verifyFingerprintAgainstOracle(fingerprint, getRangeKeys(t, rangeKeySST), opts, engine) + }) + + t.Run("time-bounds", func(t *testing.T) { + opts := MVCCExportOptions{ + StartKey: MVCCKey{Key: key(1)}, + EndKey: keys.MaxKey, + StartTS: ts(1000).Prev(), + EndTS: ts(1000), + ExportAllRevisions: allRevisions, + } + fingerprint, rangeKeySST, summary, resumeKey := fingerprint(opts, engine) + require.Empty(t, resumeKey) + rangeKeys := getRangeKeys(t, rangeKeySST) + require.Empty(t, rangeKeys) + require.Equal(t, kvSize*2, summary.DataSize) + verifyFingerprintAgainstOracle(fingerprint, getRangeKeys(t, rangeKeySST), opts, engine) + }) + + t.Run("outside-point-key-time-bounds", func(t *testing.T) { + opts := MVCCExportOptions{ + StartKey: MVCCKey{Key: key(1)}, + EndKey: keys.MaxKey, + StartTS: ts(2000), + EndTS: ts(3000), + ExportAllRevisions: allRevisions, + } + fingerprint, rangeKeySST, summary, resumeKey := fingerprint(opts, engine) + require.Empty(t, resumeKey) + rangeKeys := getRangeKeys(t, rangeKeySST) + require.Equal(t, rangeKeySize, summary.DataSize) + require.Len(t, rangeKeys, 1) + require.Equal(t, uint64(0), fingerprint) + verifyFingerprintAgainstOracle(fingerprint, getRangeKeys(t, rangeKeySST), opts, engine) + }) + + t.Run("assert-hash-is-per-kv", func(t *testing.T) { + // Fingerprint point keys 1 and 2. + opts := MVCCExportOptions{ + StartKey: MVCCKey{Key: key(1)}, + EndKey: key(2).Next(), + StartTS: hlc.Timestamp{}, + EndTS: hlc.Timestamp{WallTime: 9999}, + ExportAllRevisions: allRevisions, + } + fingerprint1, _, summary, resumeKey := fingerprint(opts, engine) + require.Empty(t, resumeKey) + if allRevisions { + require.Equal(t, 3*kvSize+rangeKeySize, summary.DataSize) + } else { + // Rangekey masking means we only see 2@2000. + require.Equal(t, kvSize, summary.DataSize) + } + + // Fingerprint point key 3. + opts = MVCCExportOptions{ + StartKey: MVCCKey{Key: key(3)}, + EndKey: keys.MaxKey, + StartTS: hlc.Timestamp{}, + EndTS: hlc.Timestamp{WallTime: 9999}, + ExportAllRevisions: allRevisions, + } + fingerprint2, _, summary2, resumeKey2 := fingerprint(opts, engine) + require.Empty(t, resumeKey2) + if allRevisions { + require.Equal(t, kvSize+rangeKeySize, summary2.DataSize) + } else { + require.Equal(t, kvSize, summary2.DataSize) + } + + // Fingerprint point keys 1 to 3. 
+ opts = MVCCExportOptions{ + StartKey: MVCCKey{Key: key(1)}, + EndKey: keys.MaxKey, + StartTS: hlc.Timestamp{}, + EndTS: hlc.Timestamp{WallTime: 9999}, + ExportAllRevisions: allRevisions, + } + fingerprint3, _, summary3, resumeKey3 := fingerprint(opts, engine) + require.Empty(t, resumeKey3) + if allRevisions { + require.Equal(t, 4*kvSize+2*rangeKeySize, summary3.DataSize) + } else { + require.Equal(t, 2*kvSize, summary3.DataSize) + } + + // Verify that fp3 = fp1 ^ fp2 + require.Equal(t, fingerprint3, fingerprint1^fingerprint2) + }) + }) +} + +type fingerprintOracle struct { + st *cluster.Settings + engine Engine + opts *MVCCExportOptions +} + +// makeFingerprintOracle returns a fingerprintOracle that can be used to check +// the correctness of a fingerprint for point keys. +func makeFingerprintOracle( + st *cluster.Settings, engine Engine, opts MVCCExportOptions, +) *fingerprintOracle { + return &fingerprintOracle{ + opts: &opts, + engine: engine, + st: st, + } +} + +// getFingerprintAndRangeKeys can be used to generate the fingerprint of point +// keys in an interval determined by the supplied `MVCCExportOptions`. This +// fingerprint is generated by exporting the point and range keys to a pebble +// SST using `MVCCExportToSST` and then maintaining a XOR aggregate of the hash +// of every point key in the SST. Range keys are not fingerprinted but instead +// returned as is to the caller. +func (f *fingerprintOracle) getFingerprintAndRangeKeys( + ctx context.Context, t *testing.T, +) (uint64, []MVCCRangeKeyStack) { + t.Helper() + + dest := &MemFile{} + _, _, err := MVCCExportToSST(ctx, f.st, f.engine, *f.opts, dest) + require.NoError(t, err) + return f.fingerprintPointKeys(t, dest.Data()), getRangeKeys(t, dest.Data()) +} + +func (f *fingerprintOracle) fingerprintPointKeys(t *testing.T, dataSST []byte) uint64 { + t.Helper() + + hasher := fnv.New64() + var xorAgg uint64 + iterOpts := IterOptions{ + KeyTypes: IterKeyTypePointsOnly, + LowerBound: keys.LocalMax, + UpperBound: keys.MaxKey, + } + iter, err := NewMemSSTIterator(dataSST, false, iterOpts) + if err != nil { + t.Fatal(err) + } + defer iter.Close() + + for iter.SeekGE(MVCCKey{Key: keys.MinKey}); ; iter.Next() { + if valid, err := iter.Valid(); !valid || err != nil { + if err != nil { + t.Fatal(err) + } + break + } + k := iter.UnsafeKey() + if k.Timestamp.IsEmpty() { + _, err := hasher.Write(k.Key) + require.NoError(t, err) + _, err = hasher.Write(iter.UnsafeValue()) + require.NoError(t, err) + } else { + _, err := hasher.Write(k.Key) + require.NoError(t, err) + tsLen := encodedMVCCTimestampLength(k.Timestamp) + require.NotZero(t, tsLen) + timestampBuf := make([]byte, tsLen) + encodeMVCCTimestampToBuf(timestampBuf, k.Timestamp) + _, err = hasher.Write(timestampBuf) + require.NoError(t, err) + _, err = hasher.Write(iter.UnsafeValue()) + require.NoError(t, err) + } + xorAgg = xorAgg ^ hasher.Sum64() + hasher.Reset() + } + + return xorAgg +} + +func getRangeKeys(t *testing.T, dataSST []byte) []MVCCRangeKeyStack { + t.Helper() + + iterOpts := IterOptions{ + KeyTypes: IterKeyTypeRangesOnly, + LowerBound: keys.LocalMax, + UpperBound: keys.MaxKey, + } + iter, err := NewMemSSTIterator(dataSST, false, iterOpts) + require.NoError(t, err) + defer iter.Close() + + allRangeKeys := make([]MVCCRangeKeyStack, 0) + for iter.SeekGE(MVCCKey{Key: keys.MinKey}); ; iter.Next() { + if ok, err := iter.Valid(); err != nil { + t.Fatal(err) + } else if !ok { + break + } + rangeKeys := iter.RangeKeys() + allRangeKeys = append(allRangeKeys, rangeKeys.Clone()) + } 
+ return allRangeKeys +} + // mvccGetRaw fetches a raw MVCC value, for use in tests. func mvccGetRaw(t *testing.T, r Reader, key MVCCKey) []byte { value, err := mvccGetRawWithError(t, r, key) diff --git a/pkg/storage/sst_writer.go b/pkg/storage/sst_writer.go index f58c1ea01253..bfef6012fc54 100644 --- a/pkg/storage/sst_writer.go +++ b/pkg/storage/sst_writer.go @@ -35,6 +35,7 @@ type SSTWriter struct { } var _ Writer = &SSTWriter{} +var _ ExportWriter = &SSTWriter{} // writeCloseSyncer interface copied from pebble.sstable. type writeCloseSyncer interface { diff --git a/pkg/storage/testdata/mvcc_histories/export_fingerprint b/pkg/storage/testdata/mvcc_histories/export_fingerprint new file mode 100644 index 000000000000..057ef6744f28 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/export_fingerprint @@ -0,0 +1,605 @@ +# Tests MVCC export fingerprint. +# +# Sets up the following dataset, where x is MVCC point tombstone, o-o is MVCC +# range tombstone, [] is intent. We include some local timestamps, which should +# not be export fingerprinted. +# +# 7 [a7] [d7] [j7] [l7] [o7] +# 6 f6 +# 5 o---------------o k5 +# 4 x x d4 f4 g4 x +# 3 o-------o e3 o-------oh3 o---o +# 2 a2 f2 g2 +# 1 o---------------------------------------o +# a b c d e f g h i j k l m n o + +run ok +del_range_ts k=a end=k ts=1 +put k=a ts=2 v=a2 +del k=a ts=4 +del_range_ts k=b end=d ts=3 +del k=b ts=4 +put k=d ts=4 v=d4 +put k=e ts=3 v=e3 localTs=2 +put k=f ts=2 v=f2 +put k=g ts=2 v=g2 +del_range_ts k=f end=h ts=3 +put k=f ts=4 v=f4 +put k=g ts=4 v=g4 +del_range_ts k=c end=g ts=5 localTs=4 +put k=f ts=6 v=f6 +put k=h ts=3 v=h3 +del k=h ts=4 +put k=k ts=5 v=k5 localTs=4 +del_range_ts k=m end=n ts=3 localTs=2 +with t=A + txn_begin ts=7 + put k=a v=a7 + put k=d v=d7 + put k=j v=j7 + put k=l v=l7 + put k=o v=n7 +---- +del: "a": found key true +del: "b": found key false +del: "h": found key true +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 +rangekey: {a-b}/[1.000000000,0=/] +rangekey: {b-c}/[3.000000000,0=/ 1.000000000,0=/] +rangekey: {c-d}/[5.000000000,0={localTs=4.000000000,0}/ 3.000000000,0=/ 1.000000000,0=/] +rangekey: {d-f}/[5.000000000,0={localTs=4.000000000,0}/ 1.000000000,0=/] +rangekey: {f-g}/[5.000000000,0={localTs=4.000000000,0}/ 3.000000000,0=/ 1.000000000,0=/] +rangekey: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +rangekey: {h-k}/[1.000000000,0=/] +rangekey: {m-n}/[3.000000000,0={localTs=2.000000000,0}/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/7.000000000,0 -> /BYTES/a7 +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/4.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "d"/7.000000000,0 -> /BYTES/d7 +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "e"/3.000000000,0 -> {localTs=2.000000000,0}/BYTES/e3 +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "f"/2.000000000,0 -> /BYTES/f2 +data: "g"/4.000000000,0 -> /BYTES/g4 +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> / +data: "h"/3.000000000,0 -> /BYTES/h3 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 
mergeTs= txnDidNotUpdateMeta=true +data: "j"/7.000000000,0 -> /BYTES/j7 +data: "k"/5.000000000,0 -> {localTs=4.000000000,0}/BYTES/k5 +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "l"/7.000000000,0 -> /BYTES/l7 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/7.000000000,0 -> /BYTES/n7 + +# Exporting across intents will error. +run error +export fingerprint k=a end=z +---- +error: (*roachpb.WriteIntentError:) conflicting intents on "a" + +run error +export fingerprint k=a end=z maxIntents=100 +---- +error: (*roachpb.WriteIntentError:) conflicting intents on "a", "d", "j", "l", "o" + +run error +export fingerprint k=a end=z maxIntents=3 +---- +error: (*roachpb.WriteIntentError:) conflicting intents on "a", "d", "j" + +# Export the entire dataset below the intents, with full revision history. +run ok +export fingerprint k=a end=z ts=6 allRevisions +---- +export: data_size:165 fingerprint=true +fingerprint: 17693463359975730253 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {d-f}/[5.000000000,0=/ 1.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +export: {h-k}/[1.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +# Export the full revision history, at increasing end time and then at +# increasing start time. +run ok +export fingerprint k=a end=z ts=1 allRevisions +---- +export: data_size:14 fingerprint=true +fingerprint: 0 +export: {a-k}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=2 allRevisions +---- +export: data_size:38 fingerprint=true +fingerprint: 7394159293535633020 +export: {a-k}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=3 allRevisions +---- +export: data_size:77 fingerprint=true +fingerprint: 7213511226611827020 +export: {a-b}/[1.000000000,0=/] +export: {b-d}/[3.000000000,0=/ 1.000000000,0=/] +export: {d-f}/[1.000000000,0=/] +export: {f-h}/[3.000000000,0=/ 1.000000000,0=/] +export: {h-k}/[1.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=4 allRevisions +---- +export: data_size:104 fingerprint=true +fingerprint: 12311975366333312460 +export: {a-b}/[1.000000000,0=/] +export: {b-d}/[3.000000000,0=/ 1.000000000,0=/] +export: {d-f}/[1.000000000,0=/] +export: {f-h}/[3.000000000,0=/ 1.000000000,0=/] +export: {h-k}/[1.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=5 allRevisions +---- +export: data_size:157 fingerprint=true +fingerprint: 17505735789188331755 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {d-f}/[5.000000000,0=/ 1.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +export: {h-k}/[1.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=6 allRevisions +---- +export: data_size:165 fingerprint=true +fingerprint: 17693463359975730253 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: 
{d-f}/[5.000000000,0=/ 1.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +export: {h-k}/[1.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=1 ts=6 allRevisions +---- +export: data_size:151 fingerprint=true +fingerprint: 17693463359975730253 +export: {b-c}/[3.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/] +export: {d-f}/[5.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/] +export: {g-h}/[3.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=2 ts=6 allRevisions +---- +export: data_size:127 fingerprint=true +fingerprint: 10598829871782564401 +export: {b-c}/[3.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/] +export: {d-f}/[5.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/] +export: {g-h}/[3.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=3 ts=6 allRevisions +---- +export: data_size:88 fingerprint=true +fingerprint: 10488959482011561217 +export: {c-g}/[5.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=4 ts=6 allRevisions +---- +export: data_size:61 fingerprint=true +fingerprint: 6869998736090988929 +export: {c-g}/[5.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=5 ts=6 allRevisions +---- +export: data_size:8 fingerprint=true +fingerprint: 539045325090296998 + +run ok +export fingerprint k=a end=z startTs=6 ts=6 allRevisions +---- +export: fingerprint=true +fingerprint: 0 + +# Export without revision history at increasing end time, then at increasing +# start time. +run ok +export fingerprint k=a end=z ts=1 +---- +export: fingerprint=true +fingerprint: 0 + +run ok +export fingerprint k=a end=z ts=2 +---- +export: data_size:24 fingerprint=true +fingerprint: 7394159293535633020 + +run ok +export fingerprint k=a end=z ts=3 +---- +export: data_size:24 fingerprint=true +fingerprint: 17705510361986729108 + +run ok +export fingerprint k=a end=z ts=4 +---- +export: data_size:32 fingerprint=true +fingerprint: 10270839490468725004 + +run ok +export fingerprint k=a end=z ts=5 +---- +export: data_size:16 fingerprint=true +fingerprint: 2648504303020058862 + +run ok +export fingerprint k=a end=z ts=6 +---- +export: data_size:24 fingerprint=true +fingerprint: 2574496015647063112 + +run ok +export fingerprint k=a end=z startTs=1 ts=6 +---- +export: data_size:91 fingerprint=true +fingerprint: 5843921525122089813 +export: {b-c}/[3.000000000,0=/] +export: {c-g}/[5.000000000,0=/] +export: {g-h}/[3.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=2 ts=6 +---- +export: data_size:91 fingerprint=true +fingerprint: 5843921525122089813 +export: {b-c}/[3.000000000,0=/] +export: {c-g}/[5.000000000,0=/] +export: {g-h}/[3.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=3 ts=6 +---- +export: data_size:72 fingerprint=true +fingerprint: 5843921525122089813 +export: {c-g}/[5.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=4 ts=6 +---- +export: data_size:61 fingerprint=true +fingerprint: 6869998736090988929 +export: {c-g}/[5.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=5 ts=6 +---- +export: data_size:8 fingerprint=true +fingerprint: 539045325090296998 + +run ok +export fingerprint k=a end=z startTs=6 ts=6 +---- +export: fingerprint=true +fingerprint: 0 + +# Incremental export fingerprint one 
timestamp at a time, with and without full revision +# history. +run ok +export fingerprint k=a end=z startTs=0 ts=1 allRevisions +---- +export: data_size:14 fingerprint=true +fingerprint: 0 +export: {a-k}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=1 ts=2 allRevisions +---- +export: data_size:24 fingerprint=true +fingerprint: 7394159293535633020 + +run ok +export fingerprint k=a end=z startTs=2 ts=3 allRevisions +---- +export: data_size:39 fingerprint=true +fingerprint: 182077538345271088 +export: {b-d}/[3.000000000,0=/] +export: {f-h}/[3.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=3 ts=4 allRevisions +---- +export: data_size:27 fingerprint=true +fingerprint: 14899990458252242048 + +run ok +export fingerprint k=a end=z startTs=4 ts=5 allRevisions +---- +export: data_size:53 fingerprint=true +fingerprint: 6353507799313519911 +export: {c-g}/[5.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=5 ts=6 allRevisions +---- +export: data_size:8 fingerprint=true +fingerprint: 539045325090296998 + +run ok +export fingerprint k=a end=z startTs=0 ts=1 +---- +export: fingerprint=true +fingerprint: 0 + +run ok +export fingerprint k=a end=z startTs=1 ts=2 +---- +export: data_size:24 fingerprint=true +fingerprint: 7394159293535633020 + +run ok +export fingerprint k=a end=z startTs=2 ts=3 +---- +export: data_size:39 fingerprint=true +fingerprint: 182077538345271088 +export: {b-d}/[3.000000000,0=/] +export: {f-h}/[3.000000000,0=/] +export: {m-n}/[3.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=3 ts=4 +---- +export: data_size:27 fingerprint=true +fingerprint: 14899990458252242048 + +run ok +export fingerprint k=a end=z startTs=4 ts=5 +---- +export: data_size:53 fingerprint=true +fingerprint: 6353507799313519911 +export: {c-g}/[5.000000000,0=/] + +run ok +export fingerprint k=a end=z startTs=5 ts=6 +---- +export: data_size:8 fingerprint=true +fingerprint: 539045325090296998 + +# TargetSize returns a resume span, and allows overflow, both when export fingerprinting the +# whole revision history and the latest version. It is not affected by +# stopMidKey. +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=1 +---- +export: data_size:11 fingerprint=true resume="b"/0,0 +fingerprint: 3503808496681756163 +export: {a-b}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=1 stopMidKey +---- +export: data_size:11 fingerprint=true resume="b"/0,0 +fingerprint: 3503808496681756163 +export: {a-b}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=6 targetSize=1 +---- +export: data_size:8 fingerprint=true resume="g"/0,0 +fingerprint: 539045325090296998 + +run ok +export fingerprint k=a end=z startTs=1 ts=6 targetSize=1 +---- +export: data_size:1 fingerprint=true resume="b"/0,0 +fingerprint: 14380066247656349095 + +# MaxSize returns an error if exceeded without TargetSize. +# +# TODO(erikgrinaker): It probably doesn't make sense for this behavior to change +# based on whether TargetSize is set or not, but keeping the existing logic for +# now. 
+run error +export fingerprint k=a end=z ts=6 allRevisions maxSize=1 +---- +error: (*storage.ExceedMaxSizeError:) export size (3 bytes) exceeds max size (1 bytes) + +run error +export fingerprint k=a end=z ts=6 allRevisions maxSize=10 +---- +error: (*storage.ExceedMaxSizeError:) export size (12 bytes) exceeds max size (10 bytes) + +# MaxSize with TargetSize will bail out before exceeding MaxSize, but it +# depends on StopMidKey. +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=1 maxSize=1 +---- +export: fingerprint=true resume="a"/0,0 +fingerprint: 0 + +run error +export fingerprint k=a end=z ts=6 allRevisions targetSize=10 maxSize=10 +---- +error: (*storage.ExceedMaxSizeError:) export size (12 bytes) exceeds max size (10 bytes) + +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=10 maxSize=10 stopMidKey +---- +export: data_size:4 fingerprint=true resume="a"/2.000000000,0 +fingerprint: 14380066247656349095 +export: a{-\x00}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=12 maxSize=12 +---- +export: data_size:11 fingerprint=true resume="b"/0,0 +fingerprint: 3503808496681756163 +export: {a-b}/[1.000000000,0=/] + +run error +export fingerprint k=a end=z ts=6 allRevisions targetSize=17 maxSize=17 +---- +error: (*storage.ExceedMaxSizeError:) export size (18 bytes) exceeds max size (17 bytes) + +# TargetSize and MaxSize without stopMidKey will keep going to the +# end of the key as long as MaxSize isn't exceeded. +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=4 maxSize=12 +---- +export: data_size:11 fingerprint=true resume="b"/0,0 +fingerprint: 3503808496681756163 +export: {a-b}/[1.000000000,0=/] + +# Hitting MaxSize right after including a range key with the same start key as +# the exceeding point key will emit a point-sized range key, unfortunately. This +# is also the case when we emit a covered point. However, it won't emit that +# range key if StopMidKey is disabled. +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=3 maxSize=3 stopMidKey +---- +export: data_size:3 fingerprint=true resume="a"/4.000000000,0 +fingerprint: 0 +export: a{-\x00}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=4 maxSize=4 stopMidKey +---- +export: data_size:4 fingerprint=true resume="a"/2.000000000,0 +fingerprint: 14380066247656349095 +export: a{-\x00}/[1.000000000,0=/] + +run ok +export fingerprint k=a end=z ts=6 allRevisions targetSize=17 maxSize=17 stopMidKey +---- +export: data_size:17 fingerprint=true resume="b"/4.000000000,0 +fingerprint: 3503808496681756163 +export: {a-b}/[1.000000000,0=/] +export: b{-\x00}/[3.000000000,0=/ 1.000000000,0=/] + +run error +export fingerprint k=a end=z ts=6 allRevisions targetSize=17 maxSize=17 +---- +error: (*storage.ExceedMaxSizeError:) export size (18 bytes) exceeds max size (17 bytes) + +# Resuming from various bounds, with and without other options. 
+run ok +export fingerprint k=b end=k ts=6 allRevisions +---- +export: data_size:131 fingerprint=true +fingerprint: 11315287205977104233 +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {d-f}/[5.000000000,0=/ 1.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +export: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +export: {h-k}/[1.000000000,0=/] + +run ok +export fingerprint k=bbb end=ggg startTs=2 ts=5 allRevisions +---- +export: data_size:89 fingerprint=true +fingerprint: 10270839490468725004 +export: {bbb-c}/[3.000000000,0=/] +export: {c-d}/[5.000000000,0=/ 3.000000000,0=/] +export: {d-f}/[5.000000000,0=/] +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/] +export: g{-gg}/[3.000000000,0=/] + +run ok +export fingerprint k=bbb end=ggg startTs=2 ts=5 +---- +export: data_size:61 fingerprint=true +fingerprint: 9001940358411271625 +export: {bbb-c}/[3.000000000,0=/] +export: {c-g}/[5.000000000,0=/] +export: g{-gg}/[3.000000000,0=/] + +# Resuming from a specific key version. +run ok +export fingerprint k=a kTs=4 end=c ts=6 allRevisions +---- +export: data_size:16 fingerprint=true +fingerprint: 6905610633035313899 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] + +run ok +export fingerprint k=a kTs=3 end=c ts=6 allRevisions +---- +export: data_size:15 fingerprint=true +fingerprint: 10972433733941915468 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] + +run ok +export fingerprint k=a kTs=2 end=c ts=6 allRevisions +---- +export: data_size:15 fingerprint=true +fingerprint: 10972433733941915468 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] + +run ok +export fingerprint k=a kTs=1 end=c ts=6 allRevisions +---- +export: data_size:7 fingerprint=true +fingerprint: 8031517972374541544 +export: {a-b}/[1.000000000,0=/] +export: {b-c}/[3.000000000,0=/ 1.000000000,0=/] + +run ok +export fingerprint k=f kTs=4 end=g ts=6 allRevisions +---- +export: data_size:35 fingerprint=true +fingerprint: 11595862564133433257 +export: {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] + +run ok +export fingerprint k=f kTs=4 end=g startTs=2 ts=4 allRevisions +---- +export: data_size:10 fingerprint=true +fingerprint: 12786325629015412061 +export: {f-g}/[3.000000000,0=/] + +run ok +export fingerprint k=f kTs=3 end=g startTs=2 ts=4 allRevisions +---- +export: data_size:2 fingerprint=true +fingerprint: 0 +export: {f-g}/[3.000000000,0=/] + +# Resuming from a specific key version at or below startTS. +run ok +export fingerprint k=a kTs=2 end=c startTs=2 ts=6 +---- +export: data_size:3 fingerprint=true +fingerprint: 8031517972374541544 +export: {b-c}/[3.000000000,0=/]
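
Aside: the per-KV fingerprinting scheme that fingerprintWriter implements above reduces to hashing key, encoded timestamp, and value with fnv64 and XOR-ing the per-KV sums into a running aggregate. The following is a minimal standalone sketch of that scheme, not part of the patch; the helper name hashKV and the literal "tsN" byte slices standing in for EncodeMVCCTimestampToBuf output are illustrative assumptions, whereas the real code uses fingerprintWriter/uintXorAggregate as defined in fingerprint_writer.go.

    package main

    import (
    	"fmt"
    	"hash/fnv"
    )

    // hashKV hashes one point KV the way fingerprintWriter.PutRawMVCC does:
    // key bytes, then the encoded timestamp, then the raw value, via fnv64.
    func hashKV(key, encodedTS, value []byte) uint64 {
    	h := fnv.New64()
    	h.Write(key)       // hash.Hash64 Write never returns an error
    	h.Write(encodedTS) // stand-in for EncodeMVCCTimestampToBuf output
    	h.Write(value)
    	return h.Sum64()
    }

    func main() {
    	// XOR-aggregate the per-KV hashes into a single fingerprint.
    	var agg uint64
    	kvs := [][3][]byte{
    		{[]byte("a"), []byte("ts1"), []byte("value1")},
    		{[]byte("b"), []byte("ts2"), []byte("value2")},
    	}
    	for _, kv := range kvs {
    		agg ^= hashKV(kv[0], kv[1], kv[2])
    	}
    	fmt.Printf("fingerprint: %d\n", agg)
    }

Because XOR is commutative and associative, fingerprints over disjoint key spans compose by XOR, which is what the assert-hash-is-per-kv test case above checks (fingerprint over keys 1-3 equals fingerprint over keys 1-2 XOR fingerprint over key 3).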