Skip to content

Commit

Permalink
storage: add MVCCExportFingerprintOptions
Browse files Browse the repository at this point in the history
This adds MVCCExportFingerprintOptions with two new options:

 - StripTenantPrefix
 - StripValueChecksum

The goal of these options is to produce a fingerprint that can be used
for comparing data across two tenants.

Note that if arbitrary keys and values are encountered, either option
may erroneously strip bytes from the fingerprint input that are not
actually a tenant prefix or a checksum.

Fixes cockroachdb#91150

Release note: None
  • Loading branch information
stevendanna committed Nov 11, 2022
1 parent 8e9b548 commit d316af0
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 11 deletions.
46 changes: 41 additions & 5 deletions pkg/storage/fingerprint_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"hash"
"io"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/errors"
Expand All @@ -33,14 +34,19 @@ import (
type fingerprintWriter struct {
// hasher computes the 64-bit hash of a single key/value pair. The Put*
// methods reset it when they return, so each pair is hashed independently.
hasher hash.Hash64
// timestampBuf is a scratch buffer reused across calls to hold the encoded
// MVCC timestamp before it is hashed.
timestampBuf []byte
// options selects which parts of keys and values are stripped before
// hashing (see hashKey and hashValue).
options MVCCExportFingerprintOptions

// sstWriter is the underlying SST writer set up by makeFingerprintWriter.
// NOTE(review): how it is used is not visible in this section — confirm.
sstWriter *SSTWriter
// xorAgg accumulates the per-pair hashes produced by hasher; presumably
// they are combined with XOR, per the type name — confirm against
// uintXorAggregate.
xorAgg *uintXorAggregate
}

// makeFingerprintWriter creates a new fingerprintWriter.
func makeFingerprintWriter(
ctx context.Context, hasher hash.Hash64, cs *cluster.Settings, f io.Writer,
ctx context.Context,
hasher hash.Hash64,
cs *cluster.Settings,
f io.Writer,
opts MVCCExportFingerprintOptions,
) fingerprintWriter {
// TODO(adityamaru,dt): Once
// https://github.com/cockroachdb/cockroach/issues/90450 has been addressed we
Expand All @@ -50,6 +56,7 @@ func makeFingerprintWriter(
sstWriter: &sstWriter,
hasher: hasher,
xorAgg: &uintXorAggregate{},
options: opts,
}
}

Expand Down Expand Up @@ -106,14 +113,14 @@ func (f *fingerprintWriter) PutRawMVCC(key MVCCKey, value []byte) error {
defer f.hasher.Reset()

// Hash the key/timestamp and value of the RawMVCC.
if err := f.hash(key.Key); err != nil {
if err := f.hashKey(key.Key); err != nil {
return err
}
f.timestampBuf = EncodeMVCCTimestampToBuf(f.timestampBuf, key.Timestamp)
if err := f.hash(f.timestampBuf); err != nil {
return err
}
if err := f.hash(value); err != nil {
if err := f.hashValue(value); err != nil {
return err
}
f.xorAgg.add(f.hasher.Sum64())
Expand All @@ -125,17 +132,31 @@ func (f *fingerprintWriter) PutUnversioned(key roachpb.Key, value []byte) error
defer f.hasher.Reset()

// Hash the key and value in the absence of a timestamp.
if err := f.hash(key); err != nil {
if err := f.hashKey(key); err != nil {
return err
}
if err := f.hash(value); err != nil {
if err := f.hashValue(value); err != nil {
return err
}

f.xorAgg.add(f.hasher.Sum64())
return nil
}

// hashKey writes key into the hasher. When the StripTenantPrefix option is
// set, the key is first passed through stripTenantPrefix so that only the
// remainder is hashed.
func (f *fingerprintWriter) hashKey(key []byte) error {
	toHash := key
	if f.options.StripTenantPrefix {
		toHash = f.stripTenantPrefix(key)
	}
	return f.hash(toHash)
}

// hashValue writes value into the hasher. When the StripValueChecksum option
// is set, the value is first passed through stripValueChecksum so that the
// leading checksum bytes do not contribute to the hash.
func (f *fingerprintWriter) hashValue(value []byte) error {
	toHash := value
	if f.options.StripValueChecksum {
		toHash = f.stripValueChecksum(value)
	}
	return f.hash(toHash)
}

func (f *fingerprintWriter) hash(data []byte) error {
if _, err := f.hasher.Write(data); err != nil {
return errors.NewAssertionErrorWithWrappedErrf(err,
Expand All @@ -144,3 +165,18 @@ func (f *fingerprintWriter) hash(data []byte) error {

return nil
}

// stripValueChecksum returns value without its first mvccChecksumSize bytes.
// Values shorter than mvccChecksumSize are returned unchanged. The returned
// slice aliases the input; no copy is made.
func (f *fingerprintWriter) stripValueChecksum(value []byte) []byte {
	if len(value) >= mvccChecksumSize {
		return value[mvccChecksumSize:]
	}
	return value
}

// stripTenantPrefix returns the remainder of key after its tenant prefix has
// been decoded by keys.DecodeTenantPrefixE. Stripping is best-effort: if the
// decode fails, the error is intentionally discarded and the key is returned
// unchanged so that it still contributes to the fingerprint.
func (f *fingerprintWriter) stripTenantPrefix(key []byte) []byte {
	if remainder, _, err := keys.DecodeTenantPrefixE(key); err == nil {
		return remainder
	}
	return key
}
15 changes: 14 additions & 1 deletion pkg/storage/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -5784,7 +5784,7 @@ func MVCCExportFingerprint(
defer span.Finish()

hasher := fnv.New64()
fingerprintWriter := makeFingerprintWriter(ctx, hasher, cs, dest)
fingerprintWriter := makeFingerprintWriter(ctx, hasher, cs, dest, opts.FingerprintOptions)
defer fingerprintWriter.Close()

summary, resumeKey, err := mvccExportToWriter(ctx, reader, opts, &fingerprintWriter)
Expand Down Expand Up @@ -6249,6 +6249,19 @@ type MVCCExportOptions struct {
// resources. Export queries limiter in its iteration loop to break out once
// resources are exhausted.
ResourceLimiter ResourceLimiter
// FingerprintOptions controls how fingerprints are generated
// when using MVCCExportFingerprint.
FingerprintOptions MVCCExportFingerprintOptions
}

// MVCCExportFingerprintOptions configures how keys and values are
// transformed before they are hashed into an export fingerprint.
// Both options are best-effort: a key that fails to decode as
// tenant-prefixed, or a value shorter than the checksum size, is
// hashed unchanged.
type MVCCExportFingerprintOptions struct {
// If StripTenantPrefix is true, keys that appear to be
// tenant-prefixed have the tenant-prefix removed before
// hashing.
StripTenantPrefix bool
// If StripValueChecksum is true, checksums are removed from
// the value before hashing.
StripValueChecksum bool
}

// MVCCIsSpanEmptyOptions configures the MVCCIsSpanEmpty function.
Expand Down
28 changes: 23 additions & 5 deletions pkg/storage/mvcc_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,10 @@ func cmdPut(e *evalCtx) error {
key := e.getKey()
val := e.getVal()

if e.hasArg("init-checksum") {
val.InitChecksum(key)
}

resolve, resolveStatus := e.getResolve()

return e.withWriter("put", func(rw storage.ReadWriter) error {
Expand Down Expand Up @@ -1321,6 +1325,10 @@ func cmdExport(e *evalCtx) error {
EndTS: e.getTs(nil),
ExportAllRevisions: e.hasArg("allRevisions"),
StopMidKey: e.hasArg("stopMidKey"),
FingerprintOptions: storage.MVCCExportFingerprintOptions{
StripTenantPrefix: e.hasArg("stripTenantPrefix"),
StripValueChecksum: e.hasArg("stripValueChecksum"),
},
}
if e.hasArg("maxIntents") {
e.scanArg("maxIntents", &opts.MaxIntents)
Expand Down Expand Up @@ -2192,23 +2200,33 @@ func (e *evalCtx) getKey() roachpb.Key {
e.t.Helper()
var keyS string
e.scanArg("k", &keyS)
return toKey(keyS)
return toKey(keyS, e.getTenantCodec())
}

func (e *evalCtx) getKeyRange() (sk, ek roachpb.Key) {
e.t.Helper()
var keyS string
e.scanArg("k", &keyS)
sk = toKey(keyS)
codec := e.getTenantCodec()
sk = toKey(keyS, codec)
ek = sk.Next()
if e.hasArg("end") {
var endKeyS string
e.scanArg("end", &endKeyS)
ek = toKey(endKeyS)
ek = toKey(endKeyS, codec)
}
return sk, ek
}

// getTenantCodec returns the SQL codec used to encode keys for the current
// command. Without a "tenant-prefix" argument it returns the system tenant's
// codec; otherwise the argument is scanned as an integer tenant ID and a codec
// for that tenant is built.
func (e *evalCtx) getTenantCodec() keys.SQLCodec {
	if !e.hasArg("tenant-prefix") {
		return keys.SystemSQLCodec
	}
	var tenantID int
	e.scanArg("tenant-prefix", &tenantID)
	return keys.MakeSQLCodec(roachpb.TenantID{InternalValue: uint64(tenantID)})
}

func (e *evalCtx) newTxn(
txnName string, ts, globalUncertaintyLimit hlc.Timestamp, key roachpb.Key,
) (*roachpb.Transaction, error) {
Expand Down Expand Up @@ -2424,7 +2442,7 @@ func (e *evalCtx) metamorphicPeekBounds(
return rw, leftPeekBound, rightPeekBound
}

func toKey(s string) roachpb.Key {
func toKey(s string, sqlCodec keys.SQLCodec) roachpb.Key {
if len(s) == 0 {
return roachpb.Key(s)
}
Expand Down Expand Up @@ -2456,7 +2474,7 @@ func toKey(s string) roachpb.Key {

var colMap catalog.TableColMap
colMap.Set(0, 0)
key := keys.SystemSQLCodec.IndexPrefix(1, 1)
key := sqlCodec.IndexPrefix(1, 1)
key, _, err = rowenc.EncodeColumns([]descpb.ColumnID{0}, nil /* directions */, colMap, []tree.Datum{tree.NewDString(pk)}, key)
if err != nil {
panic(err)
Expand Down
52 changes: 52 additions & 0 deletions pkg/storage/testdata/mvcc_histories/export_fingerprint_tenant
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Tests MVCC export fingerprinting configured to be tenant-agnostic.
#
# We create a simple set of keys that we expect will produce the same
# fingerprint when tenant prefixes and value checksums are ignored.
run ok
put k=/a ts=2 v=a localTs=2 tenant-prefix=10 init-checksum
put k=/b ts=2 v=b tenant-prefix=10 init-checksum
put k=/c ts=2 v=c tenant-prefix=10 init-checksum
put k=/d ts=2 v=d tenant-prefix=10 init-checksum
put k=/a ts=2 v=a tenant-prefix=11 init-checksum
put k=/b ts=2 v=b localTs=4 tenant-prefix=11 init-checksum
put k=/c ts=2 v=c tenant-prefix=11 init-checksum
put k=/d ts=2 v=d tenant-prefix=11 init-checksum
----
>> at end:
data: /Tenant/10/Table/1/1/"a"/0/2.000000000,0 -> /BYTES/a
data: /Tenant/10/Table/1/1/"b"/0/2.000000000,0 -> /BYTES/b
data: /Tenant/10/Table/1/1/"c"/0/2.000000000,0 -> /BYTES/c
data: /Tenant/10/Table/1/1/"d"/0/2.000000000,0 -> /BYTES/d
data: /Tenant/11/Table/1/1/"a"/0/2.000000000,0 -> /BYTES/a
data: /Tenant/11/Table/1/1/"b"/0/2.000000000,0 -> /BYTES/b
data: /Tenant/11/Table/1/1/"c"/0/2.000000000,0 -> /BYTES/c
data: /Tenant/11/Table/1/1/"d"/0/2.000000000,0 -> /BYTES/d

# Fingerprint tenant 10
run ok
export fingerprint k=/a end=/z ts=0 allRevisions tenant-prefix=10
----
export: data_size:60 deprecated_rows:4 entry_counts:<key:4294967297 value:4 > fingerprint=true
fingerprint: 9662827328792920765

# Fingerprint tenant 11
run ok
export fingerprint k=/a end=/z ts=0 allRevisions tenant-prefix=11
----
export: data_size:60 deprecated_rows:4 entry_counts:<key:4294967297 value:4 > fingerprint=true
fingerprint: 17513934348803083905

# Fingerprint tenant 10 with tenant prefix and value checksum stripped
run ok
export fingerprint k=/a end=/z ts=0 allRevisions tenant-prefix=10 stripTenantPrefix stripValueChecksum
----
export: data_size:60 deprecated_rows:4 entry_counts:<key:4294967297 value:4 > fingerprint=true
fingerprint: 6565009613709557332

# Fingerprint tenant 11 with tenant prefix and value checksum stripped
# NOTE: This fingerprint should match the stripped tenant 10 fingerprint.
run ok
export fingerprint k=/a end=/z ts=0 allRevisions tenant-prefix=11 stripTenantPrefix stripValueChecksum
----
export: data_size:60 deprecated_rows:4 entry_counts:<key:4294967297 value:4 > fingerprint=true
fingerprint: 6565009613709557332

0 comments on commit d316af0

Please sign in to comment.