From 6eec284cbe36a8817ffad07d169372c5b7b5d33f Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Thu, 21 Nov 2024 10:51:48 -0500 Subject: [PATCH] cockroachkvs: add TestKeySchema_KeySeeker Adapt TestKeySchema_KeySeeker from pkg/storage, adding it to the Pebble cockroachkvs package. --- cockroachkvs/cockroachkvs.go | 2 +- cockroachkvs/cockroachkvs_test.go | 151 +++++++++++++++-- cockroachkvs/key_schema_test.go | 2 +- cockroachkvs/testdata/key_schema_key_seeker | 171 ++++++++++++++++++++ 4 files changed, 315 insertions(+), 11 deletions(-) create mode 100644 cockroachkvs/testdata/key_schema_key_seeker diff --git a/cockroachkvs/cockroachkvs.go b/cockroachkvs/cockroachkvs.go index 54a5609fde..cc5c61f2f2 100644 --- a/cockroachkvs/cockroachkvs.go +++ b/cockroachkvs/cockroachkvs.go @@ -333,7 +333,7 @@ var zeroLogical [4]byte func normalizeEngineSuffixForCompare(a []byte) []byte { // Check sentinel byte. if invariants.Enabled && len(a) != int(a[len(a)-1]) { - panic(errors.AssertionFailedf("malformed suffix: %x", a)) + panic(errors.AssertionFailedf("malformed suffix: %x (length byte is %d; but suffix is %d bytes)", a, a[len(a)-1], len(a))) } // Strip off sentinel byte. a = a[:len(a)-1] diff --git a/cockroachkvs/cockroachkvs_test.go b/cockroachkvs/cockroachkvs_test.go index ecb62e5538..543f928999 100644 --- a/cockroachkvs/cockroachkvs_test.go +++ b/cockroachkvs/cockroachkvs_test.go @@ -13,12 +13,14 @@ import ( "strings" "testing" "time" + "unsafe" "github.com/cockroachdb/crlib/crbytes" "github.com/cockroachdb/crlib/crstrings" "github.com/cockroachdb/crlib/testutils/leaktest" "github.com/cockroachdb/crlib/testutils/require" "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/testutils" "github.com/cockroachdb/pebble/sstable/block" @@ -154,6 +156,112 @@ func TestKeySchema_KeyWriter(t *testing.T) { }) } +func TestKeySchema_KeySeeker(t *testing.T) { + defer leaktest.AfterTest(t)() + + var buf bytes.Buffer + var enc colblk.DataBlockEncoder + var dec colblk.DataBlockDecoder + var ks colblk.KeySeeker + var maxKeyLen int + enc.Init(&KeySchema) + + initKeySeeker := func() { + ksPointer := &cockroachKeySeeker{} + KeySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec) + ks = KeySchema.KeySeeker((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer))) + } + + datadriven.RunTest(t, "testdata/key_schema_key_seeker", func(t *testing.T, td *datadriven.TestData) string { + buf.Reset() + switch td.Cmd { + case "define-block": + enc.Reset() + maxKeyLen = 0 + var rows int + for _, line := range crstrings.Lines(td.Input) { + k := parseUserKey(line) + fmt.Fprintf(&buf, "Parse(%s) = hex:%x\n", line, k) + maxKeyLen = max(maxKeyLen, len(k)) + kcmp := enc.KeyWriter.ComparePrev(k) + ikey := base.InternalKey{ + UserKey: k, + Trailer: pebble.MakeInternalKeyTrailer(0, base.InternalKeyKindSet), + } + enc.Add(ikey, k, block.InPlaceValuePrefix(false), kcmp, false /* isObsolete */) + rows++ + } + blk, _ := enc.Finish(rows, enc.Size()) + dec.Init(&KeySchema, blk) + return buf.String() + case "is-lower-bound": + initKeySeeker() + syntheticSuffix, syntheticSuffixStr, _ := getSyntheticSuffix(t, td) + + for _, line := range crstrings.Lines(td.Input) { + k := parseUserKey(line) + got := ks.IsLowerBound(k, syntheticSuffix) + fmt.Fprintf(&buf, "IsLowerBound(%s, %q) = %t\n", line, syntheticSuffixStr, got) + } + return buf.String() + case "seek-ge": + initKeySeeker() + for _, line := range crstrings.Lines(td.Input) { + k := parseUserKey(line) + boundRow := -1 + searchDir := 0 + row, equalPrefix := ks.SeekGE(k, boundRow, int8(searchDir)) + + fmt.Fprintf(&buf, "SeekGE(%s, boundRow=%d, searchDir=%d) = (row=%d, equalPrefix=%t)", + line, boundRow, searchDir, row, equalPrefix) + if row >= 0 && row < dec.BlockDecoder().Rows() { + var kiter colblk.PrefixBytesIter + kiter.Buf = make([]byte, maxKeyLen+1) + key := ks.MaterializeUserKey(&kiter, -1, row) + fmt.Fprintf(&buf, " [hex:%x]", key) + } + fmt.Fprintln(&buf) + } + return buf.String() + case "materialize-user-key": + initKeySeeker() + syntheticSuffix, syntheticSuffixStr, syntheticSuffixOk := getSyntheticSuffix(t, td) + + var kiter colblk.PrefixBytesIter + kiter.Buf = make([]byte, maxKeyLen+len(syntheticSuffix)+1) + prevRow := -1 + for _, line := range crstrings.Lines(td.Input) { + row, err := strconv.Atoi(line) + if err != nil { + t.Fatalf("bad row number %q: %s", line, err) + } + if syntheticSuffixOk { + key := ks.MaterializeUserKeyWithSyntheticSuffix(&kiter, syntheticSuffix, prevRow, row) + fmt.Fprintf(&buf, "MaterializeUserKeyWithSyntheticSuffix(%d, %d, %s) = hex:%x\n", prevRow, row, syntheticSuffixStr, key) + } else { + key := ks.MaterializeUserKey(&kiter, prevRow, row) + fmt.Fprintf(&buf, "MaterializeUserKey(%d, %d) = hex:%x\n", prevRow, row, key) + } + prevRow = row + } + return buf.String() + default: + panic(fmt.Sprintf("unrecognized command %q", td.Cmd)) + } + }) +} + +func getSyntheticSuffix(t *testing.T, td *datadriven.TestData) ([]byte, string, bool) { + var syntheticSuffix []byte + var syntheticSuffixStr string + cmdArg, ok := td.Arg("synthetic-suffix") + if ok { + syntheticSuffixStr = strings.TrimPrefix(cmdArg.SingleVal(t), "@") + syntheticSuffix = parseVersion(syntheticSuffixStr) + } + return syntheticSuffix, syntheticSuffixStr, ok +} + func asciiOrHex(b []byte) string { if bytes.ContainsFunc(b, func(r rune) bool { return r < ' ' || r > '~' }) { return fmt.Sprintf("hex:%x", b) @@ -523,18 +631,43 @@ func parseUserKey(userKeyStr string) []byte { panic(fmt.Sprintf("invalid user key string %s: %v", userKeyStr, err)) } } + k := append(append([]byte(roachKey), 0), parseVersion(versionStr)...) + checkEngineKey(k) + return k +} - // Append sentinel byte. - userKey := append([]byte(roachKey), 0) - if versionStr != "" { - var version []byte - if _, err := fmt.Sscanf(versionStr, "%X", &version); err != nil { - panic(fmt.Sprintf("invalid user key string %q: cannot parse version %X", userKeyStr, version)) +func parseVersion(versionStr string) []byte { + if versionStr == "" { + // No version. + return nil + } + + if ci := strings.IndexByte(versionStr, ','); ci >= 0 { + // Parse as a MVCC timestamp version. + wallTime, err := strconv.ParseUint(versionStr[:ci], 10, 64) + if err != nil { + panic(err) } - userKey = append(userKey, version...) - userKey = append(userKey, byte(len(version)+1)) + logicalTime, err := strconv.ParseUint(versionStr[ci+1:], 10, 32) + if err != nil { + panic(err) + } + ret := AppendTimestamp([]byte{0x00}, wallTime, uint32(logicalTime)) + if len(ret) != 14 && len(ret) != 10 { + panic(fmt.Sprintf("expected 10 or 14-length ret got %d", len(ret))) + } + checkEngineKey(ret) + // TODO(jackson): Refactor to allow us to generate a suffix without a + // sentinel byte rather than stripping it like this. + return ret[1:] + } + + // Parse as a hex-encoded version. + var version []byte + if _, err := fmt.Sscanf(versionStr, "%X", &version); err != nil { + panic(fmt.Sprintf("invalid version string %q", versionStr)) } - return userKey + return append(version, byte(len(version)+1)) } // parseInternalKey parses a cockroach key in the following format: diff --git a/cockroachkvs/key_schema_test.go b/cockroachkvs/key_schema_test.go index 8a79c91025..c3309bce0b 100644 --- a/cockroachkvs/key_schema_test.go +++ b/cockroachkvs/key_schema_test.go @@ -146,7 +146,7 @@ func TestKeySchema_RandomKeys(t *testing.T) { var dec colblk.DataBlockDecoder dec.Init(&KeySchema, blk) var it colblk.DataBlockIter - it.InitOnce(&KeySchema, Compare, Split, nil) + it.InitOnce(&KeySchema, &Comparer, nil) require.NoError(t, it.Init(&dec, block.NoTransforms)) // Ensure that a scan across the block finds all the relevant keys. var valBuf []byte diff --git a/cockroachkvs/testdata/key_schema_key_seeker b/cockroachkvs/testdata/key_schema_key_seeker new file mode 100644 index 0000000000..9509b4162e --- /dev/null +++ b/cockroachkvs/testdata/key_schema_key_seeker @@ -0,0 +1,171 @@ +define-block +"foo" @ 00000000b2d05e0000000001 +"foo" @ 00000000b2d05e00 +---- +Parse("foo" @ 00000000b2d05e0000000001) = hex:666f6f0000000000b2d05e00000000010d +Parse("foo" @ 00000000b2d05e00) = hex:666f6f0000000000b2d05e0009 + +is-lower-bound +"abc" +fax @ 3000000000,1 +foo @ 3000000000,2 +foo @ 3000000000,1 +foo @ 3000000000,0 +zoo @ 9100000000,2 +---- +IsLowerBound("abc", "") = true +IsLowerBound(fax @ 3000000000,1, "") = true +IsLowerBound(foo @ 3000000000,2, "") = true +IsLowerBound(foo @ 3000000000,1, "") = true +IsLowerBound(foo @ 3000000000,0, "") = false +IsLowerBound(zoo @ 9100000000,2, "") = false + +seek-ge +fax @ 9000000000,0 +foo @ 3000000000,1 +foo @ 3000000000,0 +foo @ 3000000000,2 +zoo @ 9000000000,0 +---- +SeekGE(fax @ 9000000000,0, boundRow=-1, searchDir=0) = (row=0, equalPrefix=false) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(foo @ 3000000000,1, boundRow=-1, searchDir=0) = (row=0, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(foo @ 3000000000,0, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:666f6f0000000000b2d05e0009] +SeekGE(foo @ 3000000000,2, boundRow=-1, searchDir=0) = (row=0, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(zoo @ 9000000000,0, boundRow=-1, searchDir=0) = (row=2, equalPrefix=false) + +define-block +bar @ 3000000000,1 +bax @ 3000000000,1 +foo @ 3000000000,1 +moo @ 3000000000,1 +---- +Parse(bar @ 3000000000,1) = hex:6261720000000000b2d05e00000000010d +Parse(bax @ 3000000000,1) = hex:6261780000000000b2d05e00000000010d +Parse(foo @ 3000000000,1) = hex:666f6f0000000000b2d05e00000000010d +Parse(moo @ 3000000000,1) = hex:6d6f6f0000000000b2d05e00000000010d + +is-lower-bound +bar @ 9000000000,2 +bar @ 8000000000,2 +bar @ 8000000000,1 +bar @ 8000000000,0 +bar @ 7000000000,9 +bar @ 3000000000,2 +bar @ 3000000000,1 +bar @ 3000000000,0 +---- +IsLowerBound(bar @ 9000000000,2, "") = true +IsLowerBound(bar @ 8000000000,2, "") = true +IsLowerBound(bar @ 8000000000,1, "") = true +IsLowerBound(bar @ 8000000000,0, "") = true +IsLowerBound(bar @ 7000000000,9, "") = true +IsLowerBound(bar @ 3000000000,2, "") = true +IsLowerBound(bar @ 3000000000,1, "") = true +IsLowerBound(bar @ 3000000000,0, "") = false + +is-lower-bound synthetic-suffix=@8000000000,1 +bar @ 9000000000,2 +bar @ 8000000000,2 +bar @ 8000000000,1 +bar @ 8000000000,0 +bar @ 7000000000,9 +bar @ 3000000000,2 +bar @ 3000000000,1 +bar @ 3000000000,0 +---- +IsLowerBound(bar @ 9000000000,2, "8000000000,1") = true +IsLowerBound(bar @ 8000000000,2, "8000000000,1") = true +IsLowerBound(bar @ 8000000000,1, "8000000000,1") = true +IsLowerBound(bar @ 8000000000,0, "8000000000,1") = false +IsLowerBound(bar @ 7000000000,9, "8000000000,1") = false +IsLowerBound(bar @ 3000000000,2, "8000000000,1") = false +IsLowerBound(bar @ 3000000000,1, "8000000000,1") = false +IsLowerBound(bar @ 3000000000,0, "8000000000,1") = false + +seek-ge +apple @ 2000000000,0 +bar @ 4000000000,0 +bar @ 3000000000,0 +bar @ 2000000000,0 +bax @ 3000000000,1 +bax @ 3000000000,0 +fax @ 9000000000,0 +foo @ 3000000000,2 +foo @ 3000000000,1 +foo @ 3000000000,0 +moo @ 3000000001,0 +moo @ 3000000000,2 +moo @ 3000000000,1 +moo @ 3000000000,0 +zoo @ 9000000000,0 +---- +SeekGE(apple @ 2000000000,0, boundRow=-1, searchDir=0) = (row=0, equalPrefix=false) [hex:6261720000000000b2d05e00000000010d] +SeekGE(bar @ 4000000000,0, boundRow=-1, searchDir=0) = (row=0, equalPrefix=true) [hex:6261720000000000b2d05e00000000010d] +SeekGE(bar @ 3000000000,0, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:6261780000000000b2d05e00000000010d] +SeekGE(bar @ 2000000000,0, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:6261780000000000b2d05e00000000010d] +SeekGE(bax @ 3000000000,1, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:6261780000000000b2d05e00000000010d] +SeekGE(bax @ 3000000000,0, boundRow=-1, searchDir=0) = (row=2, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(fax @ 9000000000,0, boundRow=-1, searchDir=0) = (row=2, equalPrefix=false) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(foo @ 3000000000,2, boundRow=-1, searchDir=0) = (row=2, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(foo @ 3000000000,1, boundRow=-1, searchDir=0) = (row=2, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d] +SeekGE(foo @ 3000000000,0, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d] +SeekGE(moo @ 3000000001,0, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d] +SeekGE(moo @ 3000000000,2, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d] +SeekGE(moo @ 3000000000,1, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d] +SeekGE(moo @ 3000000000,0, boundRow=-1, searchDir=0) = (row=4, equalPrefix=true) +SeekGE(zoo @ 9000000000,0, boundRow=-1, searchDir=0) = (row=4, equalPrefix=false) + +materialize-user-key +0 +1 +2 +3 +---- +MaterializeUserKey(-1, 0) = hex:6261720000000000b2d05e00000000010d +MaterializeUserKey(0, 1) = hex:6261780000000000b2d05e00000000010d +MaterializeUserKey(1, 2) = hex:666f6f0000000000b2d05e00000000010d +MaterializeUserKey(2, 3) = hex:6d6f6f0000000000b2d05e00000000010d + +materialize-user-key synthetic-suffix=@8000000000,9 +0 +1 +2 +3 +---- +MaterializeUserKeyWithSyntheticSuffix(-1, 0, 8000000000,9) = hex:6261720000000001dcd65000000000090d +MaterializeUserKeyWithSyntheticSuffix(0, 1, 8000000000,9) = hex:6261780000000001dcd65000000000090d +MaterializeUserKeyWithSyntheticSuffix(1, 2, 8000000000,9) = hex:666f6f0000000001dcd65000000000090d +MaterializeUserKeyWithSyntheticSuffix(2, 3, 8000000000,9) = hex:6d6f6f0000000001dcd65000000000090d + +materialize-user-key +3 +2 +0 +1 +---- +MaterializeUserKey(-1, 3) = hex:6d6f6f0000000000b2d05e00000000010d +MaterializeUserKey(3, 2) = hex:666f6f0000000000b2d05e00000000010d +MaterializeUserKey(2, 0) = hex:6261720000000000b2d05e00000000010d +MaterializeUserKey(0, 1) = hex:6261780000000000b2d05e00000000010d + +define-block +moo @ 3000000001,0 +moo @ 3000000000,2 +moo @ 3000000000,1 +moo @ 3000000000,0 +---- +Parse(moo @ 3000000001,0) = hex:6d6f6f0000000000b2d05e0109 +Parse(moo @ 3000000000,2) = hex:6d6f6f0000000000b2d05e00000000020d +Parse(moo @ 3000000000,1) = hex:6d6f6f0000000000b2d05e00000000010d +Parse(moo @ 3000000000,0) = hex:6d6f6f0000000000b2d05e0009 + +materialize-user-key +0 +1 +2 +3 +---- +MaterializeUserKey(-1, 0) = hex:6d6f6f0000000000b2d05e0109 +MaterializeUserKey(0, 1) = hex:6d6f6f0000000000b2d05e00000000020d +MaterializeUserKey(1, 2) = hex:6d6f6f0000000000b2d05e00000000010d +MaterializeUserKey(2, 3) = hex:6d6f6f0000000000b2d05e0009