Skip to content

Commit

Permalink
cockroachkvs: add TestKeySchema_KeySeeker
Browse files Browse the repository at this point in the history
Adapt TestKeySchema_KeySeeker from pkg/storage, adding it to the Pebble
cockroachkvs package.
  • Loading branch information
jbowens committed Dec 5, 2024
1 parent 830786c commit 6eec284
Show file tree
Hide file tree
Showing 4 changed files with 315 additions and 11 deletions.
2 changes: 1 addition & 1 deletion cockroachkvs/cockroachkvs.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ var zeroLogical [4]byte
func normalizeEngineSuffixForCompare(a []byte) []byte {
// Check sentinel byte.
if invariants.Enabled && len(a) != int(a[len(a)-1]) {
panic(errors.AssertionFailedf("malformed suffix: %x", a))
panic(errors.AssertionFailedf("malformed suffix: %x (length byte is %d; but suffix is %d bytes)", a, a[len(a)-1], len(a)))
}
// Strip off sentinel byte.
a = a[:len(a)-1]
Expand Down
151 changes: 142 additions & 9 deletions cockroachkvs/cockroachkvs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ import (
"strings"
"testing"
"time"
"unsafe"

"github.com/cockroachdb/crlib/crbytes"
"github.com/cockroachdb/crlib/crstrings"
"github.com/cockroachdb/crlib/testutils/leaktest"
"github.com/cockroachdb/crlib/testutils/require"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/pebble"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/testutils"
"github.com/cockroachdb/pebble/sstable/block"
Expand Down Expand Up @@ -154,6 +156,112 @@ func TestKeySchema_KeyWriter(t *testing.T) {
})
}

// TestKeySchema_KeySeeker is a datadriven test that exercises the
// colblk.KeySeeker produced by KeySchema. It reads commands from
// testdata/key_schema_key_seeker and supports:
//
//   - define-block: encode the input keys into a new data block.
//   - is-lower-bound: call IsLowerBound for each input key, optionally with a
//     synthetic-suffix argument.
//   - seek-ge: call SeekGE for each input key and print the resulting row.
//   - materialize-user-key: materialize the keys at the given row numbers,
//     optionally with a synthetic-suffix argument.
//
// The text written to buf is returned as the command's output.
func TestKeySchema_KeySeeker(t *testing.T) {
defer leaktest.AfterTest(t)()

// State shared across commands: the encoder/decoder hold the block most
// recently built by define-block, and maxKeyLen is the length of the longest
// key in that block (used below to size materialization buffers).
var buf bytes.Buffer
var enc colblk.DataBlockEncoder
var dec colblk.DataBlockDecoder
var ks colblk.KeySeeker
var maxKeyLen int
enc.Init(&KeySchema)

// initKeySeeker allocates a fresh cockroachKeySeeker, reinterprets its memory
// as a colblk.KeySeekerMetadata via unsafe.Pointer, initializes it against
// the current decoder and stores the resulting seeker in ks.
// NOTE(review): this presumably relies on cockroachKeySeeker fitting within
// colblk.KeySeekerMetadata — confirm against the type definitions.
initKeySeeker := func() {
ksPointer := &cockroachKeySeeker{}
KeySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec)
ks = KeySchema.KeySeeker((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)))
}

datadriven.RunTest(t, "testdata/key_schema_key_seeker", func(t *testing.T, td *datadriven.TestData) string {
// buf accumulates only the current command's output.
buf.Reset()
switch td.Cmd {
case "define-block":
// Start a new block; any previously defined block is discarded.
enc.Reset()
maxKeyLen = 0
var rows int
for _, line := range crstrings.Lines(td.Input) {
k := parseUserKey(line)
// Echo the parsed key in hex so the testdata records the encoding.
fmt.Fprintf(&buf, "Parse(%s) = hex:%x\n", line, k)
maxKeyLen = max(maxKeyLen, len(k))
// ComparePrev is evaluated before Add; its result is passed to Add.
kcmp := enc.KeyWriter.ComparePrev(k)
// Every key is added with kind SET and a trailer built from 0
// (presumably the sequence number — confirm against
// MakeInternalKeyTrailer).
ikey := base.InternalKey{
UserKey: k,
Trailer: pebble.MakeInternalKeyTrailer(0, base.InternalKeyKindSet),
}
// The user key doubles as the value passed to Add.
enc.Add(ikey, k, block.InPlaceValuePrefix(false), kcmp, false /* isObsolete */)
rows++
}
// Finish the block with all rows and point the decoder at it. The second
// return value of Finish is not used by this test.
blk, _ := enc.Finish(rows, enc.Size())
dec.Init(&KeySchema, blk)
return buf.String()
case "is-lower-bound":
initKeySeeker()
// The ok result is ignored: without the argument the suffix is nil and
// its string form is empty.
syntheticSuffix, syntheticSuffixStr, _ := getSyntheticSuffix(t, td)

for _, line := range crstrings.Lines(td.Input) {
k := parseUserKey(line)
got := ks.IsLowerBound(k, syntheticSuffix)
fmt.Fprintf(&buf, "IsLowerBound(%s, %q) = %t\n", line, syntheticSuffixStr, got)
}
return buf.String()
case "seek-ge":
initKeySeeker()
for _, line := range crstrings.Lines(td.Input) {
k := parseUserKey(line)
// This command always seeks with boundRow=-1 and searchDir=0.
boundRow := -1
searchDir := 0
row, equalPrefix := ks.SeekGE(k, boundRow, int8(searchDir))

fmt.Fprintf(&buf, "SeekGE(%s, boundRow=%d, searchDir=%d) = (row=%d, equalPrefix=%t)",
line, boundRow, searchDir, row, equalPrefix)
// Only print the key when the returned row lies inside the block; a row
// equal to the row count produces no key in the output.
if row >= 0 && row < dec.BlockDecoder().Rows() {
var kiter colblk.PrefixBytesIter
kiter.Buf = make([]byte, maxKeyLen+1)
key := ks.MaterializeUserKey(&kiter, -1, row)
fmt.Fprintf(&buf, " [hex:%x]", key)
}
fmt.Fprintln(&buf)
}
return buf.String()
case "materialize-user-key":
initKeySeeker()
syntheticSuffix, syntheticSuffixStr, syntheticSuffixOk := getSyntheticSuffix(t, td)

// A single iterator is reused across rows; its buffer is sized from the
// longest key in the block plus the synthetic suffix length plus one.
var kiter colblk.PrefixBytesIter
kiter.Buf = make([]byte, maxKeyLen+len(syntheticSuffix)+1)
// prevRow is the row materialized by the previous input line, or -1 for
// the first line.
prevRow := -1
for _, line := range crstrings.Lines(td.Input) {
row, err := strconv.Atoi(line)
if err != nil {
t.Fatalf("bad row number %q: %s", line, err)
}
// The synthetic-suffix variant is used only when the argument was given.
if syntheticSuffixOk {
key := ks.MaterializeUserKeyWithSyntheticSuffix(&kiter, syntheticSuffix, prevRow, row)
fmt.Fprintf(&buf, "MaterializeUserKeyWithSyntheticSuffix(%d, %d, %s) = hex:%x\n", prevRow, row, syntheticSuffixStr, key)
} else {
key := ks.MaterializeUserKey(&kiter, prevRow, row)
fmt.Fprintf(&buf, "MaterializeUserKey(%d, %d) = hex:%x\n", prevRow, row, key)
}
prevRow = row
}
return buf.String()
default:
// Unknown commands indicate a malformed testdata file.
panic(fmt.Sprintf("unrecognized command %q", td.Cmd))
}
})
}

// getSyntheticSuffix reads the optional "synthetic-suffix" argument of a
// datadriven command.
//
// When the argument is absent it returns (nil, "", false). Otherwise it takes
// the argument's single value, strips a leading "@" if there is one, and
// returns the result of parseVersion on that string, the string itself, and
// true.
func getSyntheticSuffix(t *testing.T, td *datadriven.TestData) ([]byte, string, bool) {
	arg, found := td.Arg("synthetic-suffix")
	if !found {
		return nil, "", false
	}
	suffixStr := strings.TrimPrefix(arg.SingleVal(t), "@")
	return parseVersion(suffixStr), suffixStr, true
}

func asciiOrHex(b []byte) string {
if bytes.ContainsFunc(b, func(r rune) bool { return r < ' ' || r > '~' }) {
return fmt.Sprintf("hex:%x", b)
Expand Down Expand Up @@ -523,18 +631,43 @@ func parseUserKey(userKeyStr string) []byte {
panic(fmt.Sprintf("invalid user key string %s: %v", userKeyStr, err))
}
}
k := append(append([]byte(roachKey), 0), parseVersion(versionStr)...)
checkEngineKey(k)
return k
}

// Append sentinel byte.
userKey := append([]byte(roachKey), 0)
if versionStr != "" {
var version []byte
if _, err := fmt.Sscanf(versionStr, "%X", &version); err != nil {
panic(fmt.Sprintf("invalid user key string %q: cannot parse version %X", userKeyStr, version))
// parseVersion converts the textual version of a test key into its encoded
// suffix bytes. Three forms are handled:
//
//   - "" yields nil (no version).
//   - "<wall>,<logical>" (contains a comma) is parsed as a decimal 64-bit wall
//     time and a decimal 32-bit logical time and encoded with AppendTimestamp.
//   - anything else is scanned as hex bytes with a trailing byte equal to the
//     number of hex-decoded bytes plus one appended.
//
// It panics if the string cannot be parsed.
//
// NOTE(review): in this diff view the statements that reference userKey (the
// two appends and `return userKey`) appear to be lines removed from the old
// parseUserKey body that are interleaved here; userKey is not declared in
// this function.
func parseVersion(versionStr string) []byte {
if versionStr == "" {
// No version.
return nil
}

if ci := strings.IndexByte(versionStr, ','); ci >= 0 {
// Parse as a MVCC timestamp version.
wallTime, err := strconv.ParseUint(versionStr[:ci], 10, 64)
if err != nil {
panic(err)
}
userKey = append(userKey, version...)
userKey = append(userKey, byte(len(version)+1))
logicalTime, err := strconv.ParseUint(versionStr[ci+1:], 10, 32)
if err != nil {
panic(err)
}
// Encode onto a one-byte buffer holding 0x00 so that the result can be
// passed to checkEngineKey; that leading byte is stripped again on return.
ret := AppendTimestamp([]byte{0x00}, wallTime, uint32(logicalTime))
if len(ret) != 14 && len(ret) != 10 {
panic(fmt.Sprintf("expected 10 or 14-length ret got %d", len(ret)))
}
checkEngineKey(ret)
// TODO(jackson): Refactor to allow us to generate a suffix without a
// sentinel byte rather than stripping it like this.
return ret[1:]
}

// Parse as a hex-encoded version.
var version []byte
if _, err := fmt.Sscanf(versionStr, "%X", &version); err != nil {
panic(fmt.Sprintf("invalid version string %q", versionStr))
}
return userKey
// The final byte records the suffix length including the length byte itself.
return append(version, byte(len(version)+1))
}

// parseInternalKey parses a cockroach key in the following format:
Expand Down
2 changes: 1 addition & 1 deletion cockroachkvs/key_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func TestKeySchema_RandomKeys(t *testing.T) {
var dec colblk.DataBlockDecoder
dec.Init(&KeySchema, blk)
var it colblk.DataBlockIter
it.InitOnce(&KeySchema, Compare, Split, nil)
it.InitOnce(&KeySchema, &Comparer, nil)
require.NoError(t, it.Init(&dec, block.NoTransforms))
// Ensure that a scan across the block finds all the relevant keys.
var valBuf []byte
Expand Down
171 changes: 171 additions & 0 deletions cockroachkvs/testdata/key_schema_key_seeker
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
define-block
"foo" @ 00000000b2d05e0000000001
"foo" @ 00000000b2d05e00
----
Parse("foo" @ 00000000b2d05e0000000001) = hex:666f6f0000000000b2d05e00000000010d
Parse("foo" @ 00000000b2d05e00) = hex:666f6f0000000000b2d05e0009

is-lower-bound
"abc"
fax @ 3000000000,1
foo @ 3000000000,2
foo @ 3000000000,1
foo @ 3000000000,0
zoo @ 9100000000,2
----
IsLowerBound("abc", "") = true
IsLowerBound(fax @ 3000000000,1, "") = true
IsLowerBound(foo @ 3000000000,2, "") = true
IsLowerBound(foo @ 3000000000,1, "") = true
IsLowerBound(foo @ 3000000000,0, "") = false
IsLowerBound(zoo @ 9100000000,2, "") = false

seek-ge
fax @ 9000000000,0
foo @ 3000000000,1
foo @ 3000000000,0
foo @ 3000000000,2
zoo @ 9000000000,0
----
SeekGE(fax @ 9000000000,0, boundRow=-1, searchDir=0) = (row=0, equalPrefix=false) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(foo @ 3000000000,1, boundRow=-1, searchDir=0) = (row=0, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(foo @ 3000000000,0, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:666f6f0000000000b2d05e0009]
SeekGE(foo @ 3000000000,2, boundRow=-1, searchDir=0) = (row=0, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(zoo @ 9000000000,0, boundRow=-1, searchDir=0) = (row=2, equalPrefix=false)

define-block
bar @ 3000000000,1
bax @ 3000000000,1
foo @ 3000000000,1
moo @ 3000000000,1
----
Parse(bar @ 3000000000,1) = hex:6261720000000000b2d05e00000000010d
Parse(bax @ 3000000000,1) = hex:6261780000000000b2d05e00000000010d
Parse(foo @ 3000000000,1) = hex:666f6f0000000000b2d05e00000000010d
Parse(moo @ 3000000000,1) = hex:6d6f6f0000000000b2d05e00000000010d

is-lower-bound
bar @ 9000000000,2
bar @ 8000000000,2
bar @ 8000000000,1
bar @ 8000000000,0
bar @ 7000000000,9
bar @ 3000000000,2
bar @ 3000000000,1
bar @ 3000000000,0
----
IsLowerBound(bar @ 9000000000,2, "") = true
IsLowerBound(bar @ 8000000000,2, "") = true
IsLowerBound(bar @ 8000000000,1, "") = true
IsLowerBound(bar @ 8000000000,0, "") = true
IsLowerBound(bar @ 7000000000,9, "") = true
IsLowerBound(bar @ 3000000000,2, "") = true
IsLowerBound(bar @ 3000000000,1, "") = true
IsLowerBound(bar @ 3000000000,0, "") = false

is-lower-bound synthetic-suffix=@8000000000,1
bar @ 9000000000,2
bar @ 8000000000,2
bar @ 8000000000,1
bar @ 8000000000,0
bar @ 7000000000,9
bar @ 3000000000,2
bar @ 3000000000,1
bar @ 3000000000,0
----
IsLowerBound(bar @ 9000000000,2, "8000000000,1") = true
IsLowerBound(bar @ 8000000000,2, "8000000000,1") = true
IsLowerBound(bar @ 8000000000,1, "8000000000,1") = true
IsLowerBound(bar @ 8000000000,0, "8000000000,1") = false
IsLowerBound(bar @ 7000000000,9, "8000000000,1") = false
IsLowerBound(bar @ 3000000000,2, "8000000000,1") = false
IsLowerBound(bar @ 3000000000,1, "8000000000,1") = false
IsLowerBound(bar @ 3000000000,0, "8000000000,1") = false

seek-ge
apple @ 2000000000,0
bar @ 4000000000,0
bar @ 3000000000,0
bar @ 2000000000,0
bax @ 3000000000,1
bax @ 3000000000,0
fax @ 9000000000,0
foo @ 3000000000,2
foo @ 3000000000,1
foo @ 3000000000,0
moo @ 3000000001,0
moo @ 3000000000,2
moo @ 3000000000,1
moo @ 3000000000,0
zoo @ 9000000000,0
----
SeekGE(apple @ 2000000000,0, boundRow=-1, searchDir=0) = (row=0, equalPrefix=false) [hex:6261720000000000b2d05e00000000010d]
SeekGE(bar @ 4000000000,0, boundRow=-1, searchDir=0) = (row=0, equalPrefix=true) [hex:6261720000000000b2d05e00000000010d]
SeekGE(bar @ 3000000000,0, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:6261780000000000b2d05e00000000010d]
SeekGE(bar @ 2000000000,0, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:6261780000000000b2d05e00000000010d]
SeekGE(bax @ 3000000000,1, boundRow=-1, searchDir=0) = (row=1, equalPrefix=true) [hex:6261780000000000b2d05e00000000010d]
SeekGE(bax @ 3000000000,0, boundRow=-1, searchDir=0) = (row=2, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(fax @ 9000000000,0, boundRow=-1, searchDir=0) = (row=2, equalPrefix=false) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(foo @ 3000000000,2, boundRow=-1, searchDir=0) = (row=2, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(foo @ 3000000000,1, boundRow=-1, searchDir=0) = (row=2, equalPrefix=true) [hex:666f6f0000000000b2d05e00000000010d]
SeekGE(foo @ 3000000000,0, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d]
SeekGE(moo @ 3000000001,0, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d]
SeekGE(moo @ 3000000000,2, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d]
SeekGE(moo @ 3000000000,1, boundRow=-1, searchDir=0) = (row=3, equalPrefix=true) [hex:6d6f6f0000000000b2d05e00000000010d]
SeekGE(moo @ 3000000000,0, boundRow=-1, searchDir=0) = (row=4, equalPrefix=true)
SeekGE(zoo @ 9000000000,0, boundRow=-1, searchDir=0) = (row=4, equalPrefix=false)

materialize-user-key
0
1
2
3
----
MaterializeUserKey(-1, 0) = hex:6261720000000000b2d05e00000000010d
MaterializeUserKey(0, 1) = hex:6261780000000000b2d05e00000000010d
MaterializeUserKey(1, 2) = hex:666f6f0000000000b2d05e00000000010d
MaterializeUserKey(2, 3) = hex:6d6f6f0000000000b2d05e00000000010d

materialize-user-key synthetic-suffix=@8000000000,9
0
1
2
3
----
MaterializeUserKeyWithSyntheticSuffix(-1, 0, 8000000000,9) = hex:6261720000000001dcd65000000000090d
MaterializeUserKeyWithSyntheticSuffix(0, 1, 8000000000,9) = hex:6261780000000001dcd65000000000090d
MaterializeUserKeyWithSyntheticSuffix(1, 2, 8000000000,9) = hex:666f6f0000000001dcd65000000000090d
MaterializeUserKeyWithSyntheticSuffix(2, 3, 8000000000,9) = hex:6d6f6f0000000001dcd65000000000090d

materialize-user-key
3
2
0
1
----
MaterializeUserKey(-1, 3) = hex:6d6f6f0000000000b2d05e00000000010d
MaterializeUserKey(3, 2) = hex:666f6f0000000000b2d05e00000000010d
MaterializeUserKey(2, 0) = hex:6261720000000000b2d05e00000000010d
MaterializeUserKey(0, 1) = hex:6261780000000000b2d05e00000000010d

define-block
moo @ 3000000001,0
moo @ 3000000000,2
moo @ 3000000000,1
moo @ 3000000000,0
----
Parse(moo @ 3000000001,0) = hex:6d6f6f0000000000b2d05e0109
Parse(moo @ 3000000000,2) = hex:6d6f6f0000000000b2d05e00000000020d
Parse(moo @ 3000000000,1) = hex:6d6f6f0000000000b2d05e00000000010d
Parse(moo @ 3000000000,0) = hex:6d6f6f0000000000b2d05e0009

materialize-user-key
0
1
2
3
----
MaterializeUserKey(-1, 0) = hex:6d6f6f0000000000b2d05e0109
MaterializeUserKey(0, 1) = hex:6d6f6f0000000000b2d05e00000000020d
MaterializeUserKey(1, 2) = hex:6d6f6f0000000000b2d05e00000000010d
MaterializeUserKey(2, 3) = hex:6d6f6f0000000000b2d05e0009

0 comments on commit 6eec284

Please sign in to comment.