internal/testkeys/testkeys.go

// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

// Package testkeys provides facilities for generating and comparing
// human-readable test keys for use in tests and benchmarks. This package
// provides a single Comparer implementation that compares all keys generated
// by this package.
//
// Keys generated by this package may optionally have a 'suffix' encoding an
// MVCC timestamp. This suffix is of the form "@t<integer>". Comparisons on the
// suffix are performed using integer value, not the byte representation.
package testkeys

import (
	"bytes"
	"fmt"
	"math"
	"strconv"

	"github.com/cockroachdb/pebble/internal/base"
	"go4.org/strutil"
)

const alpha = "abcdefghijklmnopqrstuvwxyz"

const suffixDelim = `@t`

var maxSuffixLen = len(suffixDelim) + len(fmt.Sprintf("%d", math.MaxInt64))

// Comparer is the comparer for test keys generated by this package.
var Comparer *base.Comparer = &base.Comparer{
	Compare: compare,
	Equal:   func(a, b []byte) bool { return compare(a, b) == 0 },
	AbbreviatedKey: func(k []byte) uint64 {
		return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
	},
	FormatKey: base.DefaultFormatter,
	Separator: func(dst, a, b []byte) []byte {
		ai := split(a)
		if ai == len(a) {
			return append(dst, a...)
		}
		bi := split(b)
		if bi == len(b) {
			return append(dst, a...)
		}

		// If the keys are the same just return a.
		if bytes.Equal(a[:ai], b[:bi]) {
			return append(dst, a...)
		}
		n := len(dst)
		dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
		// Did it pick a separator different than a[:ai] -- if not we can't do better than a.
		buf := dst[n:]
		if bytes.Equal(a[:ai], buf) {
			return append(dst[:n], a...)
		}
		// The separator is > a[:ai], so we only need to add the sentinel.
		return append(dst, 0)
	},
	Successor: func(dst, a []byte) []byte {
		ai := split(a)
		if ai == len(a) {
			return append(dst, a...)
		}
		n := len(dst)
		dst = base.DefaultComparer.Successor(dst, a[:ai])
		// Did it pick a successor different than a[:ai] -- if not we can't do better than a.
		buf := dst[n:]
		if bytes.Equal(a[:ai], buf) {
			return append(dst[:n], a...)
		}
		// The successor is > a[:ai], so we only need to add the sentinel.
		return append(dst, 0)
	},
	Split: split,
	Name:  "pebble.internal.testkeys",
}

func compare(a, b []byte) int {
	ai, bi := split(a), split(b)
	if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
		return v
	}

	if len(a[ai:]) == 0 {
		if len(b[bi:]) == 0 {
			return 0
		}
		return -1
	} else if len(b[bi:]) == 0 {
		return +1
	}
	return compareTimestamps(a[ai:], b[bi:])
}

func split(a []byte) int {
	i := bytes.LastIndex(a, []byte(suffixDelim))
	if i >= 0 {
		return i
	}
	return len(a)
}

func compareTimestamps(a, b []byte) int {
	ai, err := strutil.ParseUintBytes(bytes.TrimPrefix(a, []byte(suffixDelim)), 10, 64)
	if err != nil {
		panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
	}
	bi, err := strutil.ParseUintBytes(bytes.TrimPrefix(b, []byte(suffixDelim)), 10, 64)
	if err != nil {
		panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
	}
	switch {
	case ai < bi:
		return +1
	case ai > bi:
		return -1
	default:
		return 0
	}
}

// Keyspace describes a finite keyspace of unsuffixed test keys.
type Keyspace interface {
	// Count returns the number of keys that exist within this keyspace.
	Count() int

	// MaxLen returns the maximum length, in bytes, of a key within this
	// keyspace. This is only guaranteed to return an upper bound.
	MaxLen() int

	// Slice returns the sub-keyspace from index i, inclusive, to index j
	// exclusive.
	Slice(i, j int) Keyspace

	// EveryN returns a key space that includes 1 key for every N keys in the
	// original keyspace.
	EveryN(n int) Keyspace

	key(buf []byte, i int) int
}

// Alpha constructs a keyspace consisting of all keys containing characters a-z,
// with at most `maxLength` characters.
func Alpha(maxLength int) Keyspace {
	return alphabet{
		alphabet:  []byte(alpha),
		maxLength: maxLength,
		increment: 1,
	}
}

// KeyAt returns the i-th key within the keyspace with a suffix encoding the
// timestamp t.
func KeyAt(k Keyspace, i int, t int) []byte {
	b := make([]byte, k.MaxLen()+maxSuffixLen)
	return b[:WriteKeyAt(b, k, i, t)]
}

// WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
// suffix encoding the timestamp t suffix. It returns the number of bytes
// written.
func WriteKeyAt(dst []byte, k Keyspace, i int, t int) int {
	n := WriteKey(dst, k, i)
	n += WriteSuffix(dst[n:], t)
	return n
}

// Suffix returns the test keys suffix representation of timestamp t.
func Suffix(t int) []byte {
	b := make([]byte, maxSuffixLen)
	return b[:WriteSuffix(b, t)]
}

// WriteSuffix writes the test keys suffix representation of timestamp t to dst,
// returning the number of bytes written.
func WriteSuffix(dst []byte, t int) int {
	n := copy(dst, suffixDelim)
	n += len(strconv.AppendInt(dst[n:n], int64(t), 10))
	return n
}

// Key returns the i-th unsuffixed key within the keyspace.
func Key(k Keyspace, i int) []byte {
	b := make([]byte, k.MaxLen())
	return b[:k.key(b, i)]
}

// WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
// returns the number of bytes written.
func WriteKey(dst []byte, k Keyspace, i int) int {
	return k.key(dst, i)
}

type alphabet struct {
	alphabet  []byte
	maxLength int
	headSkip  int
	tailSkip  int
	increment int
}

func (a alphabet) Count() int {
	return (keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip) / a.increment
}

func (a alphabet) MaxLen() int {
	return a.maxLength
}

func (a alphabet) Slice(i, j int) Keyspace {
	s := a
	s.headSkip += i
	s.tailSkip += a.Count() - j
	return s
}

func (a alphabet) EveryN(n int) Keyspace {
	s := a
	s.increment *= n
	return s
}

func keyCount(n, l int) int {
	// The number of representable keys in the keyspace is a function of the
	// length of the alphabet n and the max key length l. Consider how the
	// number of representable keys grows as l increases:
	//
	// l = 1: n
	// l = 2: n + n^2
	// l = 3: n + n^2 + n^3
	// ...
	// Σ i=(1...l) n^i = n*(n^l - 1)/(n-1)
	return (n * (int(math.Pow(float64(n), float64(l))) - 1)) / (n - 1)
}

func (a alphabet) key(buf []byte, idx int) int {
	// This function generates keys of length 1..maxKeyLength, pulling
	// characters from the alphabet. The idx determines which key to generate,
	// generating the i-th lexicographically next key.
	//
	// The index to use is advanced by `headSkip`, allowing a keyspace to encode
	// a subregion of the keyspace.
	//
	// Eg, alphabet = `ab`, maxKeyLength = 3:
	//
	//           aaa aab     aba abb         baa bab     bba bbb
	//       aa          ab              ba          bb
	//   a                           b
	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
	//
	return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
		keyCount(len(a.alphabet), a.maxLength))
}

func generateAlphabetKey(buf, alphabet []byte, i, keyCount int) int {
	if keyCount == 0 || i > keyCount || i < 0 {
		return 0
	}

	// Of the keyCount keys in the generative keyspace, how many are there
	// starting with a particular character?
	keysPerCharacter := keyCount / len(alphabet)

	// Find the character that the key at index i starts with and set it.
	characterIdx := i / keysPerCharacter
	buf[0] = alphabet[characterIdx]

	// Consider characterIdx = 0, pointing to 'a'.
	//
	//           aaa aab     aba abb         baa bab     bba bbb
	//       aa          ab              ba          bb
	//   a                           b
	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
	//  \_________________________/
	//    |keysPerCharacter| keys
	//
	// In our recursive call, we reduce the problem to:
	//
	//           aaa aab     aba abb
	//       aa          ab
	//       0   1   2   3   4   5
	//     \________________________/
	//    |keysPerCharacter-1| keys
	//
	// In the subproblem, there are keysPerCharacter-1 keys (eliminating the
	// just 'a' key, plus any keys beginning with any other character).
	//
	// The index i is also offset, reduced by the count of keys beginning with
	// characters earlier in the alphabet (keysPerCharacter*characterIdx) and
	// the key consisting of just the 'a' (-1).
	i = i - keysPerCharacter*characterIdx - 1
	return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
}