From 7b847963ec288afe279988d686215ab3f6c18642 Mon Sep 17 00:00:00 2001 From: Wojciech Malota-Wojcik Date: Mon, 30 Sep 2024 17:06:33 +0200 Subject: [PATCH] Use in-memory blob to store nodes --- benchmark_test.go | 117 ------------------ quantum.go | 295 ++++++++++++++++++++++++---------------------- quantum_test.go | 51 ++++---- 3 files changed, 182 insertions(+), 281 deletions(-) delete mode 100644 benchmark_test.go diff --git a/benchmark_test.go b/benchmark_test.go deleted file mode 100644 index deab1cd..0000000 --- a/benchmark_test.go +++ /dev/null @@ -1,117 +0,0 @@ -package quantum - -import ( - "crypto/rand" - mathrand "math/rand" - "testing" -) - -// go test -benchtime=10x -bench=BenchmarkQuantum -run=^$ -cpuprofile profile.out -// go tool pprof -http="localhost:8000" pprofbin ./profile.out - -type key struct { - store [32]byte - key [32]byte -} - -const ( - keysNum = 10_000_000 - loop1 = 1000 - loop2 = 30 -) - -var ( - keys []key - dbMap map[key]int - dbQuantum Snapshot[key, int] -) - -func init() { - keys = make([]key, 0, keysNum) - - for range cap(keys) { - var k key - _, _ = rand.Read(k.store[:]) - _, _ = rand.Read(k.key[:]) - keys = append(keys, k) - } - - dbMap = map[key]int{} - for i, k := range keys { - dbMap[k] = i - } - - dbQuantum = New[key, int]() - for i, k := range keys { - dbQuantum.Set(k, i) - } - - for i := len(keys) - 1; i > 0; i-- { - j := mathrand.Intn(i + 1) - keys[i], keys[j] = keys[j], keys[i] - } -} - -func BenchmarkMaps(b *testing.B) { - b.StopTimer() - b.ResetTimer() - - snapshot1 := map[key]int{} - snapshot2 := map[key]int{} - - for range b.N { - rands := make([]int, 0, loop1*loop2) - for range cap(rands) { - rands = append(rands, mathrand.Intn(len(keys))) - } - - var ri int - - b.StartTimer() - for range loop1 { - for range loop2 { - k := keys[rands[ri]] - ri++ - v2 := snapshot2[k] - v1 := snapshot1[k] - v := dbMap[k] - snapshot2[k] = v + v1 + v2 - } - for k, v := range snapshot2 { - snapshot1[k] = v - } - clear(snapshot2) - } - for k, v := range snapshot1 { - dbMap[k] = v - } - clear(snapshot1) - b.StopTimer() - } -} - -func BenchmarkQuantum(b *testing.B) { - b.StopTimer() - b.ResetTimer() - - for range b.N { - rands := make([]int, 0, loop1*loop2) - for range cap(rands) { - rands = append(rands, mathrand.Intn(len(keys))) - } - - var ri int - - b.StartTimer() - for range loop1 { - dbQuantum = dbQuantum.Next() - for range loop2 { - k := keys[rands[ri]] - ri++ - v, _ := dbQuantum.Get(k) - dbQuantum.Set(k, v) - } - } - b.StopTimer() - } -} diff --git a/quantum.go b/quantum.go index a8fbd21..3d5ee4d 100644 --- a/quantum.go +++ b/quantum.go @@ -1,53 +1,81 @@ package quantum import ( - "unsafe" - "github.com/cespare/xxhash" - "github.com/outofforest/mass" "github.com/outofforest/photon" ) -// FIXME (wojciech): reclaim abandoned nodes to save on heap allocations. - const ( bitsPerHop = 4 arraySize = 1 << bitsPerHop mask = arraySize - 1 + pageSize = 512 uint64Length = 8 ) -type pointerType byte +// State enumerates possible slot states. +type State byte const ( - freePointerType pointerType = iota - kvPointerType - nodePointerType + stateFree State = iota + stateData + statePointer ) +// Config stores configuration. +type Config struct { + TotalSize uint64 +} + +// NodeHeader is the header common to all node types. +type NodeHeader struct { + Version uint64 + HashMod uint64 +} + +// PointerNode is the node containing pointers to other nodes. +type PointerNode struct { + Header NodeHeader + States [arraySize]State + Pointers [arraySize]uint64 +} + +// DataItem stores single key-value pair. +type DataItem[K, V comparable] struct { + Hash uint64 + Key K + Value V +} + +// DataNode stores the data items. +type DataNode[K, V comparable] struct { + Header NodeHeader + States [arraySize]State + Items [arraySize]DataItem[K, V] +} + // New creates new quantum store. -func New[K comparable, V any]() Snapshot[K, V] { +func New[K, V comparable](config Config) Snapshot[K, V] { s := Snapshot[K, V]{ - rootNodeType: kvPointerType, + version: 0, + data: make([]byte, config.TotalSize), + rootNodeType: stateData, defaultValue: *new(V), - massNodes: mass.New[node[K, V]](1000), - massKVPairs: mass.New[kvPair[K, V]](1000), } - s.root = s.massNodes.New() return s } // Snapshot represents the state at particular point in time. -type Snapshot[K comparable, V any] struct { +type Snapshot[K, V comparable] struct { version uint64 - root *node[K, V] - rootNodeType pointerType - defaultValue V - hasher hasher[K] + rootNode uint64 + rootNodeType State hashMod uint64 - massNodes *mass.Mass[node[K, V]] - massKVPairs *mass.Mass[kvPair[K, V]] + defaultValue V + + data []byte + allocatedNodeIndex uint64 } // Next transitions to the next snapshot of the state. @@ -58,29 +86,35 @@ func (s Snapshot[K, V]) Next() Snapshot[K, V] { // Get gets the value of the key. func (s *Snapshot[K, V]) Get(key K) (value V, exists bool) { - h := s.hasher.Hash(key) + h := hashKey(key, 0) nType := s.rootNodeType - n := s.root + n := s.node(s.rootNode) for { - if n.hasher.bytes != nil { - h = n.hasher.Hash(key) + header := photon.NewFromBytes[NodeHeader](n) + + if header.V.HashMod > 0 { + h = hashKey(key, header.V.HashMod) } index := h & mask h >>= bitsPerHop - if n.Types[index] == freePointerType { - return s.defaultValue, false - } - switch nType { - case nodePointerType: - nType = n.Types[index] - n = n.Pointers[index] + case statePointer: + node := photon.NewFromBytes[PointerNode](n) + if node.V.States[index] == stateFree { + return s.defaultValue, false + } + nType = node.V.States[index] + n = s.node(node.V.Pointers[index]) default: - kv := n.KVs[index] - if kv.Hash == h && kv.Key == key { - return kv.Value, true + node := photon.NewFromBytes[DataNode[K, V]](n) + if node.V.States[index] == stateFree { + return s.defaultValue, false + } + item := node.V.Items[index] + if item.Hash == h && item.Key == key { + return item.Value, true } return s.defaultValue, false } @@ -89,179 +123,158 @@ func (s *Snapshot[K, V]) Get(key K) (value V, exists bool) { // Set sets the value for the key. func (s *Snapshot[K, V]) Set(key K, value V) { - h := s.hasher.Hash(key) + h := hashKey(key, 0) nType := s.rootNodeType - n := s.root + n := s.node(s.rootNode) - var parentNode *node[K, V] + var parentNode photon.Union[*PointerNode] var parentIndex uint64 for { - if n.Version < s.version { - n2 := s.massNodes.New() - n2.Version = s.version - n2.Types = n.Types - n2.hasher = n.hasher - if nType == kvPointerType { - n2.KVs = n.KVs - } else { - n2.Pointers = n.Pointers - } + header := photon.NewFromBytes[NodeHeader](n) + if header.V.Version < s.version { + newNodeIndex, newNodeData := s.allocateNode() + copy(newNodeData, n) + header = photon.NewFromBytes[NodeHeader](newNodeData) + header.V.Version = s.version + n = newNodeData switch { - case parentNode == nil: - s.root = n2 - n = s.root + case parentNode.V == nil: + s.rootNode = newNodeIndex default: - parentNode.Pointers[parentIndex] = n2 - n = parentNode.Pointers[parentIndex] + parentNode.V.Pointers[parentIndex] = newNodeIndex } } - if n.hasher.bytes != nil { - h = n.hasher.Hash(key) + if header.V.HashMod > 0 { + h = hashKey(key, header.V.HashMod) } index := h & mask h >>= bitsPerHop switch nType { - case nodePointerType: - if n.Types[index] == freePointerType { - n.Types[index] = kvPointerType - n.Pointers[index] = s.massNodes.New() - n.Version = s.version + case statePointer: + node := photon.NewFromBytes[PointerNode](n) + if node.V.States[index] == stateFree { + node.V.States[index] = stateData + nodeIndex, nodeData := s.allocateNode() + node.V.Pointers[index] = nodeIndex + + dataNode := photon.NewFromBytes[DataNode[K, V]](nodeData) + dataNode.V.Header.Version = s.version } parentIndex = index - parentNode = n - nType = n.Types[index] - n = n.Pointers[index] + parentNode = node + nType = node.V.States[index] + n = s.node(node.V.Pointers[index]) default: - if n.Types[index] == freePointerType { - n.Types[index] = kvPointerType - kv := s.massKVPairs.New() - kv.Hash = h - kv.Key = key - kv.Value = value - n.KVs[index] = kv + node := photon.NewFromBytes[DataNode[K, V]](n) + if node.V.States[index] == stateFree { + node.V.States[index] = stateData + node.V.Items[index] = DataItem[K, V]{ + Hash: h, + Key: key, + Value: value, + } return } - kv := n.KVs[index] + item := node.V.Items[index] var conflict bool - if kv.Hash == h { - if kv.Key == key { - kv2 := s.massKVPairs.New() - kv2.Hash = kv.Hash - kv2.Key = kv.Key - kv2.Value = value - n.KVs[index] = kv2 + if item.Hash == h { + if item.Key == key { + node.V.Items[index].Value = value return } // hash conflict - conflict = true } // conflict or split needed - n2 := s.massNodes.New() - n2.Version = s.version - n2.hasher = n.hasher + pointerNodeIndex, pointerNodeData := s.allocateNode() + pointerNode := photon.NewFromBytes[PointerNode](pointerNodeData) + pointerNode.V.Header = NodeHeader{ + Version: s.version, + HashMod: node.V.Header.HashMod, + } for i := range uint64(arraySize) { - if n.Types[i] == freePointerType { + if node.V.States[i] == stateFree { continue } - n2.Types[i] = kvPointerType - n2.Pointers[i] = s.massNodes.New() - n2.Pointers[i].Version = s.version + pointerNode.V.States[i] = stateData + dataNodeIndex, dataNodeData := s.allocateNode() + pointerNode.V.Pointers[i] = dataNodeIndex + dataNode := photon.NewFromBytes[DataNode[K, V]](dataNodeData) + dataNode.V.Header.Version = s.version - kv := n.KVs[i] + item := node.V.Items[i] var hash uint64 if conflict && i == index { s.hashMod++ - n2.Pointers[i].hasher = newHasher[K](s.hashMod) - hash = n2.Pointers[i].hasher.Hash(kv.Key) + dataNode.V.Header.HashMod = s.hashMod + hash = hashKey(item.Key, s.hashMod) } else { - hash = kv.Hash + hash = item.Hash } index := hash & mask - n2.Pointers[i].Types[index] = kvPointerType - - kv2 := s.massKVPairs.New() - kv2.Hash = hash >> bitsPerHop - kv2.Key = kv.Key - kv2.Value = kv.Value - n2.Pointers[i].KVs[index] = kv2 + dataNode.V.States[index] = stateData + dataNode.V.Items[index] = DataItem[K, V]{ + Hash: hash >> bitsPerHop, + Key: item.Key, + Value: item.Value, + } } - if parentNode == nil { - s.rootNodeType = nodePointerType - s.root = n2 - parentNode = s.root + if parentNode.V == nil { + s.rootNodeType = statePointer + s.rootNode = pointerNodeIndex } else { - parentNode.Types[parentIndex] = nodePointerType - parentNode.Pointers[parentIndex] = n2 - parentNode = parentNode.Pointers[parentIndex] + parentNode.V.States[parentIndex] = statePointer + parentNode.V.Pointers[parentIndex] = pointerNodeIndex } + parentNode = pointerNode parentIndex = index - n = n2.Pointers[index] + n = s.node(pointerNode.V.Pointers[index]) } } } -type kvPair[K comparable, V any] struct { - Hash uint64 - Key K - Value V -} - -type node[K comparable, V any] struct { - Version uint64 - hasher hasher[K] - - Types [arraySize]pointerType - KVs [arraySize]*kvPair[K, V] - Pointers [arraySize]*node[K, V] +func (s *Snapshot[K, V]) node(n uint64) []byte { + return s.data[n*pageSize : (n+1)*pageSize] } -func newHasher[K comparable](mod uint64) hasher[K] { - var k K - var bytes []byte - var data []byte - if mod > 0 { - bytes = make([]byte, uint64Length+unsafe.Sizeof(k)) - copy(bytes, photon.NewFromValue(&mod).B) - data = bytes[uint64Length:] - } +func (s *Snapshot[K, V]) allocateNode() (uint64, []byte) { + // FIXME (wojciech): Copy 0x00 bytes to allocated node. - return hasher[K]{ - bytes: bytes, - data: data, - } -} - -type hasher[K comparable] struct { - bytes []byte - data []byte + s.allocatedNodeIndex++ + return s.allocatedNodeIndex, s.node(s.allocatedNodeIndex) } -func (h hasher[K]) Hash(key K) uint64 { +func hashKey[K comparable](key K, hashMod uint64) uint64 { var hash uint64 - if h.bytes == nil { - hash = xxhash.Sum64(photon.NewFromValue[K](&key).B) + p := photon.NewFromValue[K](&key) + if hashMod == 0 { + hash = xxhash.Sum64(p.B) } else { - copy(h.data, photon.NewFromValue[K](&key).B) - hash = xxhash.Sum64(h.bytes) + // FIXME (wojciech): Remove heap allocation + b := make([]byte, uint64Length+len(p.B)) + copy(b, photon.NewFromValue(&hashMod).B) + copy(b[uint64Length:], photon.NewFromValue[K](&key).B) + hash = xxhash.Sum64(b) } + if isTesting { hash = testHash(hash) } + return hash } diff --git a/quantum_test.go b/quantum_test.go index 6f26910..dce089a 100644 --- a/quantum_test.go +++ b/quantum_test.go @@ -8,6 +8,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/outofforest/photon" ) var collisions = [][]int{ @@ -23,20 +25,20 @@ var collisions = [][]int{ {32134280, 33645087, 37005304, 83416269}, } -func TestCollisions(t *testing.T) { - var hasher hasher[int] +var config = Config{TotalSize: 10 * 1024 * 1024} +func TestCollisions(t *testing.T) { for _, set := range collisions { m := map[uint64]struct{}{} for _, i := range set { - m[hasher.Hash(i)] = struct{}{} + m[hashKey(i, 0)] = struct{}{} } assert.Len(t, m, 1) } } func TestSet(t *testing.T) { - s := New[int, int]() + s := New[int, int](config) for i := range 10 { s.Set(i, i) @@ -46,7 +48,7 @@ func TestSet(t *testing.T) { } func TestSetCollisions(t *testing.T) { - s := New[int, int]() + s := New[int, int](config) allValues := make([]int, 0, len(collisions)*len(collisions[0])) @@ -63,7 +65,7 @@ func TestSetCollisions(t *testing.T) { } func TestGetCollisions(t *testing.T) { - s := New[int, int]() + s := New[int, int](config) inserted := make([]int, 0, len(collisions)*len(collisions[0])) read := make([]int, 0, len(collisions)*len(collisions[0])) @@ -90,7 +92,7 @@ func TestGetCollisions(t *testing.T) { } func TestSetOnNext(t *testing.T) { - s := New[int, int]() + s := New[int, int](config) for i := range 10 { s.Set(i, i) @@ -106,7 +108,7 @@ func TestSetOnNext(t *testing.T) { } func TestGet(t *testing.T) { - s := New[int, int]() + s := New[int, int](config) for i := range 10 { s.Set(i, i) @@ -119,7 +121,7 @@ func TestGet(t *testing.T) { } func TestReplace(t *testing.T) { - s1 := New[int, int]() + s1 := New[int, int](config) for i := range 10 { s1.Set(i, i) @@ -160,11 +162,9 @@ func TestFindCollisions(t *testing.T) { fmt.Println("started") - var hasher hasher[int] - m := map[uint64][]int{} for i := range math.MaxInt { - h := hasher.Hash(i) + h := hashKey(i, 0) if h2 := m[h]; len(h2) == 4 { sort.Ints(h2) fmt.Printf("%#v\n", h2) @@ -176,8 +176,8 @@ func TestFindCollisions(t *testing.T) { func collect(s Snapshot[int, int]) []int { values := []int{} - typeStack := []pointerType{s.rootNodeType} - nodeStack := []*node[int, int]{s.root} + typeStack := []State{s.rootNodeType} + nodeStack := []uint64{s.rootNode} for { if len(nodeStack) == 0 { @@ -190,16 +190,21 @@ func collect(s Snapshot[int, int]) []int { typeStack = typeStack[:len(typeStack)-1] nodeStack = nodeStack[:len(nodeStack)-1] - for i := range arraySize { - if n.Types[i] == freePointerType { - continue + switch t { + case stateData: + node := photon.NewFromBytes[DataNode[int, int]](s.node(n)) + for i := range arraySize { + if node.V.States[i] == stateData { + values = append(values, node.V.Items[i].Value) + } } - switch t { - case kvPointerType: - values = append(values, n.KVs[i].Value) - default: - typeStack = append(typeStack, n.Types[i]) - nodeStack = append(nodeStack, n.Pointers[i]) + default: + node := photon.NewFromBytes[PointerNode](s.node(n)) + for i := range arraySize { + if node.V.States[i] != stateFree { + typeStack = append(typeStack, node.V.States[i]) + nodeStack = append(nodeStack, node.V.Pointers[i]) + } } } }