From 2d1328f7e069bb34da801e414a1f452dff4f9ae8 Mon Sep 17 00:00:00 2001 From: Wojciech Malota-Wojcik Date: Tue, 10 Sep 2024 13:52:36 +0200 Subject: [PATCH] Binary snapshottable tree --- build/go.mod | 4 +- build/go.sum | 8 +- go.mod | 13 +++ go.sum | 20 +++++ quantum.go | 213 ++++++++++++++++++++++++++++++++++++++++++++++++ quantum_test.go | 195 ++++++++++++++++++++++++++++++++++++++++++++ switch.go | 5 ++ switchtest.go | 5 ++ 8 files changed, 457 insertions(+), 6 deletions(-) create mode 100644 quantum.go create mode 100644 quantum_test.go create mode 100644 switch.go create mode 100644 switchtest.go diff --git a/build/go.mod b/build/go.mod index f76a230..e60f3a9 100644 --- a/build/go.mod +++ b/build/go.mod @@ -4,7 +4,7 @@ go 1.22 require ( github.com/outofforest/build/v2 v2.2.0 - github.com/outofforest/tools v1.0.2 + github.com/outofforest/tools v1.0.4 ) require ( @@ -19,5 +19,5 @@ require ( github.com/spf13/pflag v1.0.5 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/text v0.17.0 // indirect + golang.org/x/text v0.18.0 // indirect ) diff --git a/build/go.sum b/build/go.sum index 48faca1..13a7ab8 100644 --- a/build/go.sum +++ b/build/go.sum @@ -19,8 +19,8 @@ github.com/outofforest/parallel v0.2.3 h1:DRIgHr7XTL4LLgsTqrj041kulv4ajtbCkRbkOG github.com/outofforest/parallel v0.2.3/go.mod h1:cu210xIjJtOMXR2ERzEcNA2kr0Z0xfZjSKw2jTxAQ2E= github.com/outofforest/run v0.6.0 h1:t/3vAodvU5L5vJ3BB0qRgfviX+T3JJmLgPN6G2WQs3U= github.com/outofforest/run v0.6.0/go.mod h1:l7TAtA/zG+7JEH017Qrfm7gzmMyM0gjd+CereJdJSpI= -github.com/outofforest/tools v1.0.2 h1:EE+rasbu44NEX0ML7HtyNaeKdBT3CwEEURKMxGgJgoM= -github.com/outofforest/tools v1.0.2/go.mod h1:fn8F7Z4F4SdyWCSIQIf2NvHbN08fDCum+aDVEbxSPUs= +github.com/outofforest/tools v1.0.4 h1:PdZVOeEbHDwQoVMifrOFz0CzTGDxDNBvv295Z2A5mps= +github.com/outofforest/tools v1.0.4/go.mod h1:fn8F7Z4F4SdyWCSIQIf2NvHbN08fDCum+aDVEbxSPUs= github.com/pkg/errors v0.8.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -73,8 +73,8 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= diff --git a/go.mod b/go.mod index 9168754..d24ee3a 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,16 @@ module github.com/outofforest/quantum go 1.22 + +require ( + github.com/cespare/xxhash v1.1.0 + github.com/outofforest/photon v0.5.0 + github.com/stretchr/testify v1.8.4 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum index e69de29..089c420 100644 --- a/go.sum +++ b/go.sum @@ -0,0 +1,20 @@ +github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/cespare/xxhash v1.1.0 
h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
+github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/outofforest/photon v0.5.0 h1:7Mq92+Dwj7TPOIZzbwOYBe05OLOP0d7GtRFvOGaU000=
+github.com/outofforest/photon v0.5.0/go.mod h1:4qOhLdJ3jiXj7umpt57hCGs5T+p3LX9QdpkipX4YDy4=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
+github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/quantum.go b/quantum.go
new file mode 100644
index 0000000..c6f5cb7
--- /dev/null
+++ b/quantum.go
@@ -0,0 +1,251 @@
+package quantum
+
+import (
+	"unsafe"
+
+	"github.com/cespare/xxhash"
+
+	"github.com/outofforest/photon"
+)
+
+// FIXME (wojciech): avoid individual heap allocations for nodes.
+
+// New creates a new, empty quantum store and returns its initial snapshot.
+func New[K comparable, V any]() Snapshot[K, V] {
+	return Snapshot[K, V]{
+		root:         new(node[K, V]),
+		rootSet:      new(bool),
+		defaultValue: *new(V),
+	}
+}
+
+// Snapshot represents the state at a particular point in time.
+//
+// Items live in a binary tree. At each level the lowest bit of the remaining
+// hash selects the left (0) or right (1) child, then the hash is shifted right
+// by one bit. Nodes carry the version of the snapshot that wrote them, so Set
+// copies nodes stamped with a lower version instead of modifying them in place.
+type Snapshot[K comparable, V any] struct {
+	version      uint64
+	root         *node[K, V]
+	rootSet      *bool
+	defaultValue V
+	hasher       hasher[K]
+	hashMod      uint64
+}
+
+// Next transitions to the next snapshot of the state.
+//
+// The returned snapshot has the version incremented and owns copies of the
+// root node and of the rootSet flag; all nodes below the root stay shared with
+// the receiver until Set copies them.
+func (s Snapshot[K, V]) Next() Snapshot[K, V] {
+	s.version++
+
+	r := *s.root
+	s.root = &r
+
+	rs := *s.rootSet
+	s.rootSet = &rs
+
+	return s
+}
+
+// Get gets the value of the key.
+//
+// It returns the stored value and true if the key exists, or the zero value of
+// V and false otherwise.
+func (s Snapshot[K, V]) Get(key K) (value V, exists bool) {
+	h := s.hasher.Hash(key)
+	n := s.root
+	for {
+		if n == nil {
+			return s.defaultValue, false
+		}
+		if n.Hash == h {
+			if n.Key == key {
+				return n.Value, true
+			}
+
+			// Conflict: a different key shares the remaining hash, so the
+			// walk continues with the hash produced by the node's hasher.
+			// NOTE(review): s.hasher is never assigned in this file, so the
+			// condition always holds and Get replaces n.hasher on every
+			// conflict — confirm whether n.hasher.bytes was meant here.
+			if s.hasher.bytes == nil {
+				s.hashMod++
+				n.hasher = newHasher[K](s.hashMod)
+			}
+			h = n.hasher.Hash(key)
+		}
+
+		bit := h & 0x01
+		h >>= 1
+
+		switch bit {
+		case 0x00:
+			n = n.Left
+		default:
+			n = n.Right
+		}
+	}
+}
+
+// Set sets the value for the key.
+//
+// Nodes on the path that carry a version lower than the snapshot's version
+// are copied before being modified.
+func (s Snapshot[K, V]) Set(key K, value V) {
+	const (
+		leftChild int = iota
+		rightChild
+	)
+
+	h := s.hasher.Hash(key)
+
+	// The root node is preallocated by New; the first Set fills it in place.
+	if !*s.rootSet {
+		*s.root = node[K, V]{
+			Value:   value,
+			Key:     key,
+			Version: s.version,
+			Hash:    h,
+		}
+		*s.rootSet = true
+		return
+	}
+
+	var parentNode *node[K, V]
+	var child int
+	n := s.root
+	for {
+		if n == nil {
+			// Free slot: attach a new node holding the remaining hash.
+			n = &node[K, V]{
+				Value:   value,
+				Key:     key,
+				Version: s.version,
+				Hash:    h,
+			}
+
+			if child == leftChild {
+				parentNode.Left = n
+			} else {
+				parentNode.Right = n
+			}
+			return
+		}
+		if n.Version < s.version {
+			// The node was written by an earlier snapshot: work on a copy.
+			n2 := *n
+			n2.Version = s.version
+
+			switch {
+			case parentNode == nil:
+				*s.root = n2
+				n = s.root
+			case child == leftChild:
+				n = &n2
+				parentNode.Left = n
+			default:
+				n = &n2
+				parentNode.Right = n
+			}
+		}
+		if n.Hash == h {
+			if n.Key == key {
+				n.Value = value
+				return
+			}
+
+			// Conflict: a different key shares the remaining hash, so the
+			// walk continues with the hash produced by the node's hasher.
+			if s.hasher.bytes == nil {
+				s.hashMod++
+				n.hasher = newHasher[K](s.hashMod)
+			}
+			h = n.hasher.Hash(key)
+		}
+
+		bit := h & 0x01
+		h >>= 1
+		parentNode = n
+
+		switch bit {
+		case 0x00:
+			n = n.Left
+			child = leftChild
+		case 0x01:
+			n = n.Right
+			child = rightChild
+		}
+	}
+}
+
+// node is a single element of the tree. Hash holds the part of the key's hash
+// that was still unconsumed at the node's depth, Version the version of the
+// snapshot that wrote the node, and hasher the salted hasher used for keys
+// conflicting with this node.
+type node[K comparable, V any] struct {
+	Key   K
+	Value V
+
+	Version uint64
+	Hash    uint64
+	Left    *node[K, V]
+	Right   *node[K, V]
+
+	hasher hasher[K]
+}
+
+// uint64Length is the number of bytes reserved in the buffer for the salt.
+const uint64Length = 8
+
+// newHasher creates a hasher salted with mod. For mod == 0 the hasher has no
+// buffer and hashes the key alone. Otherwise it owns a buffer holding the
+// bytes photon exposes for mod, followed by room for one key.
+func newHasher[K comparable](mod uint64) hasher[K] {
+	var k K
+	var bytes []byte
+	var data []byte
+	if mod > 0 {
+		bytes = make([]byte, uint64Length+unsafe.Sizeof(k))
+		copy(bytes, photon.NewFromValue(&mod).B)
+		data = bytes[uint64Length:]
+	}
+
+	return hasher[K]{
+		bytes: bytes,
+		data:  data,
+	}
+}
+
+// hasher computes hashes of keys. bytes is the whole salted buffer (nil for an
+// unsalted hasher) and data is the part of bytes the key is copied into.
+type hasher[K comparable] struct {
+	bytes []byte
+	data  []byte
+}
+
+// Hash returns the hash of the key. A salted hasher hashes the salt together
+// with the key, so the salt influences the result. When isTesting is true the
+// hash is additionally passed through testHash.
+func (h hasher[K]) Hash(key K) uint64 {
+	var hash uint64
+	if h.bytes == nil {
+		hash = xxhash.Sum64(photon.NewFromValue[K](&key).B)
+	} else {
+		copy(h.data, photon.NewFromValue[K](&key).B)
+		// Hash the whole buffer: hashing h.data alone would skip the salt and
+		// reproduce the unsalted hash of the key.
+		hash = xxhash.Sum64(h.bytes)
+	}
+	if isTesting {
+		hash = testHash(hash)
+	}
+	return hash
+}
+
+func testHash(hash uint64) uint64 {
+	return hash & 0x7fffffff
+}
diff --git a/quantum_test.go b/quantum_test.go
new file mode 100644
index 0000000..b2851a5
--- /dev/null
+++ b/quantum_test.go
@@ -0,0 +1,212 @@
+package quantum
+
+import (
+	"fmt"
+	"math"
+	"sort"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// collisions holds sets of integers expected to share a single hash when the
+// package is built with the "testing" tag (see TestFindCollisions).
+var collisions = [][]int{
+	{15691551, 62234586, 76498628, 79645586},
+	{6417226, 8828927, 78061179, 87384387},
+	{9379853, 15271236, 26924827, 39742852},
+	{71180670, 73568605, 96077640, 100118418},
+	{11317952, 69053141, 82160848, 112455075},
+	{33680651, 34881710, 52672514, 56033413},
+	{635351, 7564491, 43998577, 77923294},
+	{15069177, 60348274, 84185567, 116299206},
+	{43622549, 93531002, 108158183, 115087013},
+	{32134280, 33645087, 37005304, 83416269},
+}
+
+// TestCollisions verifies that every fixture set maps to exactly one hash.
+func TestCollisions(t *testing.T) {
+	// The sets are expected to collide only under the truncated hash enabled
+	// by the "testing" build tag, so the check is skipped without it.
+	if !isTesting {
+		t.Skip("collision fixtures require -tags=testing")
+	}
+
+	var hasher hasher[int]
+
+	for _, set := range collisions {
+		m := map[uint64]struct{}{}
+		for _, i := range set {
+			m[hasher.Hash(i)] = struct{}{}
+		}
+		assert.Len(t, m, 1)
+	}
+}
+
+// TestSet verifies that all values written to one snapshot are stored.
+func TestSet(t *testing.T) {
+	s := New[int, int]()
+
+	for i := range 10 {
+		s.Set(i, i)
+	}
+
+	require.Equal(t, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, collect(s))
+}
+
+// TestSetCollisions verifies that all keys of the collision fixtures are stored.
+func TestSetCollisions(t *testing.T) {
+	s := New[int, int]()
+
+	allValues := make([]int, 0, len(collisions)*len(collisions[0]))
+
+	for _, set := range collisions {
+		for _, i := range set {
+			allValues = append(allValues, i)
+			s.Set(i, i)
+		}
+	}
+
+	sort.Ints(allValues)
+
+	require.Equal(t, allValues, collect(s))
+}
+
+// TestGetCollisions verifies that all keys of the collision fixtures are readable.
+func TestGetCollisions(t *testing.T) {
+	s := New[int, int]()
+
+	inserted := make([]int, 0, len(collisions)*len(collisions[0]))
+	read := make([]int, 0, len(collisions)*len(collisions[0]))
+
+	for _, set := range collisions {
+		for _, i := range set {
+			inserted = append(inserted, i)
+			s.Set(i, i)
+		}
+	}
+	for _, set := range collisions {
+		for _, i := range set {
+			v, exists := s.Get(i)
+			require.True(t, exists)
+			read = append(read, v)
+		}
+	}
+
+	sort.Ints(inserted)
+	sort.Ints(read)
+
+	require.Equal(t, inserted, read)
+}
+
+// TestSetOnNext verifies that writes to the next snapshot don't affect the previous one.
+func TestSetOnNext(t *testing.T) {
+	s := New[int, int]()
+
+	for i := range 10 {
+		s.Set(i, i)
+	}
+
+	s2 := s.Next()
+	for i := range 5 {
+		s2.Set(i, i+10)
+	}
+
+	require.Equal(t, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, collect(s))
+	require.Equal(t, []int{5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, collect(s2))
+}
+
+// TestGet verifies that every written key is found together with its value.
+func TestGet(t *testing.T) {
+	s := New[int, int]()
+
+	for i := range 10 {
+		s.Set(i, i)
+	}
+	for i := range 10 {
+		v, exists := s.Get(i)
+		require.True(t, exists)
+		require.Equal(t, i, v)
+	}
+}
+
+// TestReplace verifies that replacing values in a new snapshot keeps the old one intact.
+func TestReplace(t *testing.T) {
+	s1 := New[int, int]()
+
+	for i := range 10 {
+		s1.Set(i, i)
+	}
+
+	s2 := s1.Next()
+
+	for i, j := 0, 10; i < 5; i, j = i+1, j+1 {
+		s2.Set(i, j)
+	}
+
+	for i := range 10 {
+		v, exists := s1.Get(i)
+		require.True(t, exists)
+		require.Equal(t, i, v)
+	}
+
+	for i := range 5 {
+		v, exists := s2.Get(i)
+		require.True(t, exists)
+		require.Equal(t, i+10, v)
+	}
+
+	for i := 5; i < 10; i++ {
+		v, exists := s2.Get(i)
+		require.True(t, exists)
+		require.Equal(t, i, v)
+	}
+}
+
+// go test -run=TestFindCollisions -v -tags=testing .
+
+func TestFindCollisions(t *testing.T) {
+	// Remove SkipNow and use command
+	// go test -run=TestFindCollisions -v -tags=testing .
+	// to generate integers with colliding hashes.
+	t.SkipNow()
+
+	fmt.Println("started")
+
+	var hasher hasher[int]
+
+	m := map[uint64][]int{}
+	for i := range math.MaxInt {
+		h := hasher.Hash(i)
+		if h2 := m[h]; len(h2) == 4 {
+			sort.Ints(h2)
+			fmt.Printf("%#v\n", h2)
+		} else {
+			m[h] = append(m[h], i)
+		}
+	}
+}
+
+// collect walks the whole tree of the snapshot and returns its values sorted ascending.
+func collect(s Snapshot[int, int]) []int {
+	values := []int{}
+	stack := []*node[int, int]{s.root}
+
+	for {
+		if len(stack) == 0 {
+			sort.Ints(values)
+			return values
+		}
+
+		n := stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+
+		if n == nil {
+			continue
+		}
+
+		values = append(values, n.Value)
+		stack = append(stack, n.Right, n.Left)
+	}
+}
diff --git a/switch.go b/switch.go
new file mode 100644
index 0000000..d476f6a
--- /dev/null
+++ b/switch.go
@@ -0,0 +1,6 @@
+//go:build !testing
+
+package quantum
+
+// isTesting is false when the package is built without the "testing" tag.
+const isTesting = false
diff --git a/switchtest.go b/switchtest.go
new file mode 100644
index 0000000..fa003a2
--- /dev/null
+++ b/switchtest.go
@@ -0,0 +1,6 @@
+//go:build testing
+
+package quantum
+
+// isTesting is true when the package is built with the "testing" tag.
+const isTesting = true