From bb89ddf8c43e67727c550a5799d45c2dc2dbd74a Mon Sep 17 00:00:00 2001 From: miyamoto <64457274+iammytoo@users.noreply.github.com> Date: Fri, 13 Sep 2024 12:03:47 +0900 Subject: [PATCH] feat: add usearch (#2608) * feat: add usearch * style: format code with Gofumpt and Prettier This commit fixes the style issues introduced in 58baee9 according to the output from Gofumpt and Prettier. Details: https://github.com/vdaas/vald/pull/2608 * feat: impl usearch istallation cmd for ci/base container * style: format code with Gofumpt and Prettier This commit fixes the style issues introduced in 938cc12 according to the output from Gofumpt and Prettier. Details: https://github.com/vdaas/vald/pull/2608 * add: multiple vector test * fix: add ldconfg to Makefile * refactor: covert switch to map --------- Co-authored-by: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com> Co-authored-by: Hiroto Funakoshi Co-authored-by: Kiichiro YUKAWA Co-authored-by: Yusuke Kato --- Makefile | 22 + dockers/ci/base/Dockerfile | 1 + go.mod | 1 + go.sum | 2 + hack/actions/gen/main.go | 1 + hack/docker/gen/main.go | 10 +- internal/core/algorithm/usearch/option.go | 156 +++++++ internal/core/algorithm/usearch/usearch.go | 251 ++++++++++++ .../core/algorithm/usearch/usearch_test.go | 379 ++++++++++++++++++ internal/errors/usearch.go | 32 ++ versions/USEARCH_VERSION | 1 + 11 files changed, 852 insertions(+), 4 deletions(-) create mode 100644 internal/core/algorithm/usearch/option.go create mode 100644 internal/core/algorithm/usearch/usearch.go create mode 100644 internal/core/algorithm/usearch/usearch_test.go create mode 100644 internal/errors/usearch.go create mode 100644 versions/USEARCH_VERSION diff --git a/Makefile b/Makefile index 42b9302bfc..644937332c 100644 --- a/Makefile +++ b/Makefile @@ -85,6 +85,7 @@ BUF_VERSION := $(eval BUF_VERSION := $(shell cat versions/BUF_VERS CMAKE_VERSION := $(eval CMAKE_VERSION := $(shell cat versions/CMAKE_VERSION))$(CMAKE_VERSION) 
DOCKER_VERSION := $(eval DOCKER_VERSION := $(shell cat versions/DOCKER_VERSION))$(DOCKER_VERSION) FAISS_VERSION := $(eval FAISS_VERSION := $(shell cat versions/FAISS_VERSION))$(FAISS_VERSION) +USEARCH_VERSION := $(eval USEARCH_VERSION := $(shell cat versions/USEARCH_VERSION))$(USEARCH_VERSION) GOLANGCILINT_VERSION := $(eval GOLANGCILINT_VERSION := $(shell cat versions/GOLANGCILINT_VERSION))$(GOLANGCILINT_VERSION) GO_VERSION := $(eval GO_VERSION := $(shell cat versions/GO_VERSION))$(GO_VERSION) HDF5_VERSION := $(eval HDF5_VERSION := $(shell cat versions/HDF5_VERSION))$(HDF5_VERSION) @@ -603,6 +604,11 @@ version/ngt: version/faiss: @echo $(FAISS_VERSION) +.PHONY: version/usearch +## print usearch version +version/usearch: + @echo $(USEARCH_VERSION) + .PHONY: version/docker ## print Kubernetes version version/docker: @@ -677,6 +683,22 @@ $(LIB_PATH)/libfaiss.a: rm -rf $(TEMP_DIR)/v$(FAISS_VERSION).tar.gz $(TEMP_DIR)/faiss-$(FAISS_VERSION) ldconfig +.PHONY: usearch/install +## install usearch +usearch/install: +ifeq ($(OS),linux) + curl -sSL https://github.com/unum-cloud/usearch/releases/download/v$(USEARCH_VERSION)/usearch_$(OS)_$(GOARCH)_$(USEARCH_VERSION).deb -o usearch_$(OS)_$(USEARCH_VERSION).deb + dpkg -i usearch_$(OS)_$(USEARCH_VERSION).deb + rm usearch_$(OS)_$(USEARCH_VERSION).deb + ldconfig +else ifeq ($(OS),macos) + curl -sSL https://github.com/unum-cloud/usearch/releases/download/v$(USEARCH_VERSION)/usearch_macos_$(GOARCH)_$(USEARCH_VERSION).zip -o usearch_macos_$(OS)_$(USEARCH_VERSION).zip + unzip usearch_macos_$(OS)_$(USEARCH_VERSION).zip + sudo mv libusearch_c.dylib /usr/local/lib && sudo mv usearch.h /usr/local/include + rm -rf usearch_macos_$(OS)_$(USEARCH_VERSION).zip + ldconfig +endif + .PHONY: cmake/install ## install CMAKE cmake/install: diff --git a/dockers/ci/base/Dockerfile b/dockers/ci/base/Dockerfile index 09fccb1e35..3ee2d9b83c 100644 --- a/dockers/ci/base/Dockerfile +++ b/dockers/ci/base/Dockerfile @@ -120,6 +120,7 @@ RUN 
--mount=type=bind,target=.,rw \ && make telepresence/install \ && make ngt/install \ && make faiss/install \ + && make usearch/install \ && rm -rf ${GOPATH}/src/github.com/${ORG}/${REPO}/* # skipcq: DOK-DL3002 USER root:root diff --git a/go.mod b/go.mod index 3e63824046..fea8003d5d 100644 --- a/go.mod +++ b/go.mod @@ -380,6 +380,7 @@ require ( github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/scylladb/gocqlx v0.0.0-00010101000000-000000000000 github.com/stretchr/testify v1.9.0 + github.com/unum-cloud/usearch/golang v0.0.0-20240828190432-b9a9758a06e1 github.com/zeebo/xxh3 v1.0.2 go.etcd.io/bbolt v1.3.8 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 diff --git a/go.sum b/go.sum index 309a52862a..3295469f7c 100644 --- a/go.sum +++ b/go.sum @@ -635,6 +635,8 @@ github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vl github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/unum-cloud/usearch/golang v0.0.0-20240828190432-b9a9758a06e1 h1:hILse+Dt0Sk6RfyG19Ld48kcdTOnHx2F6dm3QH1X4Mw= +github.com/unum-cloud/usearch/golang v0.0.0-20240828190432-b9a9758a06e1/go.mod h1:NxBpQibuBBeA/V8RGbrNzVAv4OyWWL5yNao7mVz656k= github.com/urfave/cli/v2 v2.4.0/go.mod h1:NX9W0zmTvedE5oDoOMs2RTC8RvdK98NTYZE5LbaEYPg= github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= diff --git a/hack/actions/gen/main.go b/hack/actions/gen/main.go index d5be92145f..269f6f1973 100644 --- a/hack/actions/gen/main.go +++ b/hack/actions/gen/main.go @@ -313,6 +313,7 @@ const ( rustVersionPath = versionsPath + "/RUST_VERSION" faissVersionPath = versionsPath + "/FAISS_VERSION" ngtVersionPath = versionsPath + "/NGT_VERSION" + 
usearchVersionPath = versionsPath + "/USEARCH_VERSION" makefilePath = "Makefile" makefileDirPath = "Makefile.d/**" diff --git a/hack/docker/gen/main.go b/hack/docker/gen/main.go index 7750270efc..3bc6c1bb9e 100644 --- a/hack/docker/gen/main.go +++ b/hack/docker/gen/main.go @@ -241,8 +241,9 @@ const ( agentInernalPackage = "pkg/agent/internal" - ngtPreprocess = "make ngt/install" - faissPreprocess = "make faiss/install" + ngtPreprocess = "make ngt/install" + faissPreprocess = "make faiss/install" + usearchPreprocess = "make usearch/install" helmOperatorRootdir = "/opt/helm" helmOperatorWatchFile = helmOperatorRootdir + "/watches.yaml" @@ -645,7 +646,7 @@ func main() { append(ngtBuildDeps, append(faissBuildDeps, devContainerDeps...)...)...)...), - Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess), + Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess, usearchPreprocess), Entrypoints: []string{"/bin/bash"}, }, "vald-dev-container": { @@ -663,7 +664,8 @@ func main() { Preprocess: append(devContainerPreprocess, append(ciContainerPreprocess, ngtPreprocess, - faissPreprocess)...), + faissPreprocess, + usearchPreprocess)...), }, "vald-buildbase": { AppName: "buildbase", diff --git a/internal/core/algorithm/usearch/option.go b/internal/core/algorithm/usearch/option.go new file mode 100644 index 0000000000..d4bf0061c8 --- /dev/null +++ b/internal/core/algorithm/usearch/option.go @@ -0,0 +1,156 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Package usearch provides implementation of Go API for https://github.com/unum-cloud/usearch
+package usearch
+
+import (
+	"strconv"
+	"strings"
+
+	"github.com/kpango/fastime"
+	core "github.com/unum-cloud/usearch/golang"
+	"github.com/vdaas/vald/internal/core/algorithm"
+	"github.com/vdaas/vald/internal/errors"
+)
+
+// Option represents the functional option for usearch.
+type Option func(*usearch) error
+
+// defaultOptions holds the baseline settings. gen applies them before the
+// caller-supplied options, so any explicit option overrides the value set here.
+var defaultOptions = []Option{
+	WithIndexPath("/tmp/usearch-" + strconv.FormatInt(fastime.UnixNanoNow(), 10)),
+	WithQuantizationType("F32"),
+	WithMetricType("cosine"),
+	WithDimension(64),
+	WithConnectivity(0),
+	WithExpansionAdd(0),
+	WithExpansionSearch(0),
+	WithMulti(false),
+}
+
+// WithIndexPath represents the option to set the index path for usearch.
+// An empty path is rejected with an ignored-option error and the currently
+// configured path is left untouched.
+func WithIndexPath(path string) Option {
+	return func(u *usearch) error {
+		if len(path) == 0 {
+			return errors.NewErrIgnoredOption("indexPath")
+		}
+		u.idxPath = path
+		return nil
+	}
+}
+
+// WithQuantizationType represents the option to set the quantizationType for usearch.
+// The name is matched case-sensitively against BF16, F16, F32, F64, I8 and B1;
+// any other name yields a critical-option error wrapping a usearch error.
+func WithQuantizationType(quantizationType string) Option {
+	return func(u *usearch) error {
+		quantizationTypeMap := map[string]core.Quantization{
+			"BF16": core.BF16,
+			"F16":  core.F16,
+			"F32":  core.F32,
+			"F64":  core.F64,
+			"I8":   core.I8,
+			"B1":   core.B1,
+		}
+		// Use a distinct name for the lookup result: shadowing the parameter
+		// would make the error below report the zero core.Quantization value
+		// instead of the name the caller actually passed.
+		quantization, ok := quantizationTypeMap[quantizationType]
+		if !ok {
+			err := errors.NewUsearchError("unsupported QuantizationType")
+			return errors.NewErrCriticalOption("QuantizationType", quantizationType, err)
+		}
+		u.quantizationType = quantization
+		return nil
+	}
+}
+
+// WithMetricType represents the option to set the metricType for usearch.
+func WithMetricType(metricType string) Option {
+	return func(u *usearch) error {
+		metricTypeMap := map[string]core.Metric{
+			"l2sq":       core.L2sq,
+			"ip":         core.InnerProduct,
+			"cosine":     core.Cosine,
+			"haversine":  core.Haversine,
+			"divergence": core.Divergence,
+			"pearson":    core.Pearson,
+			"hamming":    core.Hamming,
+			"tanimoto":   core.Tanimoto,
+			"sorensen":   core.Sorensen,
+		}
+		// Lower-case the name and drop '-', '_' and ' ' before the lookup, so
+		// spellings such as "L2_Sq" or "l2-sq" resolve to the "l2sq" key.
+		normalized := strings.NewReplacer("-", "", "_", "", " ", "").Replace(strings.ToLower(metricType))
+		// Use a distinct name for the lookup result: shadowing the parameter
+		// would make the error below report the zero core.Metric value instead
+		// of the name the caller actually passed.
+		metric, ok := metricTypeMap[normalized]
+		if !ok {
+			err := errors.NewUsearchError("unsupported MetricType")
+			return errors.NewErrCriticalOption("MetricType", metricType, err)
+		}
+		u.metricType = metric
+		return nil
+	}
+}
+
+// WithDimension represents the option to set the dimension for usearch.
+// Values outside [algorithm.MinimumVectorDimensionSize,
+// algorithm.MaximumVectorDimensionSize] yield a critical-option error.
+func WithDimension(dim int) Option {
+	return func(u *usearch) error {
+		if dim > algorithm.MaximumVectorDimensionSize || dim < algorithm.MinimumVectorDimensionSize {
+			err := errors.ErrInvalidDimensionSize(dim, algorithm.MaximumVectorDimensionSize)
+			return errors.NewErrCriticalOption("dimension", dim, err)
+		}
+
+		u.dimension = uint(dim)
+		return nil
+	}
+}
+
+// WithConnectivity represents the option to set the connectivity for usearch.
+// Negative values are rejected with an invalid-option error.
+func WithConnectivity(connectivity int) Option {
+	return func(u *usearch) error {
+		if connectivity < 0 {
+			return errors.NewErrInvalidOption("Connectivity", connectivity)
+		}
+
+		u.connectivity = uint(connectivity)
+		return nil
+	}
+}
+
+// WithExpansionAdd represents the option to set the expansion add for usearch.
+// Negative values are rejected with an invalid-option error.
+func WithExpansionAdd(expansionAdd int) Option {
+	return func(u *usearch) error {
+		if expansionAdd < 0 {
+			return errors.NewErrInvalidOption("Expansion Add", expansionAdd)
+		}
+
+		u.expansionAdd = uint(expansionAdd)
+		return nil
+	}
+}
+
+// WithExpansionSearch represents the option to set the expansion search for usearch.
+func WithExpansionSearch(expansionSearch int) Option {
+	return func(u *usearch) error {
+		if expansionSearch < 0 {
+			return errors.NewErrInvalidOption("Expansion Search", expansionSearch)
+		}
+
+		u.expansionSearch = uint(expansionSearch)
+		return nil
+	}
+}
+
+// WithMulti represents the option to set the multi for usearch.
+func WithMulti(multi bool) Option {
+	return func(u *usearch) error {
+		u.multi = multi
+		return nil
+	}
+}
diff --git a/internal/core/algorithm/usearch/usearch.go b/internal/core/algorithm/usearch/usearch.go
new file mode 100644
index 0000000000..0d8c647f63
--- /dev/null
+++ b/internal/core/algorithm/usearch/usearch.go
@@ -0,0 +1,251 @@
+//
+// Copyright (C) 2019-2024 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Package usearch provides Go API implementation for USearch library. https://github.com/unum-cloud/usearch
+package usearch
+
+import (
+	"strconv"
+	"sync"
+
+	core "github.com/unum-cloud/usearch/golang"
+	"github.com/vdaas/vald/internal/core/algorithm"
+	"github.com/vdaas/vald/internal/errors"
+)
+
+type (
+	// Usearch is the core interface for interacting with usearch index.
+	Usearch interface {
+		// SaveIndex saves the USearch index to the configured index path.
+		SaveIndex() error
+
+		// SaveIndexWithPath saves the USearch index to the specified path.
+		SaveIndexWithPath(path string) error
+
+		// GetIndicesSize returns the number of vectors in index.
+		GetIndicesSize() (indicesSize int, err error)
+
+		// Reserve reserves memory for the given number of vectors.
+		Reserve(vectorCount int) error
+
+		// Add adds a vector to the USearch index under the given key.
+		Add(key uint64, vec []float32) error
+
+		// Search performs a nearest neighbor search and returns the results.
+		Search(q []float32, k int) ([]algorithm.SearchResult, error)
+
+		// GetObject returns the vector data stored under the given key.
+		GetObject(key core.Key, count int) ([]float32, error)
+
+		// Remove removes vectors from the index by key.
+		Remove(key uint64) error
+
+		// Close frees the resources used by the USearch index.
+		Close() error
+	}
+
+	usearch struct {
+		// index is the handle to the underlying usearch index.
+		index *core.Index
+
+		// config
+		quantizationType core.Quantization
+		metricType       core.Metric
+		dimension        uint
+		connectivity     uint
+		expansionAdd     uint
+		expansionSearch  uint
+		multi            bool
+
+		idxPath string
+		// mu guards every access to index.
+		mu *sync.RWMutex
+	}
+)
+
+// New initializes a new USearch instance with the provided options.
+func New(opts ...Option) (Usearch, error) {
+	return gen(false, opts...)
+}
+
+// Load initializes a USearch instance and loads the index stored at the
+// configured index path.
+func Load(opts ...Option) (Usearch, error) {
+	return gen(true, opts...)
+}
+
+// gen builds a usearch instance. The default options are applied first and the
+// caller's options afterwards; the first option that fails aborts construction.
+// When isLoad is true the index is read from idxPath, otherwise an empty index
+// is created from the configured parameters.
+func gen(isLoad bool, opts ...Option) (Usearch, error) {
+	var (
+		u   = new(usearch)
+		err error
+	)
+	u.mu = new(sync.RWMutex)
+
+	for _, opt := range append(defaultOptions, opts...) {
+		if err = opt(u); err != nil {
+			return nil, errors.NewUsearchError("usearch option error: " + err.Error())
+		}
+	}
+
+	if isLoad {
+		// NOTE(review): only the dimension is handed to the index here;
+		// presumably the remaining parameters come from the saved file — confirm.
+		conf := core.DefaultConfig(u.dimension)
+		u.index, err = core.NewIndex(conf)
+		if err != nil {
+			return nil, errors.NewUsearchError("usearch new index error for load index: " + err.Error())
+		}
+
+		if err = u.index.Load(u.idxPath); err != nil {
+			// Release the index created above; nothing else holds a reference
+			// to it, so it would otherwise leak. The load error is the one
+			// worth reporting, hence the Destroy result is dropped.
+			_ = u.index.Destroy()
+			return nil, errors.NewUsearchError("usearch load index error: " + err.Error())
+		}
+		return u, nil
+	}
+
+	options := core.DefaultConfig(u.dimension)
+	options.Quantization = u.quantizationType
+	options.Metric = u.metricType
+	options.Dimensions = u.dimension
+	options.Connectivity = u.connectivity
+	options.ExpansionAdd = u.expansionAdd
+	options.ExpansionSearch = u.expansionSearch
+	options.Multi = u.multi
+
+	u.index, err = core.NewIndex(options)
+	if err != nil {
+		return nil, errors.NewUsearchError("usearch create index error: " + err.Error())
+	}
+
+	return u, nil
+}
+
+// SaveIndex stores usearch index to the configured index path.
+func (u *usearch) SaveIndex() error {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+
+	if err := u.index.Save(u.idxPath); err != nil {
+		return errors.NewUsearchError("usearch save index error: " + err.Error())
+	}
+	return nil
+}
+
+// SaveIndexWithPath stores usearch index to specified storage.
+func (u *usearch) SaveIndexWithPath(idxPath string) error {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+
+	if err := u.index.Save(idxPath); err != nil {
+		return errors.NewUsearchError("usearch save index with path error: " + err.Error())
+	}
+	return nil
+}
+
+// GetIndicesSize returns the number of vectors in index.
+func (u *usearch) GetIndicesSize() (indicesSize int, err error) {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	size, err := u.index.Len()
+	if err != nil {
+		return -1, errors.NewUsearchError("failed to usearch_size")
+	}
+	return int(size), nil
+}
+
+// Add adds a vector to the index under the given key.
+// The vector length must equal the configured dimension.
+func (u *usearch) Add(key core.Key, vec []float32) error {
+	if len(vec) != int(u.dimension) {
+		return errors.New("inconsistent dimensions")
+	}
+
+	u.mu.Lock()
+	// Register the unlock before calling into the index so the lock is
+	// released even if the call panics.
+	defer u.mu.Unlock()
+	if err := u.index.Add(key, vec); err != nil {
+		return errors.NewUsearchError("failed to usearch_add")
+	}
+	return nil
+}
+
+// Reserve reserves memory for the given number of vectors.
+// A negative vectorCount is rejected.
+func (u *usearch) Reserve(vectorCount int) error {
+	if vectorCount < 0 {
+		// uint(vectorCount) would wrap around to an enormous request.
+		return errors.NewUsearchError("invalid reserve size: vector count must not be negative")
+	}
+
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	if err := u.index.Reserve(uint(vectorCount)); err != nil {
+		return errors.NewUsearchError("failed to usearch_reserve")
+	}
+	return nil
+}
+
+// Search returns search result as []algorithm.SearchResult.
+// The query length must equal the configured dimension and k must not be
+// negative. At most k results are returned; an empty result set is reported
+// as errors.ErrEmptySearchResult.
+func (u *usearch) Search(q []float32, k int) ([]algorithm.SearchResult, error) {
+	if len(q) != int(u.dimension) {
+		return nil, errors.ErrIncompatibleDimensionSize(len(q), int(u.dimension))
+	}
+	if k < 0 {
+		// uint(k) would wrap around to an enormous result limit.
+		return nil, errors.NewUsearchError("invalid search size: k must not be negative")
+	}
+
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	keys, distances, err := u.index.Search(q, uint(k))
+	if err != nil {
+		return nil, errors.NewUsearchError("failed to usearch_search")
+	}
+
+	if len(keys) == 0 || len(distances) == 0 {
+		return nil, errors.ErrEmptySearchResult
+	}
+	// Bound by both slices so a shorter distance slice cannot be indexed out of range.
+	result := make([]algorithm.SearchResult, min(len(keys), len(distances), k))
+	for i := range result {
+		// The result ID is uint32; the conversion drops the upper bits of larger keys.
+		result[i] = algorithm.SearchResult{ID: uint32(keys[i]), Distance: distances[i], Error: nil}
+	}
+	return result, nil
+}
+
+// GetObject returns the vector data stored under the given key as []float32.
+func (u *usearch) GetObject(key core.Key, count int) ([]float32, error) {
+	if count < 0 {
+		// uint(count) would wrap around to an enormous request.
+		return nil, errors.NewUsearchError("invalid object count: count must not be negative")
+	}
+
+	u.mu.RLock()
+	// Deferred so the read lock is released even if the index call panics.
+	defer u.mu.RUnlock()
+	vectors, err := u.index.Get(key, uint(count))
+	if err != nil {
+		return nil, errors.NewUsearchError("failed to usearch_get")
+	}
+
+	// A nil slice without an error is reported as "object not found" for this key.
+	if vectors == nil {
+		return nil, errors.ErrObjectNotFound(
+			errors.NewUsearchError("failed to usearch_get"), strconv.Itoa(int(key)),
+		)
+	}
+
+	return vectors, nil
+}
+
+// Remove removes from usearch index.
+func (u *usearch) Remove(key core.Key) error {
+	u.mu.Lock()
+	// Register the unlock before calling into the index so the lock is
+	// released even if the call panics.
+	defer u.mu.Unlock()
+	if err := u.index.Remove(key); err != nil {
+		return errors.NewUsearchError("failed to usearch_remove")
+	}
+
+	return nil
+}
+
+// Close frees the resources associated with the USearch index.
+// It is safe to call more than once: after a successful Close the index
+// handle is cleared and later calls return nil without touching it.
+func (u *usearch) Close() error {
+	// Take the write lock: Close replaces u.index, which every other method
+	// reads under the same mutex.
+	u.mu.Lock()
+	defer u.mu.Unlock()
+
+	if u.index == nil {
+		return nil
+	}
+	if err := u.index.Destroy(); err != nil {
+		return errors.NewUsearchError("failed to usearch_free")
+	}
+	u.index = nil
+	return nil
+}
diff --git a/internal/core/algorithm/usearch/usearch_test.go b/internal/core/algorithm/usearch/usearch_test.go
new file mode 100644
index 0000000000..8ec0f3941e
--- /dev/null
+++ b/internal/core/algorithm/usearch/usearch_test.go
@@ -0,0 +1,379 @@
+//
+// Copyright (C) 2019-2024 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Package usearch provides Go API implementation for USearch library.
+// https://github.com/unum-cloud/usearch
+package usearch
+
+import (
+	"math"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/vdaas/vald/internal/core/algorithm"
+	"github.com/vdaas/vald/internal/errors"
+	"github.com/vdaas/vald/internal/log"
+	"github.com/vdaas/vald/internal/log/logger"
+	"github.com/vdaas/vald/internal/test/comparator"
+	"github.com/vdaas/vald/internal/test/goleak"
+)
+
+var (
+	usearchComparator = []comparator.Option{
+		comparator.AllowUnexported(usearch{}),
+		comparator.RWMutexComparer,
+		comparator.ErrorComparer,
+		comparator.AtomicUint64Comparator,
+	}
+
+	// searchResultComparator compares distances only by "zero vs. non-zero":
+	// exact non-zero distances are not asserted.
+	searchResultComparator = []comparator.Option{
+		comparator.CompareField("Distance", comparator.Comparer(func(s1, s2 float32) bool {
+			if s1 == 0 { // if vec1 is same as vec2, the distance should be same
+				return s2 == 0
+			}
+			// by setting non-zero value in test case, it will only check if both got/want is non-zero
+			return s1 != 0 && s2 != 0
+		})),
+	}
+
+	// defaultAfterFunc closes the index and reports a failed Close to the test.
+	defaultAfterFunc = func(t *testing.T, u Usearch) error {
+		t.Helper()
+
+		if u == nil {
+			return nil
+		}
+
+		return u.Close()
+	}
+)
+
+// idxTempDir returns an index path inside a per-test temporary directory.
+func idxTempDir(t *testing.T) string {
+	t.Helper()
+	return filepath.Join(t.TempDir(), "index")
+}
+
+func TestMain(m *testing.M) {
+	log.Init(log.WithLoggerType(logger.NOP.String()))
+	os.Exit(m.Run())
+}
+
+func Test_usearch_Search(t *testing.T) {
+	type args struct {
+		q []float32
+		k int
+	}
+	type fields struct {
+		idxPath          string
+		quantizationType string
+		metricType       string
+		dimension        int
+		connectivity     int
+		expansionAdd     int
+		expansionSearch  int
+		multi            bool
+	}
+	type want struct {
+		want []algorithm.SearchResult
+		err  error
+	}
+	type test struct {
+		name       string
+		args       args
+		fields     fields
+		createFunc func(t *testing.T, fields fields) (Usearch, error)
+		want       want
+		checkFunc  func(want, []algorithm.SearchResult, Usearch, error) error
+		beforeFunc func(args)
+		afterFunc  func(*testing.T, Usearch) error
+	}
+	defaultCreateFunc := func(t *testing.T, fields fields) (Usearch, error) {
+		t.Helper()
+
+		return New(
+			WithIndexPath(fields.idxPath),
+			WithQuantizationType(fields.quantizationType),
+			WithMetricType(fields.metricType),
+			WithDimension(fields.dimension),
+			WithConnectivity(fields.connectivity),
+			WithExpansionAdd(fields.expansionAdd),
+			WithExpansionSearch(fields.expansionSearch),
+			WithMulti(fields.multi),
+		)
+	}
+	defaultCheckFunc := func(w want, got []algorithm.SearchResult, n Usearch, err error) error {
+		if !errors.Is(err, w.err) {
+			return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err)
+		}
+		if diff := comparator.Diff(got, w.want, searchResultComparator...); diff != "" {
+			return errors.Errorf("diff: %s", diff)
+		}
+
+		return nil
+	}
+	insertCreateFunc := func(t *testing.T, fields fields, vecs [][]float32, poolSize uint32) (Usearch, error) { // create func with insert/index
+		t.Helper()
+
+		u, err := defaultCreateFunc(t, fields)
+		if err != nil {
+			return nil, err
+		}
+
+		err = u.Reserve(int(poolSize))
+		if err != nil {
+			// Free the index created above; the Reserve error is the one reported.
+			_ = u.Close()
+			return nil, err
+		}
+
+		// Keys are 1-based: vecs[i] is stored under key i+1.
+		for i, v := range vecs {
+			if err := u.Add(uint64(i+1), v); err != nil {
+				t.Error(err)
+				// Free the index created above; the Add error is the one reported.
+				_ = u.Close()
+				return nil, err
+			}
+		}
+
+		return u, nil
+	}
+	tests := []test{
+		{
+			name: "return vector id after the same vector inserted",
+			args: args{
+				q: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8},
+				k: 5,
+			},
+			fields: fields{
+				idxPath:          idxTempDir(t),
+				quantizationType: "F32",
+				metricType:       "cosine",
+				dimension:        9,
+				connectivity:     0,
+				expansionAdd:     0,
+				expansionSearch:  0,
+				multi:            false,
+			},
+			createFunc: func(t *testing.T, fields fields) (Usearch, error) {
+				t.Helper()
+				vec := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8}
+
+				return insertCreateFunc(t, fields, [][]float32{vec}, 1)
+			},
+			want: want{
+				want: []algorithm.SearchResult{
+					{ID: uint32(1), Distance: 0},
+				},
+			},
+		},
+		{
+			name: "return vector id after the nearby vector inserted",
+			args: args{
+				q: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9},
+				k: 5,
+			},
+			fields: fields{
+				idxPath:          idxTempDir(t),
+				quantizationType: "F32",
+				metricType:       "cosine",
+				dimension:        9,
+				connectivity:     0,
+				expansionAdd:     0,
+				expansionSearch:  0,
+				multi:            false,
+			},
+			createFunc: func(t *testing.T, fields fields) (Usearch, error) {
+				t.Helper()
+				iv := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8}
+
+				return insertCreateFunc(t, fields, [][]float32{iv}, 1)
+			},
+			want: want{
+				want: []algorithm.SearchResult{
+					{ID: uint32(1), Distance: 1},
+				},
+			},
+		},
+		{
+			name: "return limited result after insert 10 vectors with limited size 3",
+			args: args{
+				q: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9},
+				k: 3,
+			},
+			fields: fields{
+				idxPath:          idxTempDir(t),
+				quantizationType: "F32",
+				metricType:       "cosine",
+				dimension:        9,
+				connectivity:     0,
+				expansionAdd:     0,
+				expansionSearch:  0,
+				multi:            false,
+			},
+			createFunc: func(t *testing.T, fields fields) (Usearch, error) {
+				t.Helper()
+				ivs := [][]float32{ // insert 10 vec
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},
+					{2, 3, 4, 5, 6, 7, 8, 9, math.MaxFloat32},
+				}
+
+				return insertCreateFunc(t, fields, ivs, 10)
+			},
+			want: want{
+				want: []algorithm.SearchResult{
+					{ID: uint32(10), Distance: 3},
+					{ID: uint32(9), Distance: 3},
+					{ID: uint32(8), Distance: 3},
+				},
+			},
+		},
+		{
+			name: "return most accurate result after insert 10 vectors with limited size 5",
+			args: args{
+				q: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9},
+				k: 5,
+			},
+			fields: fields{
+				idxPath:          idxTempDir(t),
+				quantizationType: "F32",
+				metricType:       "cosine",
+				dimension:        9,
+				connectivity:     0,
+				expansionAdd:     0,
+				expansionSearch:  0,
+				multi:            false,
+			},
+			createFunc: func(t *testing.T, fields fields) (Usearch, error) {
+				t.Helper()
+				ivs := [][]float32{
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},    // vec id 1
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},   // vec id 2
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},    // vec id 3
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},   // vec id 4
+					{0, 1, 2, 3, 4, 5, 6, 7, 8},    // vec id 5
+					{2, 3, 4, 5, 6, 7, 8, 9, 10},   // vec id 6
+					{2, 3, 4, 5, 6, 7, 8, 9, 9.04}, // vec id 7
+					{2, 3, 4, 5, 6, 7, 8, 9, 9.03}, // vec id 8
+					{1, 2, 3, 4, 5, 6, 7, 8, 9.01}, // vec id 9
+					{1, 2, 3, 4, 5, 6, 7, 8, 9.02}, // vec id 10
+				}
+
+				return insertCreateFunc(t, fields, ivs, 10)
+			},
+			want: want{
+				want: []algorithm.SearchResult{
+					{ID: uint32(9), Distance: 2.384185791015625e-07},
+					{ID: uint32(10), Distance: 5.364418029785156e-07},
+					{ID: uint32(6), Distance: 3},
+					{ID: uint32(4), Distance: 3},
+					{ID: uint32(2), Distance: 3},
+				},
+			},
+		},
+		{
+			name: "return nothing if the search dimension is less than the inserted vector",
+			args: args{
+				q: []float32{0, 1, 2, 3, 4, 5, 6, 7},
+				k: 5,
+			},
+			fields: fields{
+				idxPath:          idxTempDir(t),
+				quantizationType: "F32",
+				metricType:       "cosine",
+				dimension:        9,
+				connectivity:     0,
+				expansionAdd:     0,
+				expansionSearch:  0,
+				multi:            false,
+			},
+			createFunc: func(t *testing.T, fields fields) (Usearch, error) {
+				t.Helper()
+				vec := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8}
+
+				return insertCreateFunc(t, fields, [][]float32{vec}, 1)
+			},
+			want: want{
+				err: errors.New("incompatible dimension size detected\trequested: 8,\tconfigured: 9"),
+			},
+		},
+		{
+			name: "return nothing if the search dimension is more than the inserted vector",
+			args: args{
+				q: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+				k: 5,
+			},
+			fields: fields{
+				idxPath:          idxTempDir(t),
+				quantizationType: "F32",
+				metricType:       "cosine",
+				dimension:        9,
+				connectivity:     0,
+				expansionAdd:     0,
+				expansionSearch:  0,
+				multi:            false,
+			},
+			createFunc: func(t *testing.T, fields fields) (Usearch, error) {
+				t.Helper()
+				vec := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8}
+
+				return insertCreateFunc(t, fields, [][]float32{vec}, 1)
+			},
+			want: want{
+				err: errors.New("incompatible dimension size detected\trequested: 10,\tconfigured: 9"),
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		test := tc
+		t.Run(test.name, func(tt *testing.T) {
+			tt.Parallel()
+
+			defer goleak.VerifyNone(tt, goleak.IgnoreCurrent())
+			if test.beforeFunc != nil {
+				test.beforeFunc(test.args)
+			}
+			if test.afterFunc == nil {
+				test.afterFunc = defaultAfterFunc
+			}
+			checkFunc := test.checkFunc
+			if test.checkFunc == nil {
+				checkFunc = defaultCheckFunc
+			}
+			if test.createFunc == nil {
+				test.createFunc = defaultCreateFunc
+			}
+
+			u, err := test.createFunc(tt, test.fields)
+			if err != nil {
+				tt.Fatal(err)
+			}
+
+			got, err := u.Search(test.args.q, test.args.k)
+			if err := checkFunc(test.want, got, u, err); err != nil {
+				tt.Errorf("error = %v", err)
+			}
+
+			if err := test.afterFunc(tt, u); err != nil {
+				tt.Error(err)
+			}
+		})
+	}
+}
diff --git a/internal/errors/usearch.go b/internal/errors/usearch.go
new file mode 100644
index 0000000000..4b065a0f35
--- /dev/null
+++ b/internal/errors/usearch.go
@@ -0,0 +1,32 @@
+//
+// Copyright (C) 2019-2024 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Package errors provides error types and function
+package errors
+
+// UsearchError is the error type reported by the usearch integration.
+// It carries only a message.
+type UsearchError struct {
+	Msg string
+}
+
+// NewUsearchError returns a UsearchError holding msg, as an error value.
+func NewUsearchError(msg string) error {
+	return UsearchError{
+		Msg: msg,
+	}
+}
+
+// Error returns the stored message, implementing the error interface.
+func (u UsearchError) Error() string {
+	return u.Msg
+}
diff --git a/versions/USEARCH_VERSION b/versions/USEARCH_VERSION
new file mode 100644
index 0000000000..d91346fd9e
--- /dev/null
+++ b/versions/USEARCH_VERSION
@@ -0,0 +1 @@
+2.15.1
\ No newline at end of file