From c3e2cb3c11f311851ad3c9488ff7f874d038f43d Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Wed, 5 Aug 2020 18:21:35 +0900 Subject: [PATCH 01/15] implement billion scale data loader Signed-off-by: Kosuke Morimoto --- Makefile | 25 + Makefile.d/bench.mk | 40 ++ .../benchmark/assets/large/dataset/.gitignore | 2 + hack/benchmark/assets/x1b/loader.go | 195 ++++++++ .../benchmark/assets/x1b/loader_test_bench.go | 135 ++++++ pkg/tools/cli/loadtest/assets/dataset.go | 445 ++---------------- pkg/tools/cli/loadtest/assets/dataset_test.go | 2 +- .../assets/{loader.go => hdf5_loader.go} | 68 +-- .../{loader_test.go => hdf5_loader_test.go} | 0 .../cli/loadtest/assets/large_dataset.go | 124 +++++ .../cli/loadtest/assets/small_dataset.go | 211 +++++++++ 11 files changed, 762 insertions(+), 485 deletions(-) create mode 100644 hack/benchmark/assets/large/dataset/.gitignore create mode 100644 hack/benchmark/assets/x1b/loader.go create mode 100644 hack/benchmark/assets/x1b/loader_test_bench.go rename pkg/tools/cli/loadtest/assets/{loader.go => hdf5_loader.go} (68%) rename pkg/tools/cli/loadtest/assets/{loader_test.go => hdf5_loader_test.go} (100%) create mode 100644 pkg/tools/cli/loadtest/assets/large_dataset.go create mode 100644 pkg/tools/cli/loadtest/assets/small_dataset.go diff --git a/Makefile b/Makefile index 069e7c11f4..b2614d3c0d 100644 --- a/Makefile +++ b/Makefile @@ -93,6 +93,31 @@ CXXFLAGS ?= $(CFLAGS) BENCH_DATASET_MD5S := $(eval BENCH_DATASET_MD5S := $(shell find $(BENCH_DATASET_MD5_DIR) -type f -regex ".*\.md5"))$(BENCH_DATASET_MD5S) BENCH_DATASETS = $(BENCH_DATASET_MD5S:$(BENCH_DATASET_MD5_DIR)/%.md5=$(BENCH_DATASET_HDF5_DIR)/%.hdf5) +BENCH_LARGE_DATASET_BASE_DIR = $(BENCH_DATASET_BASE_DIR)/large/dataset + +SIFT1B_ROOT_DIR = $(BENCH_LARGE_DATASET_BASE_DIR)/sift1b + +SIFT1B_BASE_FILE = $(SIFT1B_ROOT_DIR)/bigann_base.bvecs +SIFT1B_LEARN_FILE = $(SIFT1B_ROOT_DIR)/bigann_learn.bvecs +SIFT1B_QUERY_FILE = $(SIFT1B_ROOT_DIR)/bigann_query.bvecs +SIFT1B_GROUNDTRUTH_DIR = $(SIFT1B_ROOT_DIR)/gnd + +SIFT1B_BASE_URL = ftp://ftp.irisa.fr/local/texmex/corpus/ + +DEEP1B_ROOT_DIR = $(BENCH_LARGE_DATASET_BASE_DIR)/deep1b + +DEEP1B_BASE_FILE = $(DEEP1B_ROOT_DIR)/deep1B_base.fvecs +DEEP1B_LEARN_FILE = $(DEEP1B_ROOT_DIR)/deep1B_learn.fvecs +DEEP1B_QUERY_FILE = $(DEEP1B_ROOT_DIR)/deep1B_queries.fvecs +DEEP1B_GROUNDTRUTH_FILE = $(DEEP1B_ROOT_DIR)/deep1B_groundtruth.ivecs + +DEEP1B_BASE_DIR = $(DEEP1B_ROOT_DIR)/base +DEEP1B_BASE_CHUNK_FILES = $(shell printf "$(DEEP1B_BASE_DIR)/base_%02d\n" {0..36}) +DEEP1B_LEARN_DIR = $(DEEP1B_ROOT_DIR)/learn +DEEP1B_LEARN_CHUNK_FILES = $(shell printf "$(DEEP1B_LEARN_DIR)/learn_%02d\n" {0..13}) + +DEEP1B_API_URL = https://cloud-api.yandex.net/v1/disk/public/resources/download?public_key=https://yadi.sk/d/11eDCm7Dsn9GA&path= + DATASET_ARGS ?= identity-128 ADDRESS_ARGS ?= "" diff --git a/Makefile.d/bench.mk b/Makefile.d/bench.mk index da6113f1f8..f980c19ab0 100644 --- a/Makefile.d/bench.mk +++ b/Makefile.d/bench.mk @@ -24,6 +24,24 @@ $(BENCH_DATASET_HDF5_DIR): $(call mkdir, $@) $(call rm, -rf, $@/*) +%.large_dataset_dir: + @test -f $* || mkdir -p $* + +$(SIFT1B_BASE_FILE) $(SIFT1B_LEARN_FILE) $(SIFT1B_QUERY_FILE): | $(SIFT1B_ROOT_DIR).large_dataset_dir + test -f $@ || curl -fsSL $(SIFT1B_BASE_URL)$(subst $(SIFT1B_ROOT_DIR)/,,$@).gz | gunzip -d > $@ + +$(SIFT1B_GROUNDTRUTH_DIR): | $(SIFT1B_ROOT_DIR).large_dataset_dir + test -f $@ || curl -fsSL $(SIFT1B_BASE_URL)bigann_gnd.tar.gz | tar -C $(SIFT1B_ROOT_DIR) -zx + +$(DEEP1B_GROUNDTRUTH_FILE) $(DEEP1B_QUERY_FILE) 
$(DEEP1B_BASE_CHUNK_FILES) $(DEEP1B_LEARN_CHUNK_FILES): | $(DEEP1B_ROOT_DIR).large_dataset_dir + test -f $@ || curl -fsSL "$(shell curl -fsSL "$(DEEP1B_API_URL)$(subst $(DEEP1B_ROOT_DIR),,$@)" | sed -e 's/^{\(.*\)}$$/\1/' | tr ',' '\n' | grep href | cut -d ':' -f 2- | tr -d '"')" -o $@ + +$(DEEP1B_BASE_FILE): | $(DEEP1B_BASE_DIR).large_dataset_dir $(DEEP1B_BASE_CHUNK_FILES) + cat $(DEEP1B_BASE_CHUNK_FILES) > $@ + +$(DEEP1B_LEARN_FILE): | $(DEEP1B_LEARN_DIR).large_dataset_dir $(DEEP1B_LEARN_CHUNK_FILES) + cat $(DEEP1B_LEARN_CHUNK_FILES) > $@ + .PHONY: bench/datasets ## fetch datasets for benchmark bench/datasets: $(BENCH_DATASETS) @@ -45,6 +63,28 @@ bench/datasets/md5dir/print: bench/datasets/hdf5dir/print: @echo $(BENCH_DATASET_HDF5_DIR) +.PHONY: bench/dataset/large +## fetch large dataset for benchmark +bench/dataset/large: \ + bench/dataset/large/sift1b \ + bench/dataset/large/deep1b + +.PHONY: bench/dataset/large/sift1b +## fetch sift1b dataset for benchmark +bench/dataset/large/sift1b: \ + $(SIFT1B_BASE_FILE) \ + $(SIFT1B_LEARN_FILE) \ + $(SIFT1B_QUERY_FILE) \ + $(SIFT1B_GROUNDTRUTH_DIR) + +.PHONY: bench/dataset/large/deep1b +## fetch deep1b dataset for benchmark +bench/dataset/large/deep1b: \ + $(DEEP1B_BASE_FILE) \ + $(DEEP1B_LEARN_FILE) \ + $(DEEP1B_QUERY_FILE) \ + $(DEEP1B_GROUNDTRUTH_FILE) + .PHONY: bench ## run all benchmarks bench: \ diff --git a/hack/benchmark/assets/large/dataset/.gitignore b/hack/benchmark/assets/large/dataset/.gitignore new file mode 100644 index 0000000000..c96a04f008 --- /dev/null +++ b/hack/benchmark/assets/large/dataset/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/hack/benchmark/assets/x1b/loader.go b/hack/benchmark/assets/x1b/loader.go new file mode 100644 index 0000000000..1a7d839bd5 --- /dev/null +++ b/hack/benchmark/assets/x1b/loader.go @@ -0,0 +1,195 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +package x1b + +import ( + "os" + "path/filepath" + "syscall" + "unsafe" + + "github.com/vdaas/vald/internal/errors" +) + +const ( + headerSize = 4 +) + +var ( + ErrOutOfBounds = errors.New("out of bounds") + ErrUnsupportedFileType = errors.New("unsupported file type") +) + +type X1b interface { + Load(i int) (interface{}, error) + Dimension() int + Size() int + Close() error +} + +type Bvecs interface { + X1b + LoadUint8(i int) ([]uint8, error) +} + +type Fvecs interface { + X1b + LoadFloat32(i int) ([]float32, error) +} + +type Ivecs interface { + X1b + LoadInt32(i int) ([]int32, error) +} + +type file struct { + mem []byte + dim int + size int + block int +} + +type bvecs struct { + *file +} +type fvecs struct { + *file +} +type ivecs struct { + *file +} + +func open(fname string, elementSize int) (f *file, err error) { + fp, err := os.Open(fname) + if err != nil { + return nil, err + } + defer func() { + err = fp.Close() + }() + + fi, err := fp.Stat() + if err != nil { + return nil, err + } + + mem, err := syscall.Mmap(int(fp.Fd()), 0, int(fi.Size()), syscall.PROT_READ, syscall.MAP_SHARED) + if err != nil { + return nil, err + } + + dim := int(*(*int32)(unsafe.Pointer(&mem[0]))) + block := headerSize + dim * elementSize + return &file{ + mem: mem, + dim: dim, + size: len(mem) / block, + block: block, + }, nil +} + +func (f *file) Close() error { + return syscall.Munmap(f.mem) +} + +func (f *file) load(i int) ([]byte, error) { + if i >= f.size { + return nil, ErrOutOfBounds + } + + return f.mem[i*f.block+headerSize:(i+1)*f.block], nil +} + +func (f *file) Dimension() int { + return f.dim +} + +func (f *file) Size() int { + return f.size +} + +func (bv *bvecs) LoadUint8(i int) ([]uint8, error) { + buf, err := bv.load(i) + if err != nil { + return nil, err + } + return ((*[1 << 26]uint8)(unsafe.Pointer(&buf[0])))[:bv.dim:bv.dim], nil +} + +func (bv *bvecs) Load(i int) (interface{}, error) { + return bv.LoadUint8(i) +} + +func (fv *fvecs) LoadFloat32(i int) ([]float32, error) { + buf, err := fv.load(i) + if err != nil { + return nil, err + } + return ((*[1 << 26]float32)(unsafe.Pointer(&buf[0])))[:fv.dim:fv.dim], nil +} + +func (fv *fvecs) Load(i int) (interface{}, error) { + return fv.LoadFloat32(i) +} + +func (iv *ivecs) LoadInt32(i int) ([]int32, error) { + buf, err := iv.load(i) + if err != nil { + return nil, err + } + return ((*[1 << 26]int32)(unsafe.Pointer(&buf[0])))[:iv.dim:iv.dim], nil +} + +func (iv *ivecs) Load(i int) (interface{}, error) { + return iv.LoadInt32(i) +} + +func NewBVecs(fname string) (Bvecs, error) { + f, err := open(fname, 1) + if err != nil { + return nil, err + } + return &bvecs{f}, nil +} + +func NewFVecs(fname string) (Fvecs, error) { + f, err := open(fname, 4) + if err != nil { + return nil, err + } + return &fvecs{f}, nil +} + +func NewIVecs(fname string) (Ivecs, error) { + f, err := open(fname, 4) + if err != nil { + return nil, err + } + return &ivecs{f}, nil +} + +func Open(fname string) (X1b, error) { + switch filepath.Ext(fname) { + case ".bvecs": + return NewBVecs(fname) + case ".fvecs": + return NewFVecs(fname) + case ".ivecs": + return NewIVecs(fname) + default: + return nil, ErrUnsupportedFileType + } +} \ No newline at end of file diff --git a/hack/benchmark/assets/x1b/loader_test_bench.go b/hack/benchmark/assets/x1b/loader_test_bench.go new file mode 100644 index 0000000000..17f5ebf21e --- /dev/null +++ b/hack/benchmark/assets/x1b/loader_test_bench.go @@ -0,0 +1,135 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt 
) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package x1b + +import ( + "testing" +) + +const ( + bvecsFile = "../large/sift1b/bigann_base.bvecs" + fvecsFile = "../large/sift1b/gnd/dis_1000M.fvecs" + ivecsFile = "../large/sift1b/gnd/idx_1000M.ivecs" +) + +func BenchmarkBVecs(b *testing.B) { + bv, err := NewBVecs(bvecsFile) + defer func() { + if err := bv.Close(); err != nil { + b.Fatal(err) + } + }() + if err != nil { + b.Fatal(err) + } + + i := 0 + b.Run("", func(bb *testing.B) { + bb.ReportAllocs() + bb.ResetTimer() + for n := 0; n < b.N; n++ { + v, err := bv.Load(i) + switch err { + case nil: + i++ + continue + case ErrOutOfBounds: + if err := bv.Close(); err != nil { + bb.Fatal(err) + } + bv, err = NewBVecs(bvecsFile) + i = 0 + } + if err != nil { + bb.Fatal(err) + } + bb.Log(v) + } + }) +} + +func BenchmarkFVecs(b *testing.B) { + fv, err := NewFVecs(fvecsFile) + defer func() { + if err := fv.Close(); err != nil { + b.Fatal(err) + } + }() + if err != nil { + b.Fatal(err) + } + + i := 0 + b.Run("", func(bb *testing.B) { + bb.ReportAllocs() + bb.ResetTimer() + for n := 0; n < b.N; n++ { + v, err := fv.Load(i) + switch err { + case nil: + i++ + continue + case ErrOutOfBounds: + if err := fv.Close(); err != nil { + bb.Fatal(err) + } + fv, err = NewFVecs(fvecsFile) + i = 0 + } + if err != nil { + bb.Fatal(err) + } + bb.Log(v) + } + }) +} + +func BenchmarkIVecs(b *testing.B) { + iv, err := NewIVecs(ivecsFile) + defer func() { + if err := iv.Close(); err != nil { + b.Fatal(err) + } + }() + + if err != nil { + b.Fatal(err) + } + + i := 0 + b.Run("", func(bb *testing.B) { + bb.ReportAllocs() + bb.ResetTimer() + for n := 0; n < b.N; n++ { + v, err := iv.Load(i) + switch err { + case nil: + i++ + continue + case ErrOutOfBounds: + if err := iv.Close(); err != nil { + bb.Fatal(err) + } + iv, err = NewIVecs(ivecsFile) + i = 0 + } + if err != nil { + bb.Fatal(err) + } + bb.Log(v) + } + }) +} \ No newline at end of file diff --git a/pkg/tools/cli/loadtest/assets/dataset.go b/pkg/tools/cli/loadtest/assets/dataset.go index 5319dc532a..98df8a88d3 100644 --- a/pkg/tools/cli/loadtest/assets/dataset.go +++ b/pkg/tools/cli/loadtest/assets/dataset.go @@ -16,27 +16,23 @@ package assets import ( - "fmt" - "math/rand" "os" "path/filepath" - "strconv" "strings" - "sync" - "github.com/vdaas/vald/internal/log" + "github.com/vdaas/vald/hack/benchmark/assets/x1b" +) + +var ( + ErrOutOfBounds = x1b.ErrOutOfBounds ) // Dataset is representation of train and test dataset. 
type Dataset interface { - Train() [][]float32 - TrainAsFloat64() [][]float64 - Query() [][]float32 - QueryAsFloat64() [][]float64 - Distances() [][]float32 - DistancesAsFloat64() [][]float64 - Neighbors() [][]int - IDs() []string + Train(i int) (interface{}, error) + Query(i int) (interface{}, error) + Distance(i int) ([]float32, error) + Neighbor(i int) ([]int, error) Name() string Dimension() int DistanceType() string @@ -44,401 +40,10 @@ type Dataset interface { } type dataset struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string -} - -var ( - data = map[string]func() (Dataset, error){ - "fashion-mnist": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "fashion-mnist-784-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "fashion-mnist", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, nil - }, - "mnist": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "mnist-784-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "mnist", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, err - }, - "glove-25": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-25-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-25", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "glove-50": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-50-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-50", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "glove-100": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-100-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-100", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "glove-200": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-200-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-200", - dimension: 
d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "nytimes": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "nytimes-256-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "nytimes", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "sift": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "sift-128-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "sift", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, nil - }, - "gist": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "gist-960-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "gist", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, nil - }, - "kosarak": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "/kosarak-jaccard.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "kosarak", - dimension: d.Dimension(), - distanceType: "jaccard", - objectType: "float", - }, nil - }, - } -) - -func identity(dim int) func() (Dataset, error) { - return func() (Dataset, error) { - ids := CreateSerialIDs(dim * 1000) - train := make([][]float32, dim) - for i := range train { - train[i] = make([]float32, dim) - train[i][i] = 1 - } - return &dataset{ - train: train, - query: train, - ids: ids, - name: fmt.Sprintf("identity-%d", dim), - dimension: dim, - distanceType: "l2", - objectType: "float", - }, nil - } -} - -func random(dim, size int) func() (Dataset, error) { - return func() (Dataset, error) { - ids := CreateRandomIDs(size) - train := make([][]float32, size) - query := make([][]float32, size) - for i := range train { - train[i] = make([]float32, dim) - query[i] = make([]float32, dim) - for j := range train[i] { - train[i][j] = rand.Float32() - query[i][j] = rand.Float32() - } - } - return &dataset{ - train: train, - query: query, - ids: ids, - name: fmt.Sprintf("random-%d-%d", dim, size), - dimension: dim, - distanceType: "l2", - objectType: "float", - }, nil - } -} - -func gaussian(dim, size int, mean, stdDev float64) func() (Dataset, error) { - return func() (Dataset, error) { - ids := CreateRandomIDs(size) - train := make([][]float32, size) - query := make([][]float32, size) - for i := range train { - train[i] = make([]float32, dim) - query[i] = make([]float32, dim) - for j := range train[i] { - train[i][j] = float32(rand.NormFloat64()*stdDev + mean) - query[i][j] = float32(rand.NormFloat64()*stdDev + mean) - } - } - return &dataset{ - train: train, - query: query, - ids: ids, - name: fmt.Sprintf("gaussian-%d-%d-%f-%f", dim, size, mean, stdDev), - dimension: dim, - distanceType: "l2", - objectType: "float", - }, nil - } -} - 
-func datasetDir() (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - root := func(cur string) string { - for { - if strings.HasSuffix(cur, "vald") { - return cur - } else { - cur = filepath.Dir(cur) - } - } - }(wd) - return filepath.Join(root, "hack/benchmark/assets/dataset") + "/", nil -} - -// Data loads specified dataset and returns it. -func Data(name string) func() (Dataset, error) { - log.Debugf("start loading: %s", name) - defer log.Debugf("finish loading: %s", name) - if strings.HasPrefix(name, "identity-") { - l := strings.Split(name, "-") - i, _ := strconv.Atoi(l[1]) - return identity(i) - } - if strings.HasPrefix(name, "random-") { - l := strings.Split(name, "-") - d, _ := strconv.Atoi(l[1]) - s, _ := strconv.Atoi(l[2]) - return random(d, s) - } - if strings.HasPrefix(name, "gaussian-") { - l := strings.Split(name, "-") - d, _ := strconv.Atoi(l[1]) - s, _ := strconv.Atoi(l[2]) - m, _ := strconv.ParseFloat(l[3], 64) - sd, _ := strconv.ParseFloat(l[4], 64) - return gaussian(d, s, m, sd) - } - if d, ok := data[name]; ok { - return d - } - return nil -} - -// Train returns vectors for train. -func (d *dataset) Train() [][]float32 { - return d.train -} - -// TrainAsFloat64 returns casted float64 vectors for train. -func (d *dataset) TrainAsFloat64() [][]float64 { - d.trainOnce.Do(func() { - d.trainAsFloat64 = float32To64(d.train) - }) - return d.trainAsFloat64 -} - -// Query returns vectors for test. -func (d *dataset) Query() [][]float32 { - return d.query -} - -// QueryAsFloat64 returns casted float64 vectors for test. -func (d *dataset) QueryAsFloat64() [][]float64 { - d.queryOnce.Do(func() { - d.queryAsFloat64 = float32To64(d.query) - }) - return d.queryAsFloat64 -} - -// Distances returns distances between queries and answers. -func (d *dataset) Distances() [][]float32 { - return d.distances -} - -// Distances returns casted float64 distances between queries and answers. -func (d *dataset) DistancesAsFloat64() [][]float64 { - d.distancesOnce.Do(func() { - d.distancesAsFloat64 = float32To64(d.distances) - }) - return d.distancesAsFloat64 -} - -// Neighbors returns nearest vectors from queries. -func (d *dataset) Neighbors() [][]int { - return d.neighbors -} - -// IDs returns ids of train vectors. -func (d *dataset) IDs() []string { - return d.ids + name string + dimension int + distanceType string + objectType string } // Name returns dataset name. 
@@ -461,13 +66,19 @@ func (d *dataset) ObjectType() string { return d.objectType } -func float32To64(x [][]float32) (y [][]float64) { - y = make([][]float64, len(x)) - for i, z := range x { - y[i] = make([]float64, len(z)) - for j, a := range z { - y[i][j] = float64(a) - } +func findDir(path string) (string, error) { + wd, err := os.Getwd() + if err != nil { + return "", err } - return y -} + root := func(cur string) string { + for { + if strings.HasSuffix(cur, "vald") { + return cur + } else { + cur = filepath.Dir(cur) + } + } + }(wd) + return filepath.Join(root, path) + "/", nil +} \ No newline at end of file diff --git a/pkg/tools/cli/loadtest/assets/dataset_test.go b/pkg/tools/cli/loadtest/assets/dataset_test.go index e081553f7b..d4b2c8cc03 100644 --- a/pkg/tools/cli/loadtest/assets/dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/dataset_test.go @@ -225,7 +225,7 @@ func Test_datasetDir(t *testing.T) { test.checkFunc = defaultCheckFunc } - got, err := datasetDir() + got, err := smallDatasetDir() if err := test.checkFunc(test.want, got, err); err != nil { tt.Errorf("error = %v", err) } diff --git a/pkg/tools/cli/loadtest/assets/loader.go b/pkg/tools/cli/loadtest/assets/hdf5_loader.go similarity index 68% rename from pkg/tools/cli/loadtest/assets/loader.go rename to pkg/tools/cli/loadtest/assets/hdf5_loader.go index 916f5b5d70..57b84aa8ae 100644 --- a/pkg/tools/cli/loadtest/assets/loader.go +++ b/pkg/tools/cli/loadtest/assets/hdf5_loader.go @@ -16,9 +16,6 @@ package assets import ( - "strconv" - - "github.com/kpango/fuid" "github.com/vdaas/vald/internal/errors" "gonum.org/v1/hdf5" ) @@ -118,67 +115,4 @@ func Load(path string) (train, test, distances [][]float32, neighbors [][]int, d } return train, test, distances, neighbors, dim, nil -} - -// CreateRandomIDs generates random string IDs. -func CreateRandomIDs(n int) (ids []string) { - ids = make([]string, 0, n) - for i := 0; i < n; i++ { - ids = append(ids, fuid.String()) - } - return ids -} - -// CreateRandomIDsWithLength generates random string IDs that have specified length. -func CreateRandomIDsWithLength(n, l int) (ids []string) { - ids = make([]string, 0, n) - for i := 0; i < n; i++ { - id := fuid.String() - for len(id) < l { - id = id + fuid.String() - } - ids = append(ids, id[:l]) - } - return ids -} - -// CreateSerialIDs generates serial number IDs. -func CreateSerialIDs(n int) []string { - ids := make([]string, 0, n) - for i := 0; i < n; i++ { - ids = append(ids, strconv.Itoa(i)) - } - return ids -} - -// LoadDataWithRandomIDs returns approximate nearest neighbor benchmark dataset with random IDs. -func LoadDataWithRandomIDs(path string) (Dataset, error) { - train, test, distances, neighbors, dim, err := Load(path) - if err != nil { - return nil, err - } - return &dataset{ - train: train, - query: test, - distances: distances, - neighbors: neighbors, - ids: CreateRandomIDs(len(train)), - dimension: dim, - }, nil -} - -// LoadDataWithSerialIDs returns approximate nearest neighbor benchmark dataset with serial IDs. 
-func LoadDataWithSerialIDs(path string) (Dataset, error) { - train, test, distances, neighbors, dim, err := Load(path) - if err != nil { - return nil, err - } - return &dataset{ - train: train, - query: test, - distances: distances, - neighbors: neighbors, - ids: CreateSerialIDs(len(train)), - dimension: dim, - }, nil -} +} \ No newline at end of file diff --git a/pkg/tools/cli/loadtest/assets/loader_test.go b/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go similarity index 100% rename from pkg/tools/cli/loadtest/assets/loader_test.go rename to pkg/tools/cli/loadtest/assets/hdf5_loader_test.go diff --git a/pkg/tools/cli/loadtest/assets/large_dataset.go b/pkg/tools/cli/loadtest/assets/large_dataset.go new file mode 100644 index 0000000000..80a7a786cd --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/large_dataset.go @@ -0,0 +1,124 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package assets + +import ( + "path/filepath" + + "github.com/vdaas/vald/hack/benchmark/assets/x1b" + "github.com/vdaas/vald/internal/errors" +) + +type largeDataset struct { + *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs +} + +func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFileName, name, distanceType, objectType string) func() (Dataset, error) { + return func() (Dataset, error) { + dir, err := findDir("hack/benchmark/assets/dataset/large") + if err != nil { + return nil, err + } + train, err := x1b.Open(filepath.Join(dir, trainFileName)) + if err != nil { + return nil, err + } + query, err := x1b.Open(filepath.Join(dir, queryFileName)) + if err != nil { + return nil, err + } + tdim := train.Dimension() + qdim := query.Dimension() + if tdim != qdim { + return nil, errors.New("dimension must be same train and query.") + } + iv, err := x1b.NewIVecs(filepath.Join(dir, groundTruthFileName)) + if err != nil { + return nil, er + } + groundTruth := make([][]int, 0, iv.Size()) + for i := 0; ; i++ { + gt32, err := iv.LoadInt32(i) + if err != nil { + if err == ErrOutOfBounds { + break + } + } + gt := make([]int, 0, len(gt32)) + for _, v := range gt32{ + gt = append(gt, int(v)) + } + groundTruth = append(groundTruth, gt) + } + + distances, err := x1b.NewFVecs(filepath.Join(dir, distanceFileName)) + if err != nil { + return nil, err + } + return &largeDataset{ + dataset: &dataset{ + name: name, + dimension: tdim, + distanceType: distanceType, + objectType: objectType, + }, + train: train, + query: query, + groundTruth: groundTruth, + distances: distances, + }, nil + + } +} + +func (d *largeDataset) Train(i int) (interface{}, error) { + return d.train.Load(i) +} + +func (d *largeDataset) Query(i int) (interface{}, error) { + return d.query.Load(i) +} + +func (d *largeDataset) Distance(i int) ([]float32, error) { + return d.distances.LoadFloat32(i) +} + +func (d *largeDataset) Neighbor(i int) ([]int, error) { + if i >= len(d.groundTruth) { + return nil, 
ErrOutOfBounds + } + return d.groundTruth[i], nil +} + +func (d *largeDataset) Dimension() int { + return d.dimension +} + +func (d *largeDataset) DistanceType() string { + return d.distanceType +} + +func (d *largeDataset) ObjectType() string { + return d.objectType +} + +func (d *largeDataset) Name() string { + return d.name +} \ No newline at end of file diff --git a/pkg/tools/cli/loadtest/assets/small_dataset.go b/pkg/tools/cli/loadtest/assets/small_dataset.go new file mode 100644 index 0000000000..447d977e39 --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/small_dataset.go @@ -0,0 +1,211 @@ +package assets + +import ( + "fmt" + "math/rand" + "path/filepath" + "strconv" + "strings" + + "github.com/vdaas/vald/internal/log" +) + +type smallDataset struct { + *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int +} + +func loadSmallData(fileName, datasetName, distanceType, objectType string) func() (Dataset, error) { + return func() (Dataset, error) { + dir, err := findDir("hack/benchmark/assets/dataset") + if err != nil { + return nil, err + } + t, q, d, n, dim, err := Load(filepath.Join(dir, fileName)) + if err != nil { + return nil, err + } + + return &smallDataset{ + dataset: &dataset { + name: datasetName, + dimension: dim, + distanceType: distanceType, + objectType: objectType, + }, + train: t, + query: q, + distances: d, + neighbors: n, + }, nil + } +} + +func identity(dim int) func() (Dataset, error) { + return func() (Dataset, error) { + train := make([][]float32, dim) + for i := range train { + train[i] = make([]float32, dim) + train[i][i] = 1 + } + return &smallDataset{ + dataset: &dataset{ + name: fmt.Sprintf("identity-%d", dim), + dimension: dim, + distanceType: "l2", + objectType: "float", + }, + train: train, + query: train, + }, nil + } +} + +func random(dim, size int) func() (Dataset, error) { + return func() (Dataset, error) { + train := make([][]float32, size) + query := make([][]float32, size) + for i := range train { + train[i] = make([]float32, dim) + query[i] = make([]float32, dim) + for j := range train[i] { + train[i][j] = rand.Float32() + query[i][j] = rand.Float32() + } + } + return &smallDataset{ + dataset: &dataset{ + name: fmt.Sprintf("random-%d-%d", dim, size), + dimension: dim, + distanceType: "l2", + objectType: "float", + }, + train: train, + query: query, + }, nil + } +} + +func gaussian(dim, size int, mean, stdDev float64) func() (Dataset, error) { + return func() (Dataset, error) { + train := make([][]float32, size) + query := make([][]float32, size) + for i := range train { + train[i] = make([]float32, dim) + query[i] = make([]float32, dim) + for j := range train[i] { + train[i][j] = float32(rand.NormFloat64()*stdDev + mean) + query[i][j] = float32(rand.NormFloat64()*stdDev + mean) + } + } + return &smallDataset{ + dataset: &dataset{ + name: fmt.Sprintf("gaussian-%d-%d-%f-%f", dim, size, mean, stdDev), + dimension: dim, + distanceType: "l2", + objectType: "float", + }, + train: train, + query: query, + }, nil + } +} + +// Data loads specified dataset and returns it. 
+func Data(name string) func() (Dataset, error) { + log.Debugf("start loading: %s", name) + defer log.Debugf("finish loading: %s", name) + if strings.HasPrefix(name, "identity-") { + l := strings.Split(name, "-") + i, _ := strconv.Atoi(l[1]) + return identity(i) + } + if strings.HasPrefix(name, "random-") { + l := strings.Split(name, "-") + d, _ := strconv.Atoi(l[1]) + s, _ := strconv.Atoi(l[2]) + return random(d, s) + } + if strings.HasPrefix(name, "gaussian-") { + l := strings.Split(name, "-") + d, _ := strconv.Atoi(l[1]) + s, _ := strconv.Atoi(l[2]) + m, _ := strconv.ParseFloat(l[3], 64) + sd, _ := strconv.ParseFloat(l[4], 64) + return gaussian(d, s, m, sd) + } + switch name { + case "fashion-mnist": + return loadSmallData("fashion-mnist-784-euclidean.hdf5", name, "l2", "float") + case "mnist": + return loadSmallData("mnist-784-euclidean.hdf5", name, "l2", "float") + case "glove-25": + return loadSmallData("glove-25-angular.hdf5", name, "cosine", "float") + case "glove-50": + return loadSmallData("glove-50-angular.hdf5", name, "cosine", "float") + case "glove-100": + return loadSmallData("glove-100-angular.hdf5", name, "cosine", "float") + case "glove-200": + return loadSmallData("glove-200-angular.hdf5", name, "cosine", "float") + case "nytimes": + return loadSmallData("nytimes-256-angular.hdf5", name, "cosine", "float") + case "sift": + return loadSmallData("sift-128-euclidean.hdf5", name, "l2", "float") + case "gist": + return loadSmallData("gist-960-euclidean.hdf5", name, "l2", "float") + case "kosarak": + return loadSmallData("kosarak-jaccard.hdf5", name, "jaccard", "float") + case "sift1b": + return loadLargeData("bigann_base.bvecs", "bigann_query.bvecs", "gnd/idx_1000M.ivecs", "gnd/dis_1000M.fvecs", name, "l2", "uint8") + case "deep1b": + return loadLargeData("deep1B_base.fvecs", "deep1B_query.fvecs", "deep1B_groundtruth.ivecs", "", name, "l2", "float") + } + return nil +} + +// Train returns vectors for train. +func (s *smallDataset) Train(i int) (interface{}, error) { + if i >= len(s.train) { + return nil, ErrOutOfBounds + } + return s.train[i], nil +} + +// Query returns vectors for test. +func (s *smallDataset) Query(i int) (interface{}, error) { + if i >= len(s.query) { + return nil, ErrOutOfBounds + } + return s.query[i], nil +} + +// Distance returns distances between queries and answers. +func (s *smallDataset) Distance(i int) ([]float32, error) { + if i >= len(s.distances) { + return nil, ErrOutOfBounds + } + return s.distances[i], nil +} + +// Neighbors returns nearest vectors from queries. 
+func (s *smallDataset) Neighbor(i int) ([]int, error) { + if i >= len(s.neighbors) { + return nil, ErrOutOfBounds + } + return s.neighbors[i], nil +} + +func float32To64(x [][]float32) (y [][]float64) { + y = make([][]float64, len(x)) + for i, z := range x { + y[i] = make([]float64, len(z)) + for j, a := range z { + y[i][j] = float64(a) + } + } + return y +} + From db838a3ceaf4d04d2a7e6447434cf368cc892f2f Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Fri, 7 Aug 2020 16:43:37 +0900 Subject: [PATCH 02/15] fix dataset to datasets Signed-off-by: Kosuke Morimoto --- Makefile.d/bench.mk | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Makefile.d/bench.mk b/Makefile.d/bench.mk index f980c19ab0..5a2838b378 100644 --- a/Makefile.d/bench.mk +++ b/Makefile.d/bench.mk @@ -63,23 +63,23 @@ bench/datasets/md5dir/print: bench/datasets/hdf5dir/print: @echo $(BENCH_DATASET_HDF5_DIR) -.PHONY: bench/dataset/large -## fetch large dataset for benchmark -bench/dataset/large: \ - bench/dataset/large/sift1b \ - bench/dataset/large/deep1b +.PHONY: bench/datasets/large +## fetch large datasets for benchmark +bench/datasets/large: \ + bench/datasets/large/sift1b \ + bench/datasets/large/deep1b -.PHONY: bench/dataset/large/sift1b +.PHONY: bench/datasets/large/sift1b ## fetch sift1b dataset for benchmark -bench/dataset/large/sift1b: \ +bench/datasets/large/sift1b: \ $(SIFT1B_BASE_FILE) \ $(SIFT1B_LEARN_FILE) \ $(SIFT1B_QUERY_FILE) \ $(SIFT1B_GROUNDTRUTH_DIR) -.PHONY: bench/dataset/large/deep1b +.PHONY: bench/datasets/large/deep1b ## fetch deep1b dataset for benchmark -bench/dataset/large/deep1b: \ +bench/datasets/large/deep1b: \ $(DEEP1B_BASE_FILE) \ $(DEEP1B_LEARN_FILE) \ $(DEEP1B_QUERY_FILE) \ From 7a974a1a8cfcda46206f5740b3f15dd509a8408f Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Thu, 20 Aug 2020 16:09:50 +0900 Subject: [PATCH 03/15] implement billion scale loader Signed-off-by: Kosuke Morimoto --- hack/benchmark/assets/x1b/loader.go | 8 +- .../benchmark/assets/x1b/loader_test_bench.go | 2 +- .../core/benchmark/strategy/insert.go | 14 ++- .../core/benchmark/strategy/insert_commit.go | 14 ++- .../core/benchmark/strategy/search.go | 14 ++- .../benchmark/core/benchmark/strategy/util.go | 40 +++++++-- .../benchmark/internal/e2e/strategy/insert.go | 20 +++-- .../benchmark/internal/e2e/strategy/remove.go | 9 +- .../benchmark/internal/e2e/strategy/search.go | 22 +++-- .../internal/e2e/strategy/stream_insert.go | 11 ++- .../internal/e2e/strategy/stream_remove.go | 5 +- .../internal/e2e/strategy/stream_search.go | 8 +- pkg/tools/cli/loadtest/assets/dataset.go | 61 ++++++++++++- pkg/tools/cli/loadtest/assets/hdf5_loader.go | 2 +- .../cli/loadtest/assets/large_dataset.go | 34 ++++++-- .../cli/loadtest/assets/small_dataset.go | 86 ++++--------------- pkg/tools/cli/loadtest/service/insert.go | 13 +-- pkg/tools/cli/loadtest/service/search.go | 9 +- 18 files changed, 235 insertions(+), 137 deletions(-) diff --git a/hack/benchmark/assets/x1b/loader.go b/hack/benchmark/assets/x1b/loader.go index 1a7d839bd5..5a10040db2 100644 --- a/hack/benchmark/assets/x1b/loader.go +++ b/hack/benchmark/assets/x1b/loader.go @@ -29,7 +29,7 @@ const ( ) var ( - ErrOutOfBounds = errors.New("out of bounds") + ErrOutOfBounds = errors.New("out of bounds") ErrUnsupportedFileType = errors.New("unsupported file type") ) @@ -92,7 +92,7 @@ func open(fname string, elementSize int) (f *file, err error) { } dim := int(*(*int32)(unsafe.Pointer(&mem[0]))) - block := headerSize + dim * elementSize + block := 
headerSize + dim*elementSize return &file{ mem: mem, dim: dim, @@ -110,7 +110,7 @@ func (f *file) load(i int) ([]byte, error) { return nil, ErrOutOfBounds } - return f.mem[i*f.block+headerSize:(i+1)*f.block], nil + return f.mem[i*f.block+headerSize : (i+1)*f.block], nil } func (f *file) Dimension() int { @@ -192,4 +192,4 @@ func Open(fname string) (X1b, error) { default: return nil, ErrUnsupportedFileType } -} \ No newline at end of file +} diff --git a/hack/benchmark/assets/x1b/loader_test_bench.go b/hack/benchmark/assets/x1b/loader_test_bench.go index 17f5ebf21e..d798aeab67 100644 --- a/hack/benchmark/assets/x1b/loader_test_bench.go +++ b/hack/benchmark/assets/x1b/loader_test_bench.go @@ -132,4 +132,4 @@ func BenchmarkIVecs(b *testing.B) { bb.Log(v) } }) -} \ No newline at end of file +} diff --git a/hack/benchmark/core/benchmark/strategy/insert.go b/hack/benchmark/core/benchmark/strategy/insert.go index 2a3c8e504e..f68a4d58af 100644 --- a/hack/benchmark/core/benchmark/strategy/insert.go +++ b/hack/benchmark/core/benchmark/strategy/insert.go @@ -32,14 +32,20 @@ func NewInsert(opts ...StrategyOption) benchmark.Strategy { WithPropName("Insert"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.Train() - return c.Insert(train[int(atomic.LoadUint64(cnt))%len(train)]) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + return c.Insert(v.([]float32)) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.TrainAsFloat64() - return c.Insert(train[int(atomic.LoadUint64(cnt))%len(train)]) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + return c.Insert(float32To64(v.([]float32))) }, ), }, opts...)...) diff --git a/hack/benchmark/core/benchmark/strategy/insert_commit.go b/hack/benchmark/core/benchmark/strategy/insert_commit.go index 9a4efa11e0..ec55e5296e 100644 --- a/hack/benchmark/core/benchmark/strategy/insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/insert_commit.go @@ -32,14 +32,20 @@ func NewInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Strategy WithPropName("InsertCommit"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.Train() - return c.InsertCommit(train[int(atomic.LoadUint64(cnt))%len(train)], poolSize) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + return c.InsertCommit(v.([]float32), poolSize) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.TrainAsFloat64() - return c.InsertCommit(train[int(atomic.LoadUint64(cnt))%len(train)], poolSize) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + return c.InsertCommit(float32To64(v.([]float32)), poolSize) }, ), }, opts...)...) 
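Note: the strategy rewrites in this patch all follow one access pattern — instead of grabbing the whole [][]float32 slice up front, each iteration asks the dataset for a single vector via Train(i)/Query(i), wraps the index with TrainSize()/QuerySize(), checks the error, and type-asserts the interface{} result (converting through float32To64 for the 64-bit core). A minimal sketch of that pattern follows; the iterateTrain helper, its package name, and the import paths are illustrative only and are not part of this change.

package example

import (
	"github.com/vdaas/vald/internal/errors"
	"github.com/vdaas/vald/pkg/tools/cli/loadtest/assets"
)

// iterateTrain visits every train vector of a Dataset exactly once.
// It assumes a float32-typed dataset (the hdf5 and deep1b loaders);
// sift1b yields []uint8 from Train and would need a different assertion.
func iterateTrain(d assets.Dataset, fn func(i int, v []float32) error) error {
	for i := 0; i < d.TrainSize(); i++ {
		raw, err := d.Train(i)
		if err != nil {
			// the loaders return assets.ErrOutOfBounds past the end
			return err
		}
		v, ok := raw.([]float32)
		if !ok {
			return errors.New("unexpected vector element type")
		}
		if err := fn(i, v); err != nil {
			return err
		}
	}
	return nil
}
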
diff --git a/hack/benchmark/core/benchmark/strategy/search.go b/hack/benchmark/core/benchmark/strategy/search.go index 897c3f4ed6..96cc034fde 100644 --- a/hack/benchmark/core/benchmark/strategy/search.go +++ b/hack/benchmark/core/benchmark/strategy/search.go @@ -37,8 +37,11 @@ func NewSearch(size int, epsilon, radius float32, opts ...StrategyOption) benchm ), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - query := dataset.Query() - return c.Search(query[int(atomic.LoadUint64(cnt))%len(query)], size, epsilon, radius) + v, err := dataset.Query(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + return c.Search(v.([]float32), size, epsilon, radius) }, ), WithPreProp64( @@ -48,8 +51,11 @@ func NewSearch(size int, epsilon, radius float32, opts ...StrategyOption) benchm ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - query := dataset.QueryAsFloat64() - return c.Search(query[int(atomic.LoadUint64(cnt))%len(query)], size, epsilon, radius) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + return c.Search(float32To64(v.([]float32)), size, epsilon, radius) }, ), }, opts...)...) diff --git a/hack/benchmark/core/benchmark/strategy/util.go b/hack/benchmark/core/benchmark/strategy/util.go index c64d9aeace..a67c8a80ad 100644 --- a/hack/benchmark/core/benchmark/strategy/util.go +++ b/hack/benchmark/core/benchmark/strategy/util.go @@ -43,10 +43,20 @@ func wrapErrors(errs []error) (wrapped error) { } func insertAndCreateIndex32(ctx context.Context, c core.Core32, dataset assets.Dataset) (ids []uint, err error) { - train := dataset.Train() - ids = make([]uint, 0, len(train)*bulkInsertCnt) + ids = make([]uint, 0, dataset.TrainSize()*bulkInsertCnt) + n := 0 for i := 0; i < bulkInsertCnt; i++ { + train := make([][]float32, 0, dataset.TrainSize()/bulkInsertCnt) + for j := 0; j < len(train); j++ { + v, err := dataset.Train(n) + if err != nil { + n = 0 + break + } + train = append(train, v.([]float32)) + n++ + } inserted, errs := c.BulkInsert(train) err = wrapErrors(errs) if err != nil { @@ -55,7 +65,7 @@ func insertAndCreateIndex32(ctx context.Context, c core.Core32, dataset assets.D ids = append(ids, inserted...) } - err = c.CreateIndex(uint32((len(train) * bulkInsertCnt) / 100)) + err = c.CreateIndex(uint32((dataset.TrainSize() * bulkInsertCnt) / 100)) if err != nil { return nil, err } @@ -63,10 +73,20 @@ func insertAndCreateIndex32(ctx context.Context, c core.Core32, dataset assets.D } func insertAndCreateIndex64(ctx context.Context, c core.Core64, dataset assets.Dataset) (ids []uint, err error) { - train := dataset.TrainAsFloat64() - ids = make([]uint, 0, len(train)*bulkInsertCnt) + ids = make([]uint, 0, dataset.TrainSize()*bulkInsertCnt) + n := 0 for i := 0; i < bulkInsertCnt; i++ { + train := make([][]float64, 0, dataset.TrainSize()/bulkInsertCnt) + for j := 0; j < len(train); j++ { + v, err := dataset.Train(n) + if err != nil { + n = 0 + break + } + train = append(train, float32To64(v.([]float32))) + n++ + } inserted, errs := c.BulkInsert(train) err = wrapErrors(errs) if err != nil { @@ -75,9 +95,17 @@ func insertAndCreateIndex64(ctx context.Context, c core.Core64, dataset assets.D ids = append(ids, inserted...) 
} - err = c.CreateIndex(uint32((len(train) * bulkInsertCnt) / 100)) + err = c.CreateIndex(uint32((dataset.TrainSize() * bulkInsertCnt) / 100)) if err != nil { return nil, err } return } + +func float32To64(x []float32) (y []float64) { + y = make([]float64, len(x)) + for i, a := range x { + y[i] = float64(a) + } + return y +} diff --git a/hack/benchmark/internal/e2e/strategy/insert.go b/hack/benchmark/internal/e2e/strategy/insert.go index e83cf432a4..c655e9aa21 100644 --- a/hack/benchmark/internal/e2e/strategy/insert.go +++ b/hack/benchmark/internal/e2e/strategy/insert.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -50,14 +51,17 @@ func (isrt *insert) Run(ctx context.Context, b *testing.B, c client.Client, data func (isrt *insert) run(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { cnt := 0 b.Run("Insert", func(bb *testing.B) { - ids, train := dataset.IDs(), dataset.Train() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() bb.StartTimer() for i := 0; i < bb.N; i++ { - isrt.do(ctx, bb, c, ids[cnt%len(ids)], train[cnt%len(train)]) + v, err := dataset.Train(cnt % dataset.TrainSize()) + if err != nil { + cnt = 0 + break + } + isrt.do(ctx, bb, c, fmt.Sprint(cnt), v.([]float32)) cnt++ } bb.StopTimer() @@ -67,8 +71,6 @@ func (isrt *insert) run(ctx context.Context, b *testing.B, c client.Client, data func (isrt *insert) runParallel(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { var cnt int64 b.Run("ParallelInsert", func(bb *testing.B) { - ids, train := dataset.IDs(), dataset.Train() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() @@ -76,7 +78,13 @@ func (isrt *insert) runParallel(ctx context.Context, b *testing.B, c client.Clie bb.RunParallel(func(pb *testing.PB) { for pb.Next() { n := int(atomic.AddInt64(&cnt, 1)) - 1 - isrt.do(ctx, bb, c, ids[n%len(ids)], train[n%len(train)]) + v, err := dataset.Train(n % dataset.TrainSize()) + if err != nil { + cnt = 0 + break + } + + isrt.do(ctx, bb, c, fmt.Sprint(cnt), v.([]float32)) } }) bb.StopTimer() diff --git a/hack/benchmark/internal/e2e/strategy/remove.go b/hack/benchmark/internal/e2e/strategy/remove.go index e9afd16dbe..7661b764be 100644 --- a/hack/benchmark/internal/e2e/strategy/remove.go +++ b/hack/benchmark/internal/e2e/strategy/remove.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -50,14 +51,12 @@ func (r *remove) Run(ctx context.Context, b *testing.B, c client.Client, dataset func (r *remove) run(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { cnt := 0 b.Run("Remove", func(bb *testing.B) { - ids := dataset.IDs() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() bb.StartTimer() for i := 0; i < bb.N; i++ { - r.do(ctx, bb, c, ids[cnt%len(ids)]) + r.do(ctx, bb, c, fmt.Sprint(cnt)) cnt++ } bb.StopTimer() @@ -67,8 +66,6 @@ func (r *remove) run(ctx context.Context, b *testing.B, c client.Client, dataset func (r *remove) runParallel(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { var cnt int64 b.Run("ParallelRemove", func(bb *testing.B) { - ids := dataset.IDs() - bb.StartTimer() bb.ReportAllocs() bb.ResetTimer() @@ -76,7 +73,7 @@ func (r *remove) runParallel(ctx context.Context, b *testing.B, c client.Client, bb.RunParallel(func(pb *testing.PB) { for pb.Next() { n := int(atomic.AddInt64(&cnt, 1)) - 1 - r.do(ctx, bb, c, ids[n%len(ids)]) + r.do(ctx, bb, c, fmt.Sprint(n)) } }) bb.StopTimer() diff --git 
a/hack/benchmark/internal/e2e/strategy/search.go b/hack/benchmark/internal/e2e/strategy/search.go index 81e7b90d64..d0ca62a386 100644 --- a/hack/benchmark/internal/e2e/strategy/search.go +++ b/hack/benchmark/internal/e2e/strategy/search.go @@ -49,15 +49,21 @@ func (s *search) Run(ctx context.Context, b *testing.B, c client.Client, dataset } func (s *search) run(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { + cnt := 0 b.Run("Search", func(bb *testing.B) { - queries := dataset.Query() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() bb.StartTimer() for i := 0; i < bb.N; i++ { - s.do(ctx, bb, c, queries[i%len(queries)]) + v, err := dataset.Query(cnt % dataset.QuerySize()) + if err != nil { + cnt = 0 + break + } + + s.do(ctx, bb, c, v.([]float32)) + cnt++ } bb.StopTimer() }) @@ -66,8 +72,6 @@ func (s *search) run(ctx context.Context, b *testing.B, c client.Client, dataset func (s *search) runParallel(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { var cnt int64 b.Run("ParallelSearch", func(bb *testing.B) { - queries := dataset.Query() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() @@ -75,7 +79,13 @@ func (s *search) runParallel(ctx context.Context, b *testing.B, c client.Client, bb.RunParallel(func(pb *testing.PB) { for pb.Next() { n := int(atomic.AddInt64(&cnt, 1)) - 1 - s.do(ctx, b, c, queries[n%len(queries)]) + v, err := dataset.Query(n % dataset.QuerySize()) + if err != nil { + cnt = 0 + break + } + + s.do(ctx, b, c, v.([]float32)) } }) bb.StopTimer() diff --git a/hack/benchmark/internal/e2e/strategy/stream_insert.go b/hack/benchmark/internal/e2e/strategy/stream_insert.go index 77d64d2733..1e1cdfc97a 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_insert.go +++ b/hack/benchmark/internal/e2e/strategy/stream_insert.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -38,8 +39,6 @@ func NewStreamInsert(opts ...StreamInsertOption) e2e.Strategy { } func (sisrt *streamInsert) dataProvider(total *uint32, b *testing.B, dataset assets.Dataset) func() *client.ObjectVector { - ids, trains := dataset.IDs(), dataset.Train() - var cnt uint32 b.StopTimer() @@ -54,9 +53,13 @@ func (sisrt *streamInsert) dataProvider(total *uint32, b *testing.B, dataset ass } total := int(atomic.AddUint32(total, 1)) - 1 + v, err := dataset.Train(total % dataset.TrainSize()) + if err != nil { + return nil + } return &client.ObjectVector{ - Id: ids[total%len(ids)], - Vector: trains[total%len(trains)], + Id: fmt.Sprint(n), + Vector: v.([]float32), } } } diff --git a/hack/benchmark/internal/e2e/strategy/stream_remove.go b/hack/benchmark/internal/e2e/strategy/stream_remove.go index 23207bd599..82eda440c8 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_remove.go +++ b/hack/benchmark/internal/e2e/strategy/stream_remove.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -38,8 +39,6 @@ func NewStreamRemove(opts ...StreamRemoveOption) e2e.Strategy { } func (sr *streamRemove) dataProvider(total *uint32, b *testing.B, dataset assets.Dataset) func() *client.ObjectID { - ids := dataset.IDs() - var cnt uint32 b.StopTimer() @@ -56,7 +55,7 @@ func (sr *streamRemove) dataProvider(total *uint32, b *testing.B, dataset assets total := int(atomic.AddUint32(total, 1)) - 1 return &client.ObjectID{ - Id: ids[total%len(ids)], + Id: fmt.Sprint(total%dataset.TrainSize()), } } } diff --git a/hack/benchmark/internal/e2e/strategy/stream_search.go 
b/hack/benchmark/internal/e2e/strategy/stream_search.go index f16e6b8db8..30ebabf794 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_search.go +++ b/hack/benchmark/internal/e2e/strategy/stream_search.go @@ -40,8 +40,6 @@ func NewStreamSearch(opts ...StreamSearchOption) e2e.Strategy { } func (s *streamSearch) dataProvider(total *uint32, b *testing.B, dataset assets.Dataset) func() *client.SearchRequest { - queries := dataset.Query() - var cnt uint32 b.StopTimer() @@ -56,8 +54,12 @@ func (s *streamSearch) dataProvider(total *uint32, b *testing.B, dataset assets. } total := int(atomic.AddUint32(total, 1)) - 1 + v, err := dataset.Query(total % dataset.QuerySize()) + if err != nil { + return nil + } return &client.SearchRequest{ - Vector: queries[total%len(queries)], + Vector: v.([]float32), Config: s.cfg, } } diff --git a/pkg/tools/cli/loadtest/assets/dataset.go b/pkg/tools/cli/loadtest/assets/dataset.go index 98df8a88d3..1f13572e68 100644 --- a/pkg/tools/cli/loadtest/assets/dataset.go +++ b/pkg/tools/cli/loadtest/assets/dataset.go @@ -18,8 +18,11 @@ package assets import ( "os" "path/filepath" + "strconv" "strings" + "github.com/vdaas/vald/internal/log" + "github.com/vdaas/vald/hack/benchmark/assets/x1b" ) @@ -30,9 +33,13 @@ var ( // Dataset is representation of train and test dataset. type Dataset interface { Train(i int) (interface{}, error) + TrainSize() int Query(i int) (interface{}, error) + QuerySize() int Distance(i int) ([]float32, error) + DistanceSize() int Neighbor(i int) ([]int, error) + NeighborSize() int Name() string Dimension() int DistanceType() string @@ -81,4 +88,56 @@ func findDir(path string) (string, error) { } }(wd) return filepath.Join(root, path) + "/", nil -} \ No newline at end of file +} + +// Data loads specified dataset and returns it. 
+func Data(name string) func() (Dataset, error) { + log.Debugf("start loading: %s", name) + defer log.Debugf("finish loading: %s", name) + if strings.HasPrefix(name, "identity-") { + l := strings.Split(name, "-") + i, _ := strconv.Atoi(l[1]) + return identity(i) + } + if strings.HasPrefix(name, "random-") { + l := strings.Split(name, "-") + d, _ := strconv.Atoi(l[1]) + s, _ := strconv.Atoi(l[2]) + return random(d, s) + } + if strings.HasPrefix(name, "gaussian-") { + l := strings.Split(name, "-") + d, _ := strconv.Atoi(l[1]) + s, _ := strconv.Atoi(l[2]) + m, _ := strconv.ParseFloat(l[3], 64) + sd, _ := strconv.ParseFloat(l[4], 64) + return gaussian(d, s, m, sd) + } + switch name { + case "fashion-mnist": + return loadSmallData("fashion-mnist-784-euclidean.hdf5", name, "l2", "float") + case "mnist": + return loadSmallData("mnist-784-euclidean.hdf5", name, "l2", "float") + case "glove-25": + return loadSmallData("glove-25-angular.hdf5", name, "cosine", "float") + case "glove-50": + return loadSmallData("glove-50-angular.hdf5", name, "cosine", "float") + case "glove-100": + return loadSmallData("glove-100-angular.hdf5", name, "cosine", "float") + case "glove-200": + return loadSmallData("glove-200-angular.hdf5", name, "cosine", "float") + case "nytimes": + return loadSmallData("nytimes-256-angular.hdf5", name, "cosine", "float") + case "sift": + return loadSmallData("sift-128-euclidean.hdf5", name, "l2", "float") + case "gist": + return loadSmallData("gist-960-euclidean.hdf5", name, "l2", "float") + case "kosarak": + return loadSmallData("kosarak-jaccard.hdf5", name, "jaccard", "float") + case "sift1b": + return loadLargeData("bigann_base.bvecs", "bigann_query.bvecs", "gnd/idx_1000M.ivecs", "gnd/dis_1000M.fvecs", name, "l2", "uint8") + case "deep1b": + return loadLargeData("deep1B_base.fvecs", "deep1B_query.fvecs", "deep1B_groundtruth.ivecs", "", name, "l2", "float") + } + return nil +} diff --git a/pkg/tools/cli/loadtest/assets/hdf5_loader.go b/pkg/tools/cli/loadtest/assets/hdf5_loader.go index 57b84aa8ae..cd758b608d 100644 --- a/pkg/tools/cli/loadtest/assets/hdf5_loader.go +++ b/pkg/tools/cli/loadtest/assets/hdf5_loader.go @@ -115,4 +115,4 @@ func Load(path string) (train, test, distances [][]float32, neighbors [][]int, d } return train, test, distances, neighbors, dim, nil -} \ No newline at end of file +} diff --git a/pkg/tools/cli/loadtest/assets/large_dataset.go b/pkg/tools/cli/loadtest/assets/large_dataset.go index 80a7a786cd..273d0ecdfd 100644 --- a/pkg/tools/cli/loadtest/assets/large_dataset.go +++ b/pkg/tools/cli/loadtest/assets/large_dataset.go @@ -24,10 +24,10 @@ import ( type largeDataset struct { *dataset - train x1b.X1b - query x1b.X1b + train x1b.X1b + query x1b.X1b groundTruth [][]int - distances x1b.Fvecs + distances x1b.Fvecs } func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFileName, name, distanceType, objectType string) func() (Dataset, error) { @@ -51,7 +51,7 @@ func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFi } iv, err := x1b.NewIVecs(filepath.Join(dir, groundTruthFileName)) if err != nil { - return nil, er + return nil, err } groundTruth := make([][]int, 0, iv.Size()) for i := 0; ; i++ { @@ -62,7 +62,7 @@ func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFi } } gt := make([]int, 0, len(gt32)) - for _, v := range gt32{ + for _, v := range gt32 { gt = append(gt, int(v)) } groundTruth = append(groundTruth, gt) @@ -79,10 +79,10 @@ func loadLargeData(trainFileName, queryFileName, 
groundTruthFileName, distanceFi distanceType: distanceType, objectType: objectType, }, - train: train, - query: query, + train: train, + query: query, groundTruth: groundTruth, - distances: distances, + distances: distances, }, nil } @@ -92,14 +92,26 @@ func (d *largeDataset) Train(i int) (interface{}, error) { return d.train.Load(i) } +func (d *largeDataset) TrainSize() int { + return d.train.Size() +} + func (d *largeDataset) Query(i int) (interface{}, error) { return d.query.Load(i) } +func (d *largeDataset) QuerySize() int { + return d.query.Size() +} + func (d *largeDataset) Distance(i int) ([]float32, error) { return d.distances.LoadFloat32(i) } +func (d *largeDataset) DistanceSize() int { + return d.distances.Size() +} + func (d *largeDataset) Neighbor(i int) ([]int, error) { if i >= len(d.groundTruth) { return nil, ErrOutOfBounds @@ -107,6 +119,10 @@ func (d *largeDataset) Neighbor(i int) ([]int, error) { return d.groundTruth[i], nil } +func (d *largeDataset) NeighborSize() int { + return len(d.groundTruth) +} + func (d *largeDataset) Dimension() int { return d.dimension } @@ -121,4 +137,4 @@ func (d *largeDataset) ObjectType() string { func (d *largeDataset) Name() string { return d.name -} \ No newline at end of file +} diff --git a/pkg/tools/cli/loadtest/assets/small_dataset.go b/pkg/tools/cli/loadtest/assets/small_dataset.go index 447d977e39..f5a71a5f9b 100644 --- a/pkg/tools/cli/loadtest/assets/small_dataset.go +++ b/pkg/tools/cli/loadtest/assets/small_dataset.go @@ -4,10 +4,6 @@ import ( "fmt" "math/rand" "path/filepath" - "strconv" - "strings" - - "github.com/vdaas/vald/internal/log" ) type smallDataset struct { @@ -30,7 +26,7 @@ func loadSmallData(fileName, datasetName, distanceType, objectType string) func( } return &smallDataset{ - dataset: &dataset { + dataset: &dataset{ name: datasetName, dimension: dim, distanceType: distanceType, @@ -114,58 +110,6 @@ func gaussian(dim, size int, mean, stdDev float64) func() (Dataset, error) { } } -// Data loads specified dataset and returns it. 
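loadLargeData above depends on the x1b readers, which parse the TEXMEX-style .bvecs/.fvecs/.ivecs layout: every record starts with a 4-byte little-endian integer giving the vector dimension, followed by that many components (one uint8 per component for .bvecs, a 4-byte float32 for .fvecs, a 4-byte int32 for .ivecs). The x1b loader keeps the file contents in a byte slice and loads records by offset; the sequential reader below is only a sketch of the on-disk format, not the x1b API, and the file path is a placeholder:

package main

import (
	"encoding/binary"
	"fmt"
	"io"
	"log"
	"os"
)

// readFvec decodes a single .fvecs record: an int32 dimension header
// followed by that many float32 values, both little-endian.
func readFvec(r io.Reader) ([]float32, error) {
	var dim int32
	if err := binary.Read(r, binary.LittleEndian, &dim); err != nil {
		return nil, err
	}
	vec := make([]float32, dim)
	if err := binary.Read(r, binary.LittleEndian, vec); err != nil {
		return nil, err
	}
	return vec, nil
}

func main() {
	f, err := os.Open("deep1B_queries.fvecs")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	v, err := readFvec(f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("first vector dimension:", len(v))
}

The hunk that follows removes the old Data constructor from small_dataset.go now that dataset.go owns it.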
-func Data(name string) func() (Dataset, error) { - log.Debugf("start loading: %s", name) - defer log.Debugf("finish loading: %s", name) - if strings.HasPrefix(name, "identity-") { - l := strings.Split(name, "-") - i, _ := strconv.Atoi(l[1]) - return identity(i) - } - if strings.HasPrefix(name, "random-") { - l := strings.Split(name, "-") - d, _ := strconv.Atoi(l[1]) - s, _ := strconv.Atoi(l[2]) - return random(d, s) - } - if strings.HasPrefix(name, "gaussian-") { - l := strings.Split(name, "-") - d, _ := strconv.Atoi(l[1]) - s, _ := strconv.Atoi(l[2]) - m, _ := strconv.ParseFloat(l[3], 64) - sd, _ := strconv.ParseFloat(l[4], 64) - return gaussian(d, s, m, sd) - } - switch name { - case "fashion-mnist": - return loadSmallData("fashion-mnist-784-euclidean.hdf5", name, "l2", "float") - case "mnist": - return loadSmallData("mnist-784-euclidean.hdf5", name, "l2", "float") - case "glove-25": - return loadSmallData("glove-25-angular.hdf5", name, "cosine", "float") - case "glove-50": - return loadSmallData("glove-50-angular.hdf5", name, "cosine", "float") - case "glove-100": - return loadSmallData("glove-100-angular.hdf5", name, "cosine", "float") - case "glove-200": - return loadSmallData("glove-200-angular.hdf5", name, "cosine", "float") - case "nytimes": - return loadSmallData("nytimes-256-angular.hdf5", name, "cosine", "float") - case "sift": - return loadSmallData("sift-128-euclidean.hdf5", name, "l2", "float") - case "gist": - return loadSmallData("gist-960-euclidean.hdf5", name, "l2", "float") - case "kosarak": - return loadSmallData("kosarak-jaccard.hdf5", name, "jaccard", "float") - case "sift1b": - return loadLargeData("bigann_base.bvecs", "bigann_query.bvecs", "gnd/idx_1000M.ivecs", "gnd/dis_1000M.fvecs", name, "l2", "uint8") - case "deep1b": - return loadLargeData("deep1B_base.fvecs", "deep1B_query.fvecs", "deep1B_groundtruth.ivecs", "", name, "l2", "float") - } - return nil -} - // Train returns vectors for train. func (s *smallDataset) Train(i int) (interface{}, error) { if i >= len(s.train) { @@ -174,6 +118,11 @@ func (s *smallDataset) Train(i int) (interface{}, error) { return s.train[i], nil } +// TrainSize return size of vectors for train. +func (s *smallDataset) TrainSize() int { + return len(s.train) +} + // Query returns vectors for test. func (s *smallDataset) Query(i int) (interface{}, error) { if i >= len(s.query) { @@ -182,6 +131,11 @@ func (s *smallDataset) Query(i int) (interface{}, error) { return s.query[i], nil } +// QuerySize return size of vectors for query. +func (s *smallDataset) QuerySize() int { + return len(s.query) +} + // Distance returns distances between queries and answers. func (s *smallDataset) Distance(i int) ([]float32, error) { if i >= len(s.distances) { @@ -190,6 +144,11 @@ func (s *smallDataset) Distance(i int) ([]float32, error) { return s.distances[i], nil } +// DistanceSize returns size of distances +func (s *smallDataset) DistanceSize() int { + return len(s.distances) +} + // Neighbors returns nearest vectors from queries. func (s *smallDataset) Neighbor(i int) ([]int, error) { if i >= len(s.neighbors) { @@ -198,14 +157,7 @@ func (s *smallDataset) Neighbor(i int) ([]int, error) { return s.neighbors[i], nil } -func float32To64(x [][]float32) (y [][]float64) { - y = make([][]float64, len(x)) - for i, z := range x { - y[i] = make([]float64, len(z)) - for j, a := range z { - y[i][j] = float64(a) - } - } - return y +// NeighborSize returns size of neighbors. 
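The bounds-checked accessors above, together with the size methods (NeighborSize completes the set just below), let request providers hand out exactly one vector per call without holding the full training set. A sketch of that provider shape; the trainSet interface mirrors only the two methods needed here, and nextVector and memSet are illustrative names rather than code from this series:

package main

import (
	"fmt"
	"sync/atomic"
)

// trainSet is the minimal slice of the Dataset interface a provider needs.
type trainSet interface {
	Train(i int) (interface{}, error)
	TrainSize() int
}

// nextVector returns a closure that yields one training vector per call,
// advancing an atomic index, and nil once the dataset is exhausted.
func nextVector(d trainSet) func() []float32 {
	idx := int32(-1)
	return func() []float32 {
		i := int(atomic.AddInt32(&idx, 1))
		if i >= d.TrainSize() {
			return nil
		}
		v, err := d.Train(i)
		if err != nil {
			return nil
		}
		return v.([]float32)
	}
}

// memSet is a toy in-memory implementation used only for demonstration.
type memSet struct{ vs [][]float32 }

func (m *memSet) Train(i int) (interface{}, error) { return m.vs[i], nil }
func (m *memSet) TrainSize() int                   { return len(m.vs) }

func main() {
	next := nextVector(&memSet{vs: [][]float32{{1, 2}, {3, 4}}})
	for v := next(); v != nil; v = next() {
		fmt.Println(v)
	}
}

The loadtest service providers and the e2e strategies in this series follow the same shape, wrapping each vector in a gRPC request payload.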
+func (s *smallDataset) NeighborSize() int { + return len(s.neighbors) } - diff --git a/pkg/tools/cli/loadtest/service/insert.go b/pkg/tools/cli/loadtest/service/insert.go index f876173533..9bf24d2b1b 100644 --- a/pkg/tools/cli/loadtest/service/insert.go +++ b/pkg/tools/cli/loadtest/service/insert.go @@ -19,6 +19,7 @@ import ( "context" "sync/atomic" + "github.com/kpango/fuid" "github.com/vdaas/vald/apis/grpc/agent/core" "github.com/vdaas/vald/apis/grpc/gateway/vald" "github.com/vdaas/vald/apis/grpc/payload" @@ -44,15 +45,17 @@ func insertRequestProvider(dataset assets.Dataset, batchSize int) (f func() inte } func objectVectorProvider(dataset assets.Dataset) (func() interface{}, int) { - v := dataset.Train() - ids := dataset.IDs() idx := int32(-1) - size := len(v) + size := dataset.TrainSize() return func() (ret interface{}) { if i := int(atomic.AddInt32(&idx, 1)); i < size { + v, err := dataset.Train(i) + if err != nil { + return nil + } ret = &payload.Object_Vector{ - Id: ids[i], - Vector: v[i], + Id: fuid.String(), + Vector: v.([]float32), } } return ret diff --git a/pkg/tools/cli/loadtest/service/search.go b/pkg/tools/cli/loadtest/service/search.go index b23bc5c7f3..a51e0728db 100644 --- a/pkg/tools/cli/loadtest/service/search.go +++ b/pkg/tools/cli/loadtest/service/search.go @@ -29,13 +29,16 @@ import ( ) func searchRequestProvider(dataset assets.Dataset) (func() interface{}, int, error) { - v := dataset.Query() - size := len(v) + size := dataset.QuerySize() idx := int32(-1) return func() (ret interface{}) { if i := int(atomic.AddInt32(&idx, 1)); i < size { + v, err := dataset.Query(i) + if err != nil { + return nil + } ret = &payload.Search_Request{ - Vector: v[i], + Vector: v.([]float32), } } return ret From 530f5a9fceee1505d6ad449cb3c1c75f3aa6ea96 Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Mon, 24 Aug 2020 17:11:29 +0900 Subject: [PATCH 04/15] fix benchmark timer Signed-off-by: Kosuke Morimoto --- .../core/benchmark/strategy/bulk_insert.go | 42 ++++++++++++++++++- .../benchmark/strategy/bulk_insert_commit.go | 38 ++++++++++++++++- .../core/benchmark/strategy/insert.go | 4 ++ .../core/benchmark/strategy/insert_commit.go | 4 ++ .../core/benchmark/strategy/search.go | 4 ++ .../internal/e2e/strategy/stream_remove.go | 2 +- pkg/tools/cli/loadtest/assets/dataset.go | 3 +- 7 files changed, 90 insertions(+), 7 deletions(-) diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert.go b/hack/benchmark/core/benchmark/strategy/bulk_insert.go index 9b0e10a1fb..f6abf5c690 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert.go @@ -26,18 +26,56 @@ import ( "github.com/vdaas/vald/hack/benchmark/internal/core" ) +const ( + maxBulkSize = 100000 +) + func NewBulkInsert(opts ...StrategyOption) benchmark.Strategy { return newStrategy(append([]StrategyOption{ WithPropName("BulkInsert"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsert(dataset.Train()) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float32, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + break + } + v = append(v, arr.([]float32)) + } + b.StartTimer() + defer b.StopTimer() + ids, errs := c.BulkInsert(v) return ids, wrapErrors(errs) }, ), WithProp64( func(ctx context.Context, b *testing.B, c 
core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsert(dataset.TrainAsFloat64()) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float64, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + break + } + v = append(v, float32To64(arr.([]float32))) + } + b.StartTimer() + defer b.StopTimer() + ids, errs := c.BulkInsert(v) return ids, wrapErrors(errs) }, ), diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go index df2d9bf33d..e2bf52b76b 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go @@ -31,13 +31,47 @@ func NewBulkInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Stra WithPropName("BulkInsertCommit"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsertCommit(dataset.Train(), poolSize) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float32, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + break + } + v = append(v, arr.([]float32)) + } + b.StartTimer() + defer b.StopTimer() + ids, errs := c.BulkInsertCommit(v, poolSize) return ids, wrapErrors(errs) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsertCommit(dataset.TrainAsFloat64(), poolSize) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float64, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + break + } + v = append(v, float32To64(arr.([]float32))) + } + b.StartTimer() + defer b.StopTimer() + ids, errs := c.BulkInsertCommit(v, poolSize) return ids, wrapErrors(errs) }, ), diff --git a/hack/benchmark/core/benchmark/strategy/insert.go b/hack/benchmark/core/benchmark/strategy/insert.go index f68a4d58af..688d4c68cb 100644 --- a/hack/benchmark/core/benchmark/strategy/insert.go +++ b/hack/benchmark/core/benchmark/strategy/insert.go @@ -36,6 +36,8 @@ func NewInsert(opts ...StrategyOption) benchmark.Strategy { if err != nil { return nil, err } + b.StartTimer() + defer b.StopTimer() return c.Insert(v.([]float32)) }, ), @@ -45,6 +47,8 @@ func NewInsert(opts ...StrategyOption) benchmark.Strategy { if err != nil { return nil, err } + b.StartTimer() + defer b.StopTimer() return c.Insert(float32To64(v.([]float32))) }, ), diff --git a/hack/benchmark/core/benchmark/strategy/insert_commit.go b/hack/benchmark/core/benchmark/strategy/insert_commit.go index ec55e5296e..7b8bb98baa 100644 --- a/hack/benchmark/core/benchmark/strategy/insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/insert_commit.go @@ -36,6 +36,8 @@ func NewInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Strategy if err != nil { return nil, err } + b.StartTimer() + defer b.StopTimer() return c.InsertCommit(v.([]float32), poolSize) }, ), @@ -45,6 +47,8 @@ func NewInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Strategy if err != nil { return nil, err } + 
b.StartTimer() + defer b.StopTimer() return c.InsertCommit(float32To64(v.([]float32)), poolSize) }, ), diff --git a/hack/benchmark/core/benchmark/strategy/search.go b/hack/benchmark/core/benchmark/strategy/search.go index 96cc034fde..d05d753283 100644 --- a/hack/benchmark/core/benchmark/strategy/search.go +++ b/hack/benchmark/core/benchmark/strategy/search.go @@ -41,6 +41,8 @@ func NewSearch(size int, epsilon, radius float32, opts ...StrategyOption) benchm if err != nil { return nil, err } + b.StartTimer() + defer b.StopTimer() return c.Search(v.([]float32), size, epsilon, radius) }, ), @@ -55,6 +57,8 @@ func NewSearch(size int, epsilon, radius float32, opts ...StrategyOption) benchm if err != nil { return nil, err } + b.StartTimer() + defer b.StopTimer() return c.Search(float32To64(v.([]float32)), size, epsilon, radius) }, ), diff --git a/hack/benchmark/internal/e2e/strategy/stream_remove.go b/hack/benchmark/internal/e2e/strategy/stream_remove.go index 82eda440c8..6143ae4158 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_remove.go +++ b/hack/benchmark/internal/e2e/strategy/stream_remove.go @@ -55,7 +55,7 @@ func (sr *streamRemove) dataProvider(total *uint32, b *testing.B, dataset assets total := int(atomic.AddUint32(total, 1)) - 1 return &client.ObjectID{ - Id: fmt.Sprint(total%dataset.TrainSize()), + Id: fmt.Sprint(total % dataset.TrainSize()), } } } diff --git a/pkg/tools/cli/loadtest/assets/dataset.go b/pkg/tools/cli/loadtest/assets/dataset.go index 1f13572e68..7c06be1a76 100644 --- a/pkg/tools/cli/loadtest/assets/dataset.go +++ b/pkg/tools/cli/loadtest/assets/dataset.go @@ -21,9 +21,8 @@ import ( "strconv" "strings" - "github.com/vdaas/vald/internal/log" - "github.com/vdaas/vald/hack/benchmark/assets/x1b" + "github.com/vdaas/vald/internal/log" ) var ( From ba2c440fb2d91e786465553f24b03dd995308b30 Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Tue, 25 Aug 2020 18:17:40 +0900 Subject: [PATCH 05/15] fix error handling Signed-off-by: Kosuke Morimoto --- hack/benchmark/assets/x1b/loader.go | 2 +- .../benchmark/assets/x1b/loader_test_bench.go | 25 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/hack/benchmark/assets/x1b/loader.go b/hack/benchmark/assets/x1b/loader.go index 5a10040db2..80649ba732 100644 --- a/hack/benchmark/assets/x1b/loader.go +++ b/hack/benchmark/assets/x1b/loader.go @@ -78,7 +78,7 @@ func open(fname string, elementSize int) (f *file, err error) { return nil, err } defer func() { - err = fp.Close() + err = errors.Wrap(err, fp.Close().Error()) }() fi, err := fp.Stat() diff --git a/hack/benchmark/assets/x1b/loader_test_bench.go b/hack/benchmark/assets/x1b/loader_test_bench.go index d798aeab67..3af9895543 100644 --- a/hack/benchmark/assets/x1b/loader_test_bench.go +++ b/hack/benchmark/assets/x1b/loader_test_bench.go @@ -27,17 +27,17 @@ const ( func BenchmarkBVecs(b *testing.B) { bv, err := NewBVecs(bvecsFile) + if err != nil { + b.Fatal(err) + } defer func() { if err := bv.Close(); err != nil { b.Fatal(err) } }() - if err != nil { - b.Fatal(err) - } i := 0 - b.Run("", func(bb *testing.B) { + b.Run(bvecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() for n := 0; n < b.N; n++ { @@ -63,17 +63,17 @@ func BenchmarkBVecs(b *testing.B) { func BenchmarkFVecs(b *testing.B) { fv, err := NewFVecs(fvecsFile) + if err != nil { + b.Fatal(err) + } defer func() { if err := fv.Close(); err != nil { b.Fatal(err) } }() - if err != nil { - b.Fatal(err) - } i := 0 - b.Run("", func(bb *testing.B) { + b.Run(fvecsFile, func(bb 
*testing.B) { bb.ReportAllocs() bb.ResetTimer() for n := 0; n < b.N; n++ { @@ -99,18 +99,17 @@ func BenchmarkFVecs(b *testing.B) { func BenchmarkIVecs(b *testing.B) { iv, err := NewIVecs(ivecsFile) + if err != nil { + b.Fatal(err) + } defer func() { if err := iv.Close(); err != nil { b.Fatal(err) } }() - if err != nil { - b.Fatal(err) - } - i := 0 - b.Run("", func(bb *testing.B) { + b.Run(ivecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() for n := 0; n < b.N; n++ { From c7308f15cca12e2d70ab4c136d879497539c58ad Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Tue, 25 Aug 2020 18:20:06 +0900 Subject: [PATCH 06/15] fix benchmark timers Signed-off-by: Kosuke Morimoto --- hack/benchmark/core/benchmark/strategy/bulk_insert.go | 10 ++++++++-- .../core/benchmark/strategy/bulk_insert_commit.go | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert.go b/hack/benchmark/core/benchmark/strategy/bulk_insert.go index f6abf5c690..cc3c3069ba 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert.go @@ -50,8 +50,11 @@ func NewBulkInsert(opts ...StrategyOption) benchmark.Strategy { } v = append(v, arr.([]float32)) } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() b.StartTimer() - defer b.StopTimer() ids, errs := c.BulkInsert(v) return ids, wrapErrors(errs) }, @@ -73,8 +76,11 @@ func NewBulkInsert(opts ...StrategyOption) benchmark.Strategy { } v = append(v, float32To64(arr.([]float32))) } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() b.StartTimer() - defer b.StopTimer() ids, errs := c.BulkInsert(v) return ids, wrapErrors(errs) }, diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go index e2bf52b76b..a618e664d8 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go @@ -46,8 +46,11 @@ func NewBulkInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Stra } v = append(v, arr.([]float32)) } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() b.StartTimer() - defer b.StopTimer() ids, errs := c.BulkInsertCommit(v, poolSize) return ids, wrapErrors(errs) }, @@ -69,8 +72,11 @@ func NewBulkInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Stra } v = append(v, float32To64(arr.([]float32))) } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() b.StartTimer() - defer b.StopTimer() ids, errs := c.BulkInsertCommit(v, poolSize) return ids, wrapErrors(errs) }, From 38464c553e34c04541310a5513835539eff2873a Mon Sep 17 00:00:00 2001 From: vdaas-ci Date: Thu, 27 Aug 2020 07:14:11 +0000 Subject: [PATCH 07/15] :robot: Update license headers / Format go codes and yaml files Signed-off-by: vdaas-ci --- pkg/tools/cli/loadtest/assets/small_dataset.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/tools/cli/loadtest/assets/small_dataset.go b/pkg/tools/cli/loadtest/assets/small_dataset.go index f5a71a5f9b..72bb9ff659 100644 --- a/pkg/tools/cli/loadtest/assets/small_dataset.go +++ b/pkg/tools/cli/loadtest/assets/small_dataset.go @@ -1,3 +1,18 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// package assets import ( From f43d5589f6b2b2f23d0433203cf378e200514fd2 Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Thu, 27 Aug 2020 16:45:11 +0900 Subject: [PATCH 08/15] add x1b for docker build Signed-off-by: Kosuke Morimoto --- dockers/tools/cli/loadtest/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dockers/tools/cli/loadtest/Dockerfile b/dockers/tools/cli/loadtest/Dockerfile index 4f1b0bf450..4ce4d06c0f 100644 --- a/dockers/tools/cli/loadtest/Dockerfile +++ b/dockers/tools/cli/loadtest/Dockerfile @@ -39,6 +39,9 @@ COPY pkg/${PKG} . WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO}/cmd/${PKG} COPY cmd/${PKG} . +WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO}/hack/benchmark/assets/x1b +COPY hack/benchmark/assets/x1b . + WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO} COPY versions/GO_VERSION . COPY versions/VALD_VERSION . From 2635f81ccb2dea65453534f5054333b074ab031e Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Thu, 27 Aug 2020 17:25:12 +0900 Subject: [PATCH 09/15] regenerate tests Signed-off-by: Kosuke Morimoto --- hack/benchmark/assets/x1b/loader_test.go | 1288 +++++++++++++++++ .../cli/loadtest/assets/large_dataset_test.go | 1250 ++++++++++++++++ .../cli/loadtest/assets/small_dataset_test.go | 1105 ++++++++++++++ 3 files changed, 3643 insertions(+) create mode 100644 hack/benchmark/assets/x1b/loader_test.go create mode 100644 pkg/tools/cli/loadtest/assets/large_dataset_test.go create mode 100644 pkg/tools/cli/loadtest/assets/small_dataset_test.go diff --git a/hack/benchmark/assets/x1b/loader_test.go b/hack/benchmark/assets/x1b/loader_test.go new file mode 100644 index 0000000000..efa72ad779 --- /dev/null +++ b/hack/benchmark/assets/x1b/loader_test.go @@ -0,0 +1,1288 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
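Patches 04 and 06 above converge on a single timing discipline for the core benchmark strategies: prepare the input while the timer is stopped, then call ReportAllocs, ResetTimer and StartTimer immediately before the measured call, so dataset loading and float32 to float64 conversion never count toward the result. A self-contained benchmark with the same shape; bulkInsert is a stand-in for the measured core call, not the real core interface:

package strategy_test

import "testing"

// bulkInsert stands in for the core BulkInsert call whose cost should be
// measured in isolation from data preparation.
func bulkInsert(vs [][]float32) int { return len(vs) }

func BenchmarkBulkInsertShape(b *testing.B) {
	// Build the input while the timer is stopped so slice construction
	// is excluded from the measurement.
	b.StopTimer()
	vs := make([][]float32, 0, 1000)
	for i := 0; i < 1000; i++ {
		vs = append(vs, []float32{float32(i), float32(i + 1)})
	}
	// Report allocations, discard the setup time and counters, then
	// start measuring right before the call under test.
	b.ReportAllocs()
	b.ResetTimer()
	b.StartTimer()
	for n := 0; n < b.N; n++ {
		_ = bulkInsert(vs)
	}
}

ResetTimer discards both the elapsed time and the allocation counters accumulated during setup, while the ReportAllocs flag itself persists, so calling it before ResetTimer, as the patches do, is safe. The regenerated table-driven tests for the x1b loader continue below.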
+// +package x1b + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" +) + +func Test_open(t *testing.T) { + type args struct { + fname string + elementSize int + } + type want struct { + wantF *file + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, *file, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, gotF *file, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(gotF, w.wantF) { + return errors.Errorf("got = %v, want %v", gotF, w.wantF) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + elementSize: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + elementSize: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + gotF, err := open(test.args.fname, test.args.elementSize) + if err := test.checkFunc(test.want, gotF, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_Close(t *testing.T) { + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + err error + } + type test struct { + name string + fields fields + want want + checkFunc func(want, error) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + err := f.Close() + if err := test.checkFunc(test.want, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_load(t *testing.T) { + type args struct { + i int + } + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + want []byte + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []byte, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []byte, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return 
errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + got, err := f.load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_Dimension(t *testing.T) { + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + got := f.Dimension() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_Size(t *testing.T) { + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() 
+ } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + got := f.Size() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_bvecs_LoadUint8(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want []uint8 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []uint8, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []uint8, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + bv := &bvecs{ + file: test.fields.file, + } + + got, err := bv.LoadUint8(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_bvecs_Load(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + bv := &bvecs{ + file: test.fields.file, + } + + got, err := bv.Load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_fvecs_LoadFloat32(t *testing.T) { + type args struct { + i int + } + type fields 
struct { + file *file + } + type want struct { + want []float32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []float32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []float32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + fv := &fvecs{ + file: test.fields.file, + } + + got, err := fv.LoadFloat32(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_fvecs_Load(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + fv := &fvecs{ + file: test.fields.file, + } + + got, err := fv.Load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_ivecs_LoadInt32(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want []int32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []int32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []int32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return 
errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + iv := &ivecs{ + file: test.fields.file, + } + + got, err := iv.LoadInt32(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_ivecs_Load(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + iv := &ivecs{ + file: test.fields.file, + } + + got, err := iv.Load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestNewBVecs(t *testing.T) { + type args struct { + fname string + } + type want struct { + want Bvecs + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, Bvecs, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got Bvecs, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc 
!= nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := NewBVecs(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestNewFVecs(t *testing.T) { + type args struct { + fname string + } + type want struct { + want Fvecs + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, Fvecs, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got Fvecs, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := NewFVecs(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestNewIVecs(t *testing.T) { + type args struct { + fname string + } + type want struct { + want Ivecs + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, Ivecs, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got Ivecs, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := NewIVecs(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestOpen(t *testing.T) { + type args struct { + fname string + } + type want struct { + want X1b + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, X1b, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got X1b, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, 
want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := Open(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} diff --git a/pkg/tools/cli/loadtest/assets/large_dataset_test.go b/pkg/tools/cli/loadtest/assets/large_dataset_test.go new file mode 100644 index 0000000000..b2990875ec --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/large_dataset_test.go @@ -0,0 +1,1250 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package assets + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/hack/benchmark/assets/x1b" + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" +) + +func Test_loadLargeData(t *testing.T) { + type args struct { + trainFileName string + queryFileName string + groundTruthFileName string + distanceFileName string + name string + distanceType string + objectType string + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + trainFileName: "", + queryFileName: "", + groundTruthFileName: "", + distanceFileName: "", + name: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + trainFileName: "", + queryFileName: "", + groundTruthFileName: "", + distanceFileName: "", + name: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := loadLargeData(test.args.trainFileName, test.args.queryFileName, 
test.args.groundTruthFileName, test.args.distanceFileName, test.args.name, test.args.distanceType, test.args.objectType) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Train(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Train(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_TrainSize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: 
test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.TrainSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Query(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Query(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_QuerySize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: 
test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.QuerySize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Distance(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want []float32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []float32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []float32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Distance(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_DistanceSize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: 
test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.DistanceSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Neighbor(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want []int + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []int, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []int, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Neighbor(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_NeighborSize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: 
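// One way the commented templates in Test_largeDataset_Neighbor above could be
// completed. This is only a sketch: it assumes Neighbor(i) returns
// groundTruth[i] with a nil error, which this patch does not show, and it
// leaves the x1b.X1b readers nil because only groundTruth is exercised.
//
//	{
//	    name: "returns ground truth neighbors of the first query",
//	    args: args{i: 0},
//	    fields: fields{
//	        dataset:     &dataset{},
//	        groundTruth: [][]int{{3, 1, 4}},
//	    },
//	    want:      want{want: []int{3, 1, 4}},
//	    checkFunc: defaultCheckFunc,
//	},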
test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.NeighborSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Dimension(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.Dimension() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_DistanceType(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want string + } + type test struct { + name string + fields fields + want want + checkFunc func(want, string) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got string) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.DistanceType() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_ObjectType(t *testing.T) 
{ + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want string + } + type test struct { + name string + fields fields + want want + checkFunc func(want, string) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got string) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.ObjectType() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Name(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.X1b + query x1b.X1b + groundTruth [][]int + distances x1b.Fvecs + } + type want struct { + want string + } + type test struct { + name string + fields fields + want want + checkFunc func(want, string) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got string) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.Name() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} diff --git a/pkg/tools/cli/loadtest/assets/small_dataset_test.go b/pkg/tools/cli/loadtest/assets/small_dataset_test.go new file mode 100644 index 0000000000..4bae3775da --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/small_dataset_test.go @@ -0,0 +1,1105 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package assets + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" +) + +func Test_loadSmallData(t *testing.T) { + type args struct { + fileName string + datasetName string + distanceType string + objectType string + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fileName: "", + datasetName: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fileName: "", + datasetName: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := loadSmallData(test.args.fileName, test.args.datasetName, test.args.distanceType, test.args.objectType) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_identity(t *testing.T) { + type args struct { + dim int + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := identity(test.args.dim) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_random(t *testing.T) { + type args struct 
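// loadSmallData, identity, random, and gaussian all return a
// func() (Dataset, error), and reflect.DeepEqual reports two non-nil funcs as
// unequal, so the defaultCheckFunc in these loader tests can only pass when
// the returned loader is nil. A sketch of a checkFunc that exercises the
// returned loader instead; the expected dimension of 128 is illustrative and
// assumes the Dataset interface exposes Dimension:
//
//	checkFunc: func(w want, got func() (Dataset, error)) error {
//	    d, err := got()
//	    if err != nil {
//	        return errors.Errorf("loading dataset failed: %v", err)
//	    }
//	    if d.Dimension() != 128 {
//	        return errors.Errorf("unexpected dimension: %d", d.Dimension())
//	    }
//	    return nil
//	},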
{ + dim int + size int + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dim: 0, + size: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dim: 0, + size: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := random(test.args.dim, test.args.size) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_gaussian(t *testing.T) { + type args struct { + dim int + size int + mean float64 + stdDev float64 + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dim: 0, + size: 0, + mean: 0, + stdDev: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dim: 0, + size: 0, + mean: 0, + stdDev: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := gaussian(test.args.dim, test.args.size, test.args.mean, test.args.stdDev) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Train(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + 
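// A sketch of how a case like the surrounding template could be filled in,
// assuming Train(i) returns train[i] as []float32 (the accessor itself is not
// part of this diff). Note that the fields struct declares dataset as a
// *dataset, so a concrete case needs &dataset{} rather than the dataset{}
// value shown in the generated template.
//
//	{
//	    name: "returns first training vector",
//	    args: args{i: 0},
//	    fields: fields{
//	        dataset: &dataset{},
//	        train:   [][]float32{{0.1, 0.2}},
//	    },
//	    want:      want{want: []float32{0.1, 0.2}},
//	    checkFunc: defaultCheckFunc,
//	},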
train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Train(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_TrainSize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.TrainSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Query(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: 
nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Query(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_QuerySize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.QuerySize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Distance(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want []float32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []float32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []float32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + 
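// The commented Distance templates around this note can be completed the same
// way, assuming Distance(i) returns distances[i] with a nil error (not shown
// in this diff); the values here are purely illustrative.
//
//	{
//	    name: "returns distances of the first query",
//	    args: args{i: 0},
//	    fields: fields{
//	        dataset:   &dataset{},
//	        distances: [][]float32{{0.5, 1.25}},
//	    },
//	    want:      want{want: []float32{0.5, 1.25}},
//	    checkFunc: defaultCheckFunc,
//	},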
query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Distance(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_DistanceSize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.DistanceSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Neighbor(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want []int + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []int, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []int, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + 
distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Neighbor(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_NeighborSize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.NeighborSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} From f64b1cc277741aab020843308247fe6926d3f5fa Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Thu, 27 Aug 2020 17:46:03 +0900 Subject: [PATCH 10/15] regenerate tests Signed-off-by: Kosuke Morimoto --- pkg/tools/cli/loadtest/assets/dataset_test.go | 1601 +---------------- .../cli/loadtest/assets/hdf5_loader_test.go | 400 +--- .../cli/loadtest/assets/small_dataset_test.go | 2 +- 3 files changed, 107 insertions(+), 1896 deletions(-) diff --git a/pkg/tools/cli/loadtest/assets/dataset_test.go b/pkg/tools/cli/loadtest/assets/dataset_test.go index d4b2c8cc03..fb833ff2ff 100644 --- a/pkg/tools/cli/loadtest/assets/dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/dataset_test.go @@ -17,1106 +17,33 @@ package assets import ( "reflect" - "sync" "testing" - "github.com/vdaas/vald/internal/errors" + "github.com/pkg/errors" "go.uber.org/goleak" ) -func Test_identity(t *testing.T) { - type 
args struct { - dim int - } - type want struct { - want func() (Dataset, error) - } - type test struct { - name string - args args - want want - checkFunc func(want, func() (Dataset, error)) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got func() (Dataset, error)) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := identity(test.args.dim) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_random(t *testing.T) { - type args struct { - dim int - size int - } - type want struct { - want func() (Dataset, error) - } - type test struct { - name string - args args - want want - checkFunc func(want, func() (Dataset, error)) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got func() (Dataset, error)) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - dim: 0, - size: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - dim: 0, - size: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := random(test.args.dim, test.args.size) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_datasetDir(t *testing.T) { - type want struct { - want string - err error - } - type test struct { - name string - want want - checkFunc func(want, string, error) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got string, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if 
test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := smallDatasetDir() - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestData(t *testing.T) { - type args struct { - name string - } - type want struct { - want func() (Dataset, error) - } - type test struct { - name string - args args - want want - checkFunc func(want, func() (Dataset, error)) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got func() (Dataset, error)) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - name: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - name: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := Data(test.args.name) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Train(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float32 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float32) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float32) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - 
trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Train() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_TrainAsFloat64(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float64 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float64) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float64) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.TrainAsFloat64() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Query(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances 
[][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float32 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float32) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float32) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Query() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_QueryAsFloat64(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float64 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float64) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float64) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 
0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.QueryAsFloat64() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Distances(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float32 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float32) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float32) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - 
trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Distances() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_DistancesAsFloat64(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float64 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float64) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float64) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.DistancesAsFloat64() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Neighbors(t *testing.T) { +func Test_dataset_Name(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 
[][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { - want [][]int + want string } type test struct { name string fields fields want want - checkFunc func(want, [][]int) error + checkFunc func(want, string) error beforeFunc func() afterFunc func() } - defaultCheckFunc := func(w want, got [][]int) error { + defaultCheckFunc := func(w want, got string) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1126,17 +53,6 @@ func Test_dataset_Neighbors(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1153,17 +69,6 @@ func Test_dataset_Neighbors(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1178,7 +83,7 @@ func Test_dataset_Neighbors(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1189,24 +94,13 @@ func Test_dataset_Neighbors(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.Neighbors() + got := d.Name() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1215,38 +109,27 @@ func Test_dataset_Neighbors(t *testing.T) { } } -func Test_dataset_IDs(t *testing.T) { +func Test_dataset_Dimension(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { - want []string + want int } type test struct { name string fields fields want want - checkFunc func(want, []string) error + checkFunc func(want, int) error beforeFunc func() afterFunc func() } - defaultCheckFunc := func(w want, got 
[]string) error { + defaultCheckFunc := func(w want, got int) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1256,17 +139,6 @@ func Test_dataset_IDs(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1283,17 +155,6 @@ func Test_dataset_IDs(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1308,7 +169,7 @@ func Test_dataset_IDs(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1319,24 +180,13 @@ func Test_dataset_IDs(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.IDs() + got := d.Dimension() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1345,23 +195,12 @@ func Test_dataset_IDs(t *testing.T) { } } -func Test_dataset_Name(t *testing.T) { +func Test_dataset_DistanceType(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { want string @@ -1376,137 +215,7 @@ func Test_dataset_Name(t *testing.T) { } defaultCheckFunc := func(w want, got string) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - 
trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Name() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Dimension(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want int - } - type test struct { - name string - fields fields - want want - checkFunc func(want, int) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got int) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1516,17 +225,6 @@ func Test_dataset_Dimension(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1543,17 +241,6 @@ func Test_dataset_Dimension(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1568,7 +255,7 @@ func Test_dataset_Dimension(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1579,24 +266,13 @@ func Test_dataset_Dimension(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: 
test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.Dimension() + got := d.DistanceType() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1605,23 +281,12 @@ func Test_dataset_Dimension(t *testing.T) { } } -func Test_dataset_DistanceType(t *testing.T) { +func Test_dataset_ObjectType(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { want string @@ -1636,7 +301,7 @@ func Test_dataset_DistanceType(t *testing.T) { } defaultCheckFunc := func(w want, got string) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1646,17 +311,6 @@ func Test_dataset_DistanceType(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1673,17 +327,6 @@ func Test_dataset_DistanceType(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1698,7 +341,7 @@ func Test_dataset_DistanceType(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1709,24 +352,13 @@ func Test_dataset_DistanceType(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.DistanceType() + got := d.ObjectType() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1735,38 +367,28 @@ func Test_dataset_DistanceType(t *testing.T) { } } -func Test_dataset_ObjectType(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 
[][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string +func Test_findDir(t *testing.T) { + type args struct { + path string } type want struct { want string + err error } type test struct { name string - fields fields + args args want want - checkFunc func(want, string) error - beforeFunc func() - afterFunc func() + checkFunc func(want, string, error) error + beforeFunc func(args) + afterFunc func(args) } - defaultCheckFunc := func(w want, got string) error { + defaultCheckFunc := func(w want, got string, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1775,22 +397,8 @@ func Test_dataset_ObjectType(t *testing.T) { /* { name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", + args: args { + path: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1802,22 +410,8 @@ func Test_dataset_ObjectType(t *testing.T) { func() test { return test { name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", + args: args { + path: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1828,36 +422,19 @@ func Test_dataset_ObjectType(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { - test.beforeFunc() + test.beforeFunc(test.args) } if test.afterFunc != nil { - defer test.afterFunc() + defer test.afterFunc(test.args) } if test.checkFunc == nil { test.checkFunc = defaultCheckFunc } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - got := d.ObjectType() - if err := test.checkFunc(test.want, got); err != nil { + got, err := findDir(test.args.path) + if err := test.checkFunc(test.want, got, err); err != nil { tt.Errorf("error = %v", err) } @@ -1865,24 +442,24 @@ func Test_dataset_ObjectType(t *testing.T) { } } -func Test_float32To64(t *testing.T) { +func TestData(t *testing.T) { type args struct { - x [][]float32 + name string } type want struct { - wantY [][]float64 + want func() (Dataset, error) } type test struct { name string args args want want - checkFunc func(want, 
[][]float64) error + checkFunc func(want, func() (Dataset, error)) error beforeFunc func(args) afterFunc func(args) } - defaultCheckFunc := func(w want, gotY [][]float64) error { - if !reflect.DeepEqual(gotY, w.wantY) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotY, w.wantY) + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1892,7 +469,7 @@ func Test_float32To64(t *testing.T) { { name: "test_case_1", args: args { - x: nil, + name: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1905,7 +482,7 @@ func Test_float32To64(t *testing.T) { return test { name: "test_case_2", args: args { - x: nil, + name: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1916,7 +493,7 @@ func Test_float32To64(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc(test.args) } @@ -1927,8 +504,8 @@ func Test_float32To64(t *testing.T) { test.checkFunc = defaultCheckFunc } - gotY := float32To64(test.args.x) - if err := test.checkFunc(test.want, gotY); err != nil { + got := Data(test.args.name) + if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } diff --git a/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go b/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go index 151f89c0b6..55ed91fcbf 100644 --- a/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go +++ b/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go @@ -45,10 +45,10 @@ func Test_loadFloat32(t *testing.T) { } defaultCheckFunc := func(w want, got interface{}, err error) error { if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + return errors.Errorf("got error = %v, want %v", err, w.err) } if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -88,7 +88,7 @@ func Test_loadFloat32(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc(test.args) } @@ -129,10 +129,10 @@ func Test_loadInt(t *testing.T) { } defaultCheckFunc := func(w want, got interface{}, err error) error { if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + return errors.Errorf("got error = %v, want %v", err, w.err) } if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -172,7 +172,7 @@ func Test_loadInt(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc(test.args) } @@ -213,13 +213,13 @@ func Test_loadDataset(t *testing.T) { } defaultCheckFunc := func(w want, gotDim int, gotVec interface{}, err error) error { if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + return errors.Errorf("got error = %v, want %v", err, w.err) } if !reflect.DeepEqual(gotDim, w.wantDim) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotDim, w.wantDim) + return errors.Errorf("got = 
%v, want %v", gotDim, w.wantDim) } if !reflect.DeepEqual(gotVec, w.wantVec) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotVec, w.wantVec) + return errors.Errorf("got = %v, want %v", gotVec, w.wantVec) } return nil } @@ -257,7 +257,7 @@ func Test_loadDataset(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc(test.args) } @@ -299,22 +299,22 @@ func TestLoad(t *testing.T) { } defaultCheckFunc := func(w want, gotTrain [][]float32, gotTest [][]float32, gotDistances [][]float32, gotNeighbors [][]int, gotDim int, err error) error { if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + return errors.Errorf("got error = %v, want %v", err, w.err) } if !reflect.DeepEqual(gotTrain, w.wantTrain) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotTrain, w.wantTrain) + return errors.Errorf("got = %v, want %v", gotTrain, w.wantTrain) } if !reflect.DeepEqual(gotTest, w.wantTest) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotTest, w.wantTest) + return errors.Errorf("got = %v, want %v", gotTest, w.wantTest) } if !reflect.DeepEqual(gotDistances, w.wantDistances) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotDistances, w.wantDistances) + return errors.Errorf("got = %v, want %v", gotDistances, w.wantDistances) } if !reflect.DeepEqual(gotNeighbors, w.wantNeighbors) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotNeighbors, w.wantNeighbors) + return errors.Errorf("got = %v, want %v", gotNeighbors, w.wantNeighbors) } if !reflect.DeepEqual(gotDim, w.wantDim) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotDim, w.wantDim) + return errors.Errorf("got = %v, want %v", gotDim, w.wantDim) } return nil } @@ -348,7 +348,7 @@ func TestLoad(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc(test.args) } @@ -367,369 +367,3 @@ func TestLoad(t *testing.T) { }) } } - -func TestCreateRandomIDs(t *testing.T) { - type args struct { - n int - } - type want struct { - wantIds []string - } - type test struct { - name string - args args - want want - checkFunc func(want, []string) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, gotIds []string) error { - if !reflect.DeepEqual(gotIds, w.wantIds) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotIds, w.wantIds) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - gotIds := CreateRandomIDs(test.args.n) - if err := test.checkFunc(test.want, gotIds); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestCreateRandomIDsWithLength(t *testing.T) { - type args struct { - n 
int - l int - } - type want struct { - wantIds []string - } - type test struct { - name string - args args - want want - checkFunc func(want, []string) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, gotIds []string) error { - if !reflect.DeepEqual(gotIds, w.wantIds) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotIds, w.wantIds) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - n: 0, - l: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - n: 0, - l: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - gotIds := CreateRandomIDsWithLength(test.args.n, test.args.l) - if err := test.checkFunc(test.want, gotIds); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestCreateSerialIDs(t *testing.T) { - type args struct { - n int - } - type want struct { - want []string - } - type test struct { - name string - args args - want want - checkFunc func(want, []string) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got []string) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := CreateSerialIDs(test.args.n) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestLoadDataWithRandomIDs(t *testing.T) { - type args struct { - path string - } - type want struct { - want Dataset - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, Dataset, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got Dataset, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if 
test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := LoadDataWithRandomIDs(test.args.path) - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestLoadDataWithSerialIDs(t *testing.T) { - type args struct { - path string - } - type want struct { - want Dataset - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, Dataset, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got Dataset, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := LoadDataWithSerialIDs(test.args.path) - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} diff --git a/pkg/tools/cli/loadtest/assets/small_dataset_test.go b/pkg/tools/cli/loadtest/assets/small_dataset_test.go index 4bae3775da..004241e323 100644 --- a/pkg/tools/cli/loadtest/assets/small_dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/small_dataset_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "github.com/vdaas/vald/internal/errors" + "github.com/pkg/errors" "go.uber.org/goleak" ) From bdd80f2fbf06b71ad89da897c37899cf10576719 Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Fri, 28 Aug 2020 14:40:36 +0900 Subject: [PATCH 11/15] fix generated test Signed-off-by: Kosuke Morimoto --- pkg/tools/cli/loadtest/assets/dataset_test.go | 2 +- pkg/tools/cli/loadtest/assets/small_dataset_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/tools/cli/loadtest/assets/dataset_test.go b/pkg/tools/cli/loadtest/assets/dataset_test.go index fb833ff2ff..ef7c6aab94 100644 --- a/pkg/tools/cli/loadtest/assets/dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/dataset_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "github.com/pkg/errors" + "github.com/vdaas/vald/internal/errors" "go.uber.org/goleak" ) diff --git a/pkg/tools/cli/loadtest/assets/small_dataset_test.go b/pkg/tools/cli/loadtest/assets/small_dataset_test.go index 004241e323..4bae3775da 100644 --- a/pkg/tools/cli/loadtest/assets/small_dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/small_dataset_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "github.com/pkg/errors" + "github.com/vdaas/vald/internal/errors" "go.uber.org/goleak" ) From 868e338d5719d030920249f2cdc02fb9d9786002 Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Tue, 1 Sep 2020 14:16:49 +0900 Subject: [PATCH 12/15] by reviewdog Signed-off-by: Kosuke Morimoto 
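Every regenerated test above also swaps defer goleak.VerifyNone(t) for defer goleak.VerifyNone(tt), so each subtest checks for leaked goroutines against its own *testing.T rather than the parent's. A minimal sketch of that table-driven pattern; the package name, test name and cases are illustrative only and not taken from the patches:

package assets

import (
	"testing"

	"go.uber.org/goleak"
)

func TestExample(t *testing.T) {
	tests := []struct {
		name string
	}{
		{name: "test_case_1"},
		{name: "test_case_2"},
	}
	for _, test := range tests {
		t.Run(test.name, func(tt *testing.T) {
			// Verify against the subtest's *testing.T so a leaked goroutine is
			// reported on the case that actually caused it, not on the parent test.
			defer goleak.VerifyNone(tt)
			// exercise the code under test here
		})
	}
}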
--- pkg/tools/cli/loadtest/assets/dataset.go | 1 + pkg/tools/cli/loadtest/assets/large_dataset.go | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tools/cli/loadtest/assets/dataset.go b/pkg/tools/cli/loadtest/assets/dataset.go index 7c06be1a76..a5e71a6161 100644 --- a/pkg/tools/cli/loadtest/assets/dataset.go +++ b/pkg/tools/cli/loadtest/assets/dataset.go @@ -112,6 +112,7 @@ func Data(name string) func() (Dataset, error) { sd, _ := strconv.ParseFloat(l[4], 64) return gaussian(d, s, m, sd) } + switch name { case "fashion-mnist": return loadSmallData("fashion-mnist-784-euclidean.hdf5", name, "l2", "float") diff --git a/pkg/tools/cli/loadtest/assets/large_dataset.go b/pkg/tools/cli/loadtest/assets/large_dataset.go index 273d0ecdfd..c384bcbf68 100644 --- a/pkg/tools/cli/loadtest/assets/large_dataset.go +++ b/pkg/tools/cli/loadtest/assets/large_dataset.go @@ -84,7 +84,6 @@ func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFi groundTruth: groundTruth, distances: distances, }, nil - } } From fd16f330050a6cf43c3a0ad1526cae3f52ac527e Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Tue, 1 Sep 2020 16:52:25 +0900 Subject: [PATCH 13/15] by comments Signed-off-by: Kosuke Morimoto --- hack/benchmark/assets/x1b/loader.go | 32 +++++---- ...der_test_bench.go => loader_bench_test.go} | 45 ++++-------- hack/benchmark/assets/x1b/loader_test.go | 30 ++++---- .../cli/loadtest/assets/large_dataset.go | 22 +++--- .../cli/loadtest/assets/large_dataset_test.go | 72 +++++++++---------- .../cli/loadtest/assets/small_dataset.go | 6 +- 6 files changed, 97 insertions(+), 110 deletions(-) rename hack/benchmark/assets/x1b/{loader_test_bench.go => loader_bench_test.go} (72%) diff --git a/hack/benchmark/assets/x1b/loader.go b/hack/benchmark/assets/x1b/loader.go index 80649ba732..277db20338 100644 --- a/hack/benchmark/assets/x1b/loader.go +++ b/hack/benchmark/assets/x1b/loader.go @@ -33,25 +33,25 @@ var ( ErrUnsupportedFileType = errors.New("unsupported file type") ) -type X1b interface { +type BillionScaleVectors interface { Load(i int) (interface{}, error) Dimension() int Size() int Close() error } -type Bvecs interface { - X1b +type Uint8Vectors interface { + BillionScaleVectors LoadUint8(i int) ([]uint8, error) } -type Fvecs interface { - X1b +type FloatVectors interface { + BillionScaleVectors LoadFloat32(i int) ([]float32, error) } -type Ivecs interface { - X1b +type Int32Vectors interface { + BillionScaleVectors LoadInt32(i int) ([]int32, error) } @@ -78,7 +78,9 @@ func open(fname string, elementSize int) (f *file, err error) { return nil, err } defer func() { - err = errors.Wrap(err, fp.Close().Error()) + if e := fp.Close(); e != nil { + err = errors.Wrap(err, e.Error()) + } }() fi, err := fp.Stat() @@ -157,7 +159,7 @@ func (iv *ivecs) Load(i int) (interface{}, error) { return iv.LoadInt32(i) } -func NewBVecs(fname string) (Bvecs, error) { +func NewUint8Vectors(fname string) (Uint8Vectors, error) { f, err := open(fname, 1) if err != nil { return nil, err @@ -165,7 +167,7 @@ func NewBVecs(fname string) (Bvecs, error) { return &bvecs{f}, nil } -func NewFVecs(fname string) (Fvecs, error) { +func NewFloatVectors(fname string) (FloatVectors, error) { f, err := open(fname, 4) if err != nil { return nil, err @@ -173,7 +175,7 @@ func NewFVecs(fname string) (Fvecs, error) { return &fvecs{f}, nil } -func NewIVecs(fname string) (Ivecs, error) { +func NewInt32Vectors(fname string) (Int32Vectors, error) { f, err := open(fname, 4) if err != nil { return nil, err @@ -181,14 
+183,14 @@ func NewIVecs(fname string) (Ivecs, error) { return &ivecs{f}, nil } -func Open(fname string) (X1b, error) { +func Open(fname string) (BillionScaleVectors, error) { switch filepath.Ext(fname) { case ".bvecs": - return NewBVecs(fname) + return NewUint8Vectors(fname) case ".fvecs": - return NewFVecs(fname) + return NewFloatVectors(fname) case ".ivecs": - return NewIVecs(fname) + return NewInt32Vectors(fname) default: return nil, ErrUnsupportedFileType } diff --git a/hack/benchmark/assets/x1b/loader_test_bench.go b/hack/benchmark/assets/x1b/loader_bench_test.go similarity index 72% rename from hack/benchmark/assets/x1b/loader_test_bench.go rename to hack/benchmark/assets/x1b/loader_bench_test.go index 3af9895543..ac38ca6d9e 100644 --- a/hack/benchmark/assets/x1b/loader_test_bench.go +++ b/hack/benchmark/assets/x1b/loader_bench_test.go @@ -26,7 +26,7 @@ const ( ) func BenchmarkBVecs(b *testing.B) { - bv, err := NewBVecs(bvecsFile) + bv, err := NewUint8Vectors(bvecsFile) if err != nil { b.Fatal(err) } @@ -40,29 +40,22 @@ func BenchmarkBVecs(b *testing.B) { b.Run(bvecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() - for n := 0; n < b.N; n++ { - v, err := bv.Load(i) + for n := 0; n < bb.N; n++ { + _, err := bv.Load(i) switch err { case nil: i++ - continue case ErrOutOfBounds: - if err := bv.Close(); err != nil { - bb.Fatal(err) - } - bv, err = NewBVecs(bvecsFile) i = 0 - } - if err != nil { + default: bb.Fatal(err) } - bb.Log(v) } }) } func BenchmarkFVecs(b *testing.B) { - fv, err := NewFVecs(fvecsFile) + fv, err := NewFloatVectors(fvecsFile) if err != nil { b.Fatal(err) } @@ -76,29 +69,22 @@ func BenchmarkFVecs(b *testing.B) { b.Run(fvecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() - for n := 0; n < b.N; n++ { - v, err := fv.Load(i) + for n := 0; n < bb.N; n++ { + _, err := fv.Load(i) switch err { case nil: i++ - continue case ErrOutOfBounds: - if err := fv.Close(); err != nil { - bb.Fatal(err) - } - fv, err = NewFVecs(fvecsFile) i = 0 - } - if err != nil { + default: bb.Fatal(err) } - bb.Log(v) } }) } func BenchmarkIVecs(b *testing.B) { - iv, err := NewIVecs(ivecsFile) + iv, err := NewInt32Vectors(ivecsFile) if err != nil { b.Fatal(err) } @@ -112,23 +98,16 @@ func BenchmarkIVecs(b *testing.B) { b.Run(ivecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() - for n := 0; n < b.N; n++ { - v, err := iv.Load(i) + for n := 0; n < bb.N; n++ { + _, err := iv.Load(i) switch err { case nil: i++ - continue case ErrOutOfBounds: - if err := iv.Close(); err != nil { - bb.Fatal(err) - } - iv, err = NewIVecs(ivecsFile) i = 0 - } - if err != nil { + default: bb.Fatal(err) } - bb.Log(v) } }) } diff --git a/hack/benchmark/assets/x1b/loader_test.go b/hack/benchmark/assets/x1b/loader_test.go index efa72ad779..aae11b84f6 100644 --- a/hack/benchmark/assets/x1b/loader_test.go +++ b/hack/benchmark/assets/x1b/loader_test.go @@ -992,18 +992,18 @@ func TestNewBVecs(t *testing.T) { fname string } type want struct { - want Bvecs + want Uint8Vectors err error } type test struct { name string args args want want - checkFunc func(want, Bvecs, error) error + checkFunc func(want, Uint8Vectors, error) error beforeFunc func(args) afterFunc func(args) } - defaultCheckFunc := func(w want, got Bvecs, err error) error { + defaultCheckFunc := func(w want, got Uint8Vectors, err error) error { if !errors.Is(err, w.err) { return errors.Errorf("got error = %v, want %v", err, w.err) } @@ -1053,7 +1053,7 @@ func TestNewBVecs(t *testing.T) { test.checkFunc = defaultCheckFunc } - got, err 
:= NewBVecs(test.args.fname) + got, err := NewUint8Vectors(test.args.fname) if err := test.checkFunc(test.want, got, err); err != nil { tt.Errorf("error = %v", err) } @@ -1067,18 +1067,18 @@ func TestNewFVecs(t *testing.T) { fname string } type want struct { - want Fvecs + want FloatVectors err error } type test struct { name string args args want want - checkFunc func(want, Fvecs, error) error + checkFunc func(want, FloatVectors, error) error beforeFunc func(args) afterFunc func(args) } - defaultCheckFunc := func(w want, got Fvecs, err error) error { + defaultCheckFunc := func(w want, got FloatVectors, err error) error { if !errors.Is(err, w.err) { return errors.Errorf("got error = %v, want %v", err, w.err) } @@ -1128,7 +1128,7 @@ func TestNewFVecs(t *testing.T) { test.checkFunc = defaultCheckFunc } - got, err := NewFVecs(test.args.fname) + got, err := NewFloatVectors(test.args.fname) if err := test.checkFunc(test.want, got, err); err != nil { tt.Errorf("error = %v", err) } @@ -1142,18 +1142,18 @@ func TestNewIVecs(t *testing.T) { fname string } type want struct { - want Ivecs + want Int32Vectors err error } type test struct { name string args args want want - checkFunc func(want, Ivecs, error) error + checkFunc func(want, Int32Vectors, error) error beforeFunc func(args) afterFunc func(args) } - defaultCheckFunc := func(w want, got Ivecs, err error) error { + defaultCheckFunc := func(w want, got Int32Vectors, err error) error { if !errors.Is(err, w.err) { return errors.Errorf("got error = %v, want %v", err, w.err) } @@ -1203,7 +1203,7 @@ func TestNewIVecs(t *testing.T) { test.checkFunc = defaultCheckFunc } - got, err := NewIVecs(test.args.fname) + got, err := NewInt32Vectors(test.args.fname) if err := test.checkFunc(test.want, got, err); err != nil { tt.Errorf("error = %v", err) } @@ -1217,18 +1217,18 @@ func TestOpen(t *testing.T) { fname string } type want struct { - want X1b + want BillionScaleVectors err error } type test struct { name string args args want want - checkFunc func(want, X1b, error) error + checkFunc func(want, BillionScaleVectors, error) error beforeFunc func(args) afterFunc func(args) } - defaultCheckFunc := func(w want, got X1b, err error) error { + defaultCheckFunc := func(w want, got BillionScaleVectors, err error) error { if !errors.Is(err, w.err) { return errors.Errorf("got error = %v, want %v", err, w.err) } diff --git a/pkg/tools/cli/loadtest/assets/large_dataset.go b/pkg/tools/cli/loadtest/assets/large_dataset.go index c384bcbf68..7776dbf10c 100644 --- a/pkg/tools/cli/loadtest/assets/large_dataset.go +++ b/pkg/tools/cli/loadtest/assets/large_dataset.go @@ -22,17 +22,21 @@ import ( "github.com/vdaas/vald/internal/errors" ) +const ( + largeDatasetPath = "hack/benchmark/assets/dataset/large" +) + type largeDataset struct { *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFileName, name, distanceType, objectType string) func() (Dataset, error) { return func() (Dataset, error) { - dir, err := findDir("hack/benchmark/assets/dataset/large") + dir, err := findDir(largeDatasetPath) if err != nil { return nil, err } @@ -49,17 +53,15 @@ func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFi if tdim != qdim { return nil, errors.New("dimension must be same train and query.") } - iv, err := x1b.NewIVecs(filepath.Join(dir, groundTruthFileName)) + 
iv, err := x1b.NewInt32Vectors(filepath.Join(dir, groundTruthFileName)) if err != nil { return nil, err } groundTruth := make([][]int, 0, iv.Size()) for i := 0; ; i++ { gt32, err := iv.LoadInt32(i) - if err != nil { - if err == ErrOutOfBounds { - break - } + if err == ErrOutOfBounds { + break } gt := make([]int, 0, len(gt32)) for _, v := range gt32 { @@ -68,7 +70,7 @@ func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFi groundTruth = append(groundTruth, gt) } - distances, err := x1b.NewFVecs(filepath.Join(dir, distanceFileName)) + distances, err := x1b.NewFloatVectors(filepath.Join(dir, distanceFileName)) if err != nil { return nil, err } diff --git a/pkg/tools/cli/loadtest/assets/large_dataset_test.go b/pkg/tools/cli/loadtest/assets/large_dataset_test.go index b2990875ec..9ac09a2104 100644 --- a/pkg/tools/cli/loadtest/assets/large_dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/large_dataset_test.go @@ -119,10 +119,10 @@ func Test_largeDataset_Train(t *testing.T) { } type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want interface{} @@ -220,10 +220,10 @@ func Test_largeDataset_Train(t *testing.T) { func Test_largeDataset_TrainSize(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want int @@ -313,10 +313,10 @@ func Test_largeDataset_Query(t *testing.T) { } type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want interface{} @@ -414,10 +414,10 @@ func Test_largeDataset_Query(t *testing.T) { func Test_largeDataset_QuerySize(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want int @@ -507,10 +507,10 @@ func Test_largeDataset_Distance(t *testing.T) { } type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want []float32 @@ -608,10 +608,10 @@ func Test_largeDataset_Distance(t *testing.T) { func Test_largeDataset_DistanceSize(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want int @@ -701,10 +701,10 @@ func Test_largeDataset_Neighbor(t *testing.T) { } type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want []int @@ -802,10 +802,10 @@ func Test_largeDataset_Neighbor(t *testing.T) { func Test_largeDataset_NeighborSize(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + 
distances x1b.FloatVectors } type want struct { want int @@ -892,10 +892,10 @@ func Test_largeDataset_NeighborSize(t *testing.T) { func Test_largeDataset_Dimension(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want int @@ -982,10 +982,10 @@ func Test_largeDataset_Dimension(t *testing.T) { func Test_largeDataset_DistanceType(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want string @@ -1072,10 +1072,10 @@ func Test_largeDataset_DistanceType(t *testing.T) { func Test_largeDataset_ObjectType(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want string @@ -1162,10 +1162,10 @@ func Test_largeDataset_ObjectType(t *testing.T) { func Test_largeDataset_Name(t *testing.T) { type fields struct { dataset *dataset - train x1b.X1b - query x1b.X1b + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors groundTruth [][]int - distances x1b.Fvecs + distances x1b.FloatVectors } type want struct { want string diff --git a/pkg/tools/cli/loadtest/assets/small_dataset.go b/pkg/tools/cli/loadtest/assets/small_dataset.go index 72bb9ff659..0c43642bdc 100644 --- a/pkg/tools/cli/loadtest/assets/small_dataset.go +++ b/pkg/tools/cli/loadtest/assets/small_dataset.go @@ -21,6 +21,10 @@ import ( "path/filepath" ) +const ( + smallDatasetPath = "hack/benchmark/assets/dataset" +) + type smallDataset struct { *dataset train [][]float32 @@ -31,7 +35,7 @@ type smallDataset struct { func loadSmallData(fileName, datasetName, distanceType, objectType string) func() (Dataset, error) { return func() (Dataset, error) { - dir, err := findDir("hack/benchmark/assets/dataset") + dir, err := findDir(smallDatasetPath) if err != nil { return nil, err } From 1d9d0ba35fd06e1f67304402c715d7f4e9e2f7e8 Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Tue, 15 Sep 2020 11:22:45 +0900 Subject: [PATCH 14/15] stop benchmark if error is occurred Signed-off-by: Kosuke Morimoto --- hack/benchmark/core/benchmark/strategy/bulk_insert.go | 4 ++-- .../benchmark/core/benchmark/strategy/bulk_insert_commit.go | 4 ++-- hack/benchmark/internal/e2e/strategy/insert.go | 6 ++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert.go b/hack/benchmark/core/benchmark/strategy/bulk_insert.go index cc3c3069ba..8846ea1014 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert.go @@ -46,7 +46,7 @@ func NewBulkInsert(opts ...StrategyOption) benchmark.Strategy { for i := 0; i < size; i++ { arr, err := dataset.Train(i) if err != nil { - break + b.Fatal(err) } v = append(v, arr.([]float32)) } @@ -72,7 +72,7 @@ func NewBulkInsert(opts ...StrategyOption) benchmark.Strategy { for i := 0; i < size; i++ { arr, err := dataset.Train(i) if err != nil { - break + b.Fatal(err) } v = append(v, float32To64(arr.([]float32))) } diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go index 
a618e664d8..80eea2a5b1 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go @@ -42,7 +42,7 @@ func NewBulkInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Stra for i := 0; i < size; i++ { arr, err := dataset.Train(i) if err != nil { - break + b.Fatal(err) } v = append(v, arr.([]float32)) } @@ -68,7 +68,7 @@ func NewBulkInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Stra for i := 0; i < size; i++ { arr, err := dataset.Train(i) if err != nil { - break + b.Fatal(err) } v = append(v, float32To64(arr.([]float32))) } diff --git a/hack/benchmark/internal/e2e/strategy/insert.go b/hack/benchmark/internal/e2e/strategy/insert.go index c655e9aa21..4baf51f953 100644 --- a/hack/benchmark/internal/e2e/strategy/insert.go +++ b/hack/benchmark/internal/e2e/strategy/insert.go @@ -58,8 +58,7 @@ func (isrt *insert) run(ctx context.Context, b *testing.B, c client.Client, data for i := 0; i < bb.N; i++ { v, err := dataset.Train(cnt % dataset.TrainSize()) if err != nil { - cnt = 0 - break + b.Fatal(err) } isrt.do(ctx, bb, c, fmt.Sprint(cnt), v.([]float32)) cnt++ @@ -80,8 +79,7 @@ func (isrt *insert) runParallel(ctx context.Context, b *testing.B, c client.Clie n := int(atomic.AddInt64(&cnt, 1)) - 1 v, err := dataset.Train(n % dataset.TrainSize()) if err != nil { - cnt = 0 - break + b.Fatal(err) } isrt.do(ctx, bb, c, fmt.Sprint(cnt), v.([]float32)) From 7a57e13c80fdcfe2e5324d8172d8f170d6df068c Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Wed, 23 Sep 2020 12:41:23 +0900 Subject: [PATCH 15/15] fix benchmark Signed-off-by: Kosuke Morimoto --- .../benchmark/assets/x1b/loader_bench_test.go | 42 ++++++++----------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/hack/benchmark/assets/x1b/loader_bench_test.go b/hack/benchmark/assets/x1b/loader_bench_test.go index ac38ca6d9e..0a24d0658f 100644 --- a/hack/benchmark/assets/x1b/loader_bench_test.go +++ b/hack/benchmark/assets/x1b/loader_bench_test.go @@ -36,20 +36,18 @@ func BenchmarkBVecs(b *testing.B) { } }() - i := 0 b.Run(bvecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() + + i := 0 + size := bv.Size() for n := 0; n < bb.N; n++ { - _, err := bv.Load(i) - switch err { - case nil: - i++ - case ErrOutOfBounds: - i = 0 - default: + _, err := bv.Load(i % size) + if err != nil { bb.Fatal(err) } + i++ } }) } @@ -65,20 +63,18 @@ func BenchmarkFVecs(b *testing.B) { } }() - i := 0 b.Run(fvecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() + + i := 0 + size := fv.Size() for n := 0; n < bb.N; n++ { - _, err := fv.Load(i) - switch err { - case nil: - i++ - case ErrOutOfBounds: - i = 0 - default: + _, err := fv.Load(i % size) + if err != nil { bb.Fatal(err) } + i++ } }) } @@ -94,20 +90,18 @@ func BenchmarkIVecs(b *testing.B) { } }() - i := 0 b.Run(ivecsFile, func(bb *testing.B) { bb.ReportAllocs() bb.ResetTimer() + + i := 0 + size := iv.Size() for n := 0; n < bb.N; n++ { - _, err := iv.Load(i) - switch err { - case nil: - i++ - case ErrOutOfBounds: - i = 0 - default: + _, err := iv.Load(i % size) + if err != nil { bb.Fatal(err) } + i++ } }) }
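After the series, the x1b package exposes Open plus the typed constructors (NewUint8Vectors, NewFloatVectors, NewInt32Vectors) as the entry points for the billion-scale files. A minimal usage sketch follows; the import path and the dataset path are assumptions for illustration, not part of the patches, and only the methods defined in loader.go are used:

package main

import (
	"fmt"
	"log"

	// Assumed import path for the loader package added in this series.
	"github.com/vdaas/vald/hack/benchmark/assets/x1b"
)

func main() {
	// Open dispatches on the file extension (.bvecs / .fvecs / .ivecs) and
	// returns a BillionScaleVectors, or ErrUnsupportedFileType otherwise.
	vecs, err := x1b.Open("hack/benchmark/assets/large/dataset/sift1b/bigann_query.bvecs")
	if err != nil {
		log.Fatal(err)
	}
	defer vecs.Close()

	fmt.Println("dimension:", vecs.Dimension(), "vectors:", vecs.Size())

	// Index modulo Size() so repeated reads wrap around instead of running past
	// the end of the file, the same pattern PATCH 15/15 adopts in the benchmarks.
	size := vecs.Size()
	for i := 0; i < 5; i++ {
		v, err := vecs.Load(i % size)
		if err != nil {
			log.Fatal(err)
		}
		// v is []uint8 for .bvecs, []float32 for .fvecs and []int32 for .ivecs.
		_ = v
	}
}

Open keeps call sites format-agnostic, while the typed constructors avoid the interface{} round trip when the element type is already known, e.g. LoadFloat32 on a FloatVectors.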