diff --git a/Makefile b/Makefile
index 069e7c11f4..b2614d3c0d 100644
--- a/Makefile
+++ b/Makefile
@@ -93,6 +93,34 @@ CXXFLAGS ?= $(CFLAGS)
 BENCH_DATASET_MD5S := $(eval BENCH_DATASET_MD5S := $(shell find $(BENCH_DATASET_MD5_DIR) -type f -regex ".*\.md5"))$(BENCH_DATASET_MD5S)
 BENCH_DATASETS = $(BENCH_DATASET_MD5S:$(BENCH_DATASET_MD5_DIR)/%.md5=$(BENCH_DATASET_HDF5_DIR)/%.hdf5)
 
+# Root directory of the billion-scale datasets fetched by bench/datasets/large.
+BENCH_LARGE_DATASET_BASE_DIR = $(BENCH_DATASET_BASE_DIR)/large/dataset
+
+SIFT1B_ROOT_DIR = $(BENCH_LARGE_DATASET_BASE_DIR)/sift1b
+
+SIFT1B_BASE_FILE = $(SIFT1B_ROOT_DIR)/bigann_base.bvecs
+SIFT1B_LEARN_FILE = $(SIFT1B_ROOT_DIR)/bigann_learn.bvecs
+SIFT1B_QUERY_FILE = $(SIFT1B_ROOT_DIR)/bigann_query.bvecs
+SIFT1B_GROUNDTRUTH_DIR = $(SIFT1B_ROOT_DIR)/gnd
+
+SIFT1B_BASE_URL = ftp://ftp.irisa.fr/local/texmex/corpus/
+
+DEEP1B_ROOT_DIR = $(BENCH_LARGE_DATASET_BASE_DIR)/deep1b
+
+DEEP1B_BASE_FILE = $(DEEP1B_ROOT_DIR)/deep1B_base.fvecs
+DEEP1B_LEARN_FILE = $(DEEP1B_ROOT_DIR)/deep1B_learn.fvecs
+DEEP1B_QUERY_FILE = $(DEEP1B_ROOT_DIR)/deep1B_queries.fvecs
+DEEP1B_GROUNDTRUTH_FILE = $(DEEP1B_ROOT_DIR)/deep1B_groundtruth.ivecs
+
+# Chunk suffixes are spelled out instead of generated with a shell brace range:
+# {0..N} is a bash extension and $(shell ...) would fork on every expansion.
+DEEP1B_BASE_DIR = $(DEEP1B_ROOT_DIR)/base
+DEEP1B_BASE_CHUNK_FILES = $(addprefix $(DEEP1B_BASE_DIR)/base_,00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36)
+DEEP1B_LEARN_DIR = $(DEEP1B_ROOT_DIR)/learn
+DEEP1B_LEARN_CHUNK_FILES = $(addprefix $(DEEP1B_LEARN_DIR)/learn_,00 01 02 03 04 05 06 07 08 09 10 11 12 13)
+
+DEEP1B_API_URL = https://cloud-api.yandex.net/v1/disk/public/resources/download?public_key=https://yadi.sk/d/11eDCm7Dsn9GA&path=
+
 DATASET_ARGS ?= identity-128
 ADDRESS_ARGS ?= ""
 
diff --git a/Makefile.d/bench.mk b/Makefile.d/bench.mk
index da6113f1f8..5a2838b378 100644
--- a/Makefile.d/bench.mk
+++ b/Makefile.d/bench.mk
@@ -24,6 +24,32 @@ $(BENCH_DATASET_HDF5_DIR):
 	$(call mkdir, $@)
 	$(call rm, -rf, $@/*)
 
+# Create the directory named by the stem. Intended as an order-only
+# prerequisite so download rules can rely on their directory existing.
+%.large_dataset_dir:
+	@mkdir -p $*
+
+# Downloads are written to $@.tmp and renamed on success, so a failed or
+# interrupted transfer never leaves a truncated target that looks up to date.
+$(SIFT1B_BASE_FILE) $(SIFT1B_LEARN_FILE) $(SIFT1B_QUERY_FILE): | $(SIFT1B_ROOT_DIR).large_dataset_dir
+	test -f $@ || { curl -fsSL $(SIFT1B_BASE_URL)$(subst $(SIFT1B_ROOT_DIR)/,,$@).gz | gunzip -d > $@.tmp && mv -f $@.tmp $@; }
+
+# The target is a directory, hence test -d rather than test -f.
+$(SIFT1B_GROUNDTRUTH_DIR): | $(SIFT1B_ROOT_DIR).large_dataset_dir
+	test -d $@ || curl -fsSL $(SIFT1B_BASE_URL)bigann_gnd.tar.gz | tar -C $(SIFT1B_ROOT_DIR) -zx
+
+# Ask DEEP1B_API_URL for the download href of the requested path, then fetch
+# it. Chunk files live in base/ and learn/, so the parent directory of the
+# target is created here as well.
+$(DEEP1B_GROUNDTRUTH_FILE) $(DEEP1B_QUERY_FILE) $(DEEP1B_BASE_CHUNK_FILES) $(DEEP1B_LEARN_CHUNK_FILES): | $(DEEP1B_ROOT_DIR).large_dataset_dir
+	test -f $@ || { mkdir -p $(@D) && url="$$(curl -fsSL "$(DEEP1B_API_URL)$(subst $(DEEP1B_ROOT_DIR),,$@)" | sed -e 's/^{\(.*\)}$$/\1/' | tr ',' '\n' | grep href | cut -d ':' -f 2- | tr -d '"')" && curl -fsSL "$$url" -o $@.tmp && mv -f $@.tmp $@; }
+
+$(DEEP1B_BASE_FILE): | $(DEEP1B_BASE_DIR).large_dataset_dir $(DEEP1B_BASE_CHUNK_FILES)
+	cat $(DEEP1B_BASE_CHUNK_FILES) > $@.tmp && mv -f $@.tmp $@
+
+$(DEEP1B_LEARN_FILE): | $(DEEP1B_LEARN_DIR).large_dataset_dir $(DEEP1B_LEARN_CHUNK_FILES)
+	cat $(DEEP1B_LEARN_CHUNK_FILES) > $@.tmp && mv -f $@.tmp $@
+
 .PHONY: bench/datasets
 ## fetch datasets for benchmark
 bench/datasets: $(BENCH_DATASETS)
@@ -45,6 +71,28 @@ bench/datasets/md5dir/print:
 bench/datasets/hdf5dir/print:
 	@echo $(BENCH_DATASET_HDF5_DIR)
 
+.PHONY: bench/datasets/large
+## fetch large datasets for benchmark
+bench/datasets/large: \
+	bench/datasets/large/sift1b \
+	bench/datasets/large/deep1b
+
+.PHONY: bench/datasets/large/sift1b
+## fetch sift1b dataset for benchmark
+bench/datasets/large/sift1b: \
+	$(SIFT1B_BASE_FILE) \
+	$(SIFT1B_LEARN_FILE) \
+	$(SIFT1B_QUERY_FILE) \
+	$(SIFT1B_GROUNDTRUTH_DIR)
+
+.PHONY: bench/datasets/large/deep1b
+## fetch deep1b dataset for benchmark
+bench/datasets/large/deep1b: \
+	$(DEEP1B_BASE_FILE) \
+	$(DEEP1B_LEARN_FILE) \
+	$(DEEP1B_QUERY_FILE) \
+	$(DEEP1B_GROUNDTRUTH_FILE)
+
 .PHONY: bench
 ## run all benchmarks
 bench: \
diff --git a/dockers/tools/cli/loadtest/Dockerfile b/dockers/tools/cli/loadtest/Dockerfile
index 4f1b0bf450..4ce4d06c0f 100644
--- a/dockers/tools/cli/loadtest/Dockerfile
+++ b/dockers/tools/cli/loadtest/Dockerfile
@@ -39,6 +39,9 @@ COPY pkg/${PKG} .
 WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO}/cmd/${PKG}
 COPY cmd/${PKG} .
 
+WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO}/hack/benchmark/assets/x1b
+COPY hack/benchmark/assets/x1b .
+
 WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO}
 COPY versions/GO_VERSION .
 COPY versions/VALD_VERSION .
diff --git a/hack/benchmark/assets/large/dataset/.gitignore b/hack/benchmark/assets/large/dataset/.gitignore
new file mode 100644
index 0000000000..c96a04f008
--- /dev/null
+++ b/hack/benchmark/assets/large/dataset/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file
diff --git a/hack/benchmark/assets/x1b/loader.go b/hack/benchmark/assets/x1b/loader.go
new file mode 100644
index 0000000000..277db20338
--- /dev/null
+++ b/hack/benchmark/assets/x1b/loader.go
@@ -0,0 +1,251 @@
+//
+// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+package x1b
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+	"unsafe"
+
+	"github.com/vdaas/vald/internal/errors"
+)
+
+const (
+	// headerSize is the byte size of the int32 dimension field in front of every vector.
+	headerSize = 4
+)
+
+var (
+	// ErrOutOfBounds is returned when a vector index is negative or >= Size().
+	ErrOutOfBounds = errors.New("out of bounds")
+	// ErrUnsupportedFileType is returned by Open for an unknown file extension.
+	ErrUnsupportedFileType = errors.New("unsupported file type")
+	// ErrInvalidFormat is returned when a file is shorter than one header
+	// or its first header declares a non-positive dimension.
+	ErrInvalidFormat = errors.New("invalid file format")
+)
+
+// BillionScaleVectors is the element-type-agnostic view of an opened vector file.
+type BillionScaleVectors interface {
+	Load(i int) (interface{}, error)
+	Dimension() int
+	Size() int
+	Close() error
+}
+
+// Uint8Vectors gives typed access to a file with 1-byte elements (.bvecs).
+type Uint8Vectors interface {
+	BillionScaleVectors
+	LoadUint8(i int) ([]uint8, error)
+}
+
+// FloatVectors gives typed access to a file with float32 elements (.fvecs).
+type FloatVectors interface {
+	BillionScaleVectors
+	LoadFloat32(i int) ([]float32, error)
+}
+
+// Int32Vectors gives typed access to a file with int32 elements (.ivecs).
+type Int32Vectors interface {
+	BillionScaleVectors
+	LoadInt32(i int) ([]int32, error)
+}
+
+// file is a read-only memory mapping of a vector file.
+type file struct {
+	mem   []byte // the mapped file contents
+	dim   int    // elements per vector, taken from the first header
+	size  int    // number of whole records in mem
+	block int    // bytes per record: headerSize + dim*elementSize
+}
+
+type bvecs struct {
+	*file
+}
+
+type fvecs struct {
+	*file
+}
+
+type ivecs struct {
+	*file
+}
+
+// open maps fname read-only and derives the record layout from the first
+// record's dimension header; every record is assumed to share that dimension.
+// elementSize is the byte width of one vector element. The header is decoded
+// in host byte order. It returns ErrInvalidFormat for a file that is too short
+// to contain a header or whose header holds a non-positive dimension.
+func open(fname string, elementSize int) (f *file, err error) {
+	fp, err := os.Open(fname)
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		e := fp.Close()
+		if e == nil {
+			return
+		}
+		if f != nil {
+			// The constructors drop f whenever err is set, so unmap here to avoid a leak.
+			_ = syscall.Munmap(f.mem)
+			f = nil
+		}
+		if err == nil {
+			err = e
+			return
+		}
+		err = errors.Wrap(err, e.Error())
+	}()
+
+	fi, err := fp.Stat()
+	if err != nil {
+		return nil, err
+	}
+	// Without at least one full header there is no dimension to read from mem[0].
+	if fi.Size() < headerSize {
+		return nil, ErrInvalidFormat
+	}
+
+	mem, err := syscall.Mmap(int(fp.Fd()), 0, int(fi.Size()), syscall.PROT_READ, syscall.MAP_SHARED)
+	if err != nil {
+		return nil, err
+	}
+
+	dim := int(*(*int32)(unsafe.Pointer(&mem[0])))
+	if dim <= 0 {
+		// A non-positive dimension would yield a zero or negative block and break the size division below.
+		_ = syscall.Munmap(mem)
+		return nil, ErrInvalidFormat
+	}
+	block := headerSize + dim*elementSize
+	return &file{
+		mem:   mem,
+		dim:   dim,
+		size:  len(mem) / block,
+		block: block,
+	}, nil
+}
+
+// Close unmaps the file. Slices previously returned by the load methods alias
+// the mapping and must not be used afterwards.
+func (f *file) Close() error {
+	return syscall.Munmap(f.mem)
+}
+
+// load returns the element bytes (header stripped) of the i-th record.
+func (f *file) load(i int) ([]byte, error) {
+	if i < 0 || i >= f.size {
+		return nil, ErrOutOfBounds
+	}
+
+	return f.mem[i*f.block+headerSize : (i+1)*f.block], nil
+}
+
+// Dimension returns the number of elements per vector.
+func (f *file) Dimension() int {
+	return f.dim
+}
+
+// Size returns the number of vectors in the file.
+func (f *file) Size() int {
+	return f.size
+}
+
+// LoadUint8 returns the i-th vector. The slice aliases the mapped file.
+func (bv *bvecs) LoadUint8(i int) ([]uint8, error) {
+	buf, err := bv.load(i)
+	if err != nil {
+		return nil, err
+	}
+	// []byte and []uint8 are the same type, so no pointer cast is needed.
+	return buf[:bv.dim:bv.dim], nil
+}
+
+// Load is the untyped form of LoadUint8.
+func (bv *bvecs) Load(i int) (interface{}, error) {
+	return bv.LoadUint8(i)
+}
+
+// LoadFloat32 returns the i-th vector. The slice aliases the mapped file.
+func (fv *fvecs) LoadFloat32(i int) ([]float32, error) {
+	buf, err := fv.load(i)
+	if err != nil {
+		return nil, err
+	}
+	return ((*[1 << 26]float32)(unsafe.Pointer(&buf[0])))[:fv.dim:fv.dim], nil
+}
+
+// Load is the untyped form of LoadFloat32.
+func (fv *fvecs) Load(i int) (interface{}, error) {
+	return fv.LoadFloat32(i)
+}
+
+// LoadInt32 returns the i-th vector. The slice aliases the mapped file.
+func (iv *ivecs) LoadInt32(i int) ([]int32, error) {
+	buf, err := iv.load(i)
+	if err != nil {
+		return nil, err
+	}
+	return ((*[1 << 26]int32)(unsafe.Pointer(&buf[0])))[:iv.dim:iv.dim], nil
+}
+
+// Load is the untyped form of LoadInt32.
+func (iv *ivecs) Load(i int) (interface{}, error) {
+	return iv.LoadInt32(i)
+}
+
+// NewUint8Vectors opens fname as a file of 1-byte elements.
+func NewUint8Vectors(fname string) (Uint8Vectors, error) {
+	f, err := open(fname, 1)
+	if err != nil {
+		return nil, err
+	}
+	return &bvecs{f}, nil
+}
+
+// NewFloatVectors opens fname as a file of 4-byte float32 elements.
+func NewFloatVectors(fname string) (FloatVectors, error) {
+	f, err := open(fname, 4)
+	if err != nil {
+		return nil, err
+	}
+	return &fvecs{f}, nil
+}
+
+// NewInt32Vectors opens fname as a file of 4-byte int32 elements.
+func NewInt32Vectors(fname string) (Int32Vectors, error) {
+	f, err := open(fname, 4)
+	if err != nil {
+		return nil, err
+	}
+	return &ivecs{f}, nil
+}
+
+// Open picks the reader from the extension of fname (.bvecs, .fvecs or .ivecs).
+func Open(fname string) (BillionScaleVectors, error) {
+	switch filepath.Ext(fname) {
+	case ".bvecs":
+		return NewUint8Vectors(fname)
+	case ".fvecs":
+		return NewFloatVectors(fname)
+	case ".ivecs":
+		return NewInt32Vectors(fname)
+	default:
+		return nil, ErrUnsupportedFileType
+	}
+}
diff --git a/hack/benchmark/assets/x1b/loader_bench_test.go b/hack/benchmark/assets/x1b/loader_bench_test.go
new file mode 100644
index 0000000000..0a24d0658f
--- /dev/null
+++ b/hack/benchmark/assets/x1b/loader_bench_test.go
@@ -0,0 +1,107 @@
+//
+// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+package x1b
+
+import "testing"
+
+// Dataset paths, following SIFT1B_ROOT_DIR in the Makefile (large/dataset/sift1b).
+// NOTE(review): assumes BENCH_DATASET_BASE_DIR is this package's parent directory; confirm.
+const (
+	bvecsFile = "../large/dataset/sift1b/bigann_base.bvecs"
+	fvecsFile = "../large/dataset/sift1b/gnd/dis_1000M.fvecs"
+	ivecsFile = "../large/dataset/sift1b/gnd/idx_1000M.ivecs"
+)
+
+// BenchmarkBVecs measures sequential Load calls on bvecsFile, wrapping at the end of the file.
+func BenchmarkBVecs(b *testing.B) {
+	bv, err := NewUint8Vectors(bvecsFile)
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer func() {
+		if err := bv.Close(); err != nil {
+			b.Fatal(err)
+		}
+	}()
+
+	size := bv.Size()
+	if size == 0 { // the modulo below would otherwise divide by zero
+		b.Fatalf("%s contains no vectors", bvecsFile)
+	}
+	b.Run(bvecsFile, func(bb *testing.B) {
+		bb.ReportAllocs()
+		bb.ResetTimer()
+		for n := 0; n < bb.N; n++ {
+			if _, err := bv.Load(n % size); err != nil {
+				bb.Fatal(err)
+			}
+		}
+	})
+}
+
+// BenchmarkFVecs measures sequential Load calls on fvecsFile, wrapping at the end of the file.
+func BenchmarkFVecs(b *testing.B) {
+	fv, err := NewFloatVectors(fvecsFile)
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer func() {
+		if err := fv.Close(); err != nil {
+			b.Fatal(err)
+		}
+	}()
+
+	size := fv.Size()
+	if size == 0 { // the modulo below would otherwise divide by zero
+		b.Fatalf("%s contains no vectors", fvecsFile)
+	}
+	b.Run(fvecsFile, func(bb *testing.B) {
+		bb.ReportAllocs()
+		bb.ResetTimer()
+		for n := 0; n < bb.N; n++ {
+			if _, err := fv.Load(n % size); err != nil {
+				bb.Fatal(err)
+			}
+		}
+	})
+}
+
+// BenchmarkIVecs measures sequential Load calls on ivecsFile, wrapping at the end of the file.
+func BenchmarkIVecs(b *testing.B) {
+	iv, err := NewInt32Vectors(ivecsFile)
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer func() {
+		if err := iv.Close(); err != nil {
+			b.Fatal(err)
+		}
+	}()
+
+	size := iv.Size()
+	if size == 0 { // the modulo below would otherwise divide by zero
+		b.Fatalf("%s contains no vectors", ivecsFile)
+	}
+	b.Run(ivecsFile, func(bb *testing.B) {
+		bb.ReportAllocs()
+		bb.ResetTimer()
+		for n := 0; n < bb.N; n++ {
+			if _, err := iv.Load(n % size); err != nil {
+				bb.Fatal(err)
+			}
+		}
+	})
+}
diff --git a/hack/benchmark/assets/x1b/loader_test.go
b/hack/benchmark/assets/x1b/loader_test.go new file mode 100644 index 0000000000..aae11b84f6 --- /dev/null +++ b/hack/benchmark/assets/x1b/loader_test.go @@ -0,0 +1,1288 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package x1b + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" +) + +func Test_open(t *testing.T) { + type args struct { + fname string + elementSize int + } + type want struct { + wantF *file + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, *file, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, gotF *file, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(gotF, w.wantF) { + return errors.Errorf("got = %v, want %v", gotF, w.wantF) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + elementSize: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + elementSize: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer 
goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + gotF, err := open(test.args.fname, test.args.elementSize) + if err := test.checkFunc(test.want, gotF, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_Close(t *testing.T) { + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + err error + } + type test struct { + name string + fields fields + want want + checkFunc func(want, error) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + err := f.Close() + if err := test.checkFunc(test.want, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_load(t *testing.T) { + type args struct { + i int + } + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + want []byte + err error + } 
+ type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []byte, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []byte, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + got, err := f.load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_Dimension(t *testing.T) { + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* 
+ { + name: "test_case_1", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + got := f.Dimension() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_file_Size(t *testing.T) { + type fields struct { + mem []byte + dim int + size int + block int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + mem: nil, + dim: 0, + size: 0, + block: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + 
if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + f := &file{ + mem: test.fields.mem, + dim: test.fields.dim, + size: test.fields.size, + block: test.fields.block, + } + + got := f.Size() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_bvecs_LoadUint8(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want []uint8 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []uint8, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []uint8, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + bv := &bvecs{ + file: test.fields.file, + } + + got, err := bv.LoadUint8(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_bvecs_Load(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want 
interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + bv := &bvecs{ + file: test.fields.file, + } + + got, err := bv.Load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_fvecs_LoadFloat32(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want []float32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []float32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []float32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, 
w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + fv := &fvecs{ + file: test.fields.file, + } + + got, err := fv.LoadFloat32(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_fvecs_Load(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for 
_, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + fv := &fvecs{ + file: test.fields.file, + } + + got, err := fv.Load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_ivecs_LoadInt32(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want []int32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []int32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []int32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + iv := &ivecs{ + file: test.fields.file, + } + + got, err := iv.LoadInt32(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = 
%v", err) + } + + }) + } +} + +func Test_ivecs_Load(t *testing.T) { + type args struct { + i int + } + type fields struct { + file *file + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + file: file{}, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + iv := &ivecs{ + file: test.fields.file, + } + + got, err := iv.Load(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestNewBVecs(t *testing.T) { + type args struct { + fname string + } + type want struct { + want Uint8Vectors + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, Uint8Vectors, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got Uint8Vectors, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = 
%v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := NewUint8Vectors(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestNewFVecs(t *testing.T) { + type args struct { + fname string + } + type want struct { + want FloatVectors + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, FloatVectors, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got FloatVectors, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if 
test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := NewFloatVectors(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestNewIVecs(t *testing.T) { + type args struct { + fname string + } + type want struct { + want Int32Vectors + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, Int32Vectors, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got Int32Vectors, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := NewInt32Vectors(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestOpen(t *testing.T) { + type args struct { + fname string + } + type want struct { + want BillionScaleVectors + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, BillionScaleVectors, error) error + 
beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got BillionScaleVectors, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fname: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := Open(test.args.fname) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert.go b/hack/benchmark/core/benchmark/strategy/bulk_insert.go index 9b0e10a1fb..8846ea1014 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert.go @@ -26,18 +26,62 @@ import ( "github.com/vdaas/vald/hack/benchmark/internal/core" ) +const ( + maxBulkSize = 100000 +) + func NewBulkInsert(opts ...StrategyOption) benchmark.Strategy { return newStrategy(append([]StrategyOption{ WithPropName("BulkInsert"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsert(dataset.Train()) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := 
make([][]float32, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + b.Fatal(err) + } + v = append(v, arr.([]float32)) + } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() + b.StartTimer() + ids, errs := c.BulkInsert(v) return ids, wrapErrors(errs) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsert(dataset.TrainAsFloat64()) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float64, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + b.Fatal(err) + } + v = append(v, float32To64(arr.([]float32))) + } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() + b.StartTimer() + ids, errs := c.BulkInsert(v) return ids, wrapErrors(errs) }, ), diff --git a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go index df2d9bf33d..80eea2a5b1 100644 --- a/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/bulk_insert_commit.go @@ -31,13 +31,53 @@ func NewBulkInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Stra WithPropName("BulkInsertCommit"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsertCommit(dataset.Train(), poolSize) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float32, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + b.Fatal(err) + } + v = append(v, arr.([]float32)) + } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() + b.StartTimer() + ids, errs := c.BulkInsertCommit(v, 
poolSize) return ids, wrapErrors(errs) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - ids, errs := c.BulkInsertCommit(dataset.TrainAsFloat64(), poolSize) + size := func() int { + if maxBulkSize < dataset.TrainSize() { + return maxBulkSize + } else { + return dataset.TrainSize() + } + }() + v := make([][]float64, 0, size) + for i := 0; i < size; i++ { + arr, err := dataset.Train(i) + if err != nil { + b.Fatal(err) + } + v = append(v, float32To64(arr.([]float32))) + } + + b.StopTimer() + b.ReportAllocs() + b.ResetTimer() + b.StartTimer() + ids, errs := c.BulkInsertCommit(v, poolSize) return ids, wrapErrors(errs) }, ), diff --git a/hack/benchmark/core/benchmark/strategy/insert.go b/hack/benchmark/core/benchmark/strategy/insert.go index 2a3c8e504e..688d4c68cb 100644 --- a/hack/benchmark/core/benchmark/strategy/insert.go +++ b/hack/benchmark/core/benchmark/strategy/insert.go @@ -32,14 +32,24 @@ func NewInsert(opts ...StrategyOption) benchmark.Strategy { WithPropName("Insert"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.Train() - return c.Insert(train[int(atomic.LoadUint64(cnt))%len(train)]) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + b.StartTimer() + defer b.StopTimer() + return c.Insert(v.([]float32)) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.TrainAsFloat64() - return c.Insert(train[int(atomic.LoadUint64(cnt))%len(train)]) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + b.StartTimer() + defer b.StopTimer() + return c.Insert(float32To64(v.([]float32))) }, ), }, opts...)...) 
diff --git a/hack/benchmark/core/benchmark/strategy/insert_commit.go b/hack/benchmark/core/benchmark/strategy/insert_commit.go index 9a4efa11e0..7b8bb98baa 100644 --- a/hack/benchmark/core/benchmark/strategy/insert_commit.go +++ b/hack/benchmark/core/benchmark/strategy/insert_commit.go @@ -32,14 +32,24 @@ func NewInsertCommit(poolSize uint32, opts ...StrategyOption) benchmark.Strategy WithPropName("InsertCommit"), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.Train() - return c.InsertCommit(train[int(atomic.LoadUint64(cnt))%len(train)], poolSize) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + b.StartTimer() + defer b.StopTimer() + return c.InsertCommit(v.([]float32), poolSize) }, ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - train := dataset.TrainAsFloat64() - return c.InsertCommit(train[int(atomic.LoadUint64(cnt))%len(train)], poolSize) + v, err := dataset.Train(int(atomic.LoadUint64(cnt)) % dataset.TrainSize()) + if err != nil { + return nil, err + } + b.StartTimer() + defer b.StopTimer() + return c.InsertCommit(float32To64(v.([]float32)), poolSize) }, ), }, opts...)...) 
diff --git a/hack/benchmark/core/benchmark/strategy/search.go b/hack/benchmark/core/benchmark/strategy/search.go index 897c3f4ed6..d05d753283 100644 --- a/hack/benchmark/core/benchmark/strategy/search.go +++ b/hack/benchmark/core/benchmark/strategy/search.go @@ -37,8 +37,13 @@ func NewSearch(size int, epsilon, radius float32, opts ...StrategyOption) benchm ), WithProp32( func(ctx context.Context, b *testing.B, c core.Core32, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - query := dataset.Query() - return c.Search(query[int(atomic.LoadUint64(cnt))%len(query)], size, epsilon, radius) + v, err := dataset.Query(int(atomic.LoadUint64(cnt)) % dataset.QuerySize()) + if err != nil { + return nil, err + } + b.StartTimer() + defer b.StopTimer() + return c.Search(v.([]float32), size, epsilon, radius) }, ), WithPreProp64( @@ -48,8 +53,13 @@ func NewSearch(size int, epsilon, radius float32, opts ...StrategyOption) benchm ), WithProp64( func(ctx context.Context, b *testing.B, c core.Core64, dataset assets.Dataset, ids []uint, cnt *uint64) (interface{}, error) { - query := dataset.QueryAsFloat64() - return c.Search(query[int(atomic.LoadUint64(cnt))%len(query)], size, epsilon, radius) + v, err := dataset.Query(int(atomic.LoadUint64(cnt)) % dataset.QuerySize()) + if err != nil { + return nil, err + } + b.StartTimer() + defer b.StopTimer() + return c.Search(float32To64(v.([]float32)), size, epsilon, radius) }, ), }, opts...)...) 
diff --git a/hack/benchmark/core/benchmark/strategy/util.go b/hack/benchmark/core/benchmark/strategy/util.go index c64d9aeace..a67c8a80ad 100644 --- a/hack/benchmark/core/benchmark/strategy/util.go +++ b/hack/benchmark/core/benchmark/strategy/util.go @@ -43,10 +43,20 @@ func wrapErrors(errs []error) (wrapped error) { } func insertAndCreateIndex32(ctx context.Context, c core.Core32, dataset assets.Dataset) (ids []uint, err error) { - train := dataset.Train() - ids = make([]uint, 0, len(train)*bulkInsertCnt) + ids = make([]uint, 0, dataset.TrainSize()*bulkInsertCnt) + n := 0 for i := 0; i < bulkInsertCnt; i++ { + train := make([][]float32, 0, dataset.TrainSize()/bulkInsertCnt) + for j := 0; j < cap(train); j++ { + v, err := dataset.Train(n) + if err != nil { + n = 0 + break + } + train = append(train, v.([]float32)) + n++ + } inserted, errs := c.BulkInsert(train) err = wrapErrors(errs) if err != nil { @@ -55,7 +65,7 @@ func insertAndCreateIndex32(ctx context.Context, c core.Core32, dataset assets.D ids = append(ids, inserted...) 
} - err = c.CreateIndex(uint32((len(train) * bulkInsertCnt) / 100)) + err = c.CreateIndex(uint32((dataset.TrainSize() * bulkInsertCnt) / 100)) if err != nil { return nil, err } @@ -63,10 +73,20 @@ func insertAndCreateIndex32(ctx context.Context, c core.Core32, dataset assets.D } func insertAndCreateIndex64(ctx context.Context, c core.Core64, dataset assets.Dataset) (ids []uint, err error) { - train := dataset.TrainAsFloat64() - ids = make([]uint, 0, len(train)*bulkInsertCnt) + ids = make([]uint, 0, dataset.TrainSize()*bulkInsertCnt) + n := 0 for i := 0; i < bulkInsertCnt; i++ { + train := make([][]float64, 0, dataset.TrainSize()/bulkInsertCnt) + for j := 0; j < cap(train); j++ { + v, err := dataset.Train(n) + if err != nil { + n = 0 + break + } + train = append(train, float32To64(v.([]float32))) + n++ + } inserted, errs := c.BulkInsert(train) err = wrapErrors(errs) if err != nil { @@ -75,9 +95,17 @@ func insertAndCreateIndex64(ctx context.Context, c core.Core64, dataset assets.D ids = append(ids, inserted...) 
} - err = c.CreateIndex(uint32((len(train) * bulkInsertCnt) / 100)) + err = c.CreateIndex(uint32((dataset.TrainSize() * bulkInsertCnt) / 100)) if err != nil { return nil, err } return } + +func float32To64(x []float32) (y []float64) { + y = make([]float64, len(x)) + for i, a := range x { + y[i] = float64(a) + } + return y +} diff --git a/hack/benchmark/internal/e2e/strategy/insert.go b/hack/benchmark/internal/e2e/strategy/insert.go index e83cf432a4..4baf51f953 100644 --- a/hack/benchmark/internal/e2e/strategy/insert.go +++ b/hack/benchmark/internal/e2e/strategy/insert.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -50,14 +51,16 @@ func (isrt *insert) Run(ctx context.Context, b *testing.B, c client.Client, data func (isrt *insert) run(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { cnt := 0 b.Run("Insert", func(bb *testing.B) { - ids, train := dataset.IDs(), dataset.Train() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() bb.StartTimer() for i := 0; i < bb.N; i++ { - isrt.do(ctx, bb, c, ids[cnt%len(ids)], train[cnt%len(train)]) + v, err := dataset.Train(cnt % dataset.TrainSize()) + if err != nil { + b.Fatal(err) + } + isrt.do(ctx, bb, c, fmt.Sprint(cnt), v.([]float32)) cnt++ } bb.StopTimer() @@ -67,8 +70,6 @@ func (isrt *insert) run(ctx context.Context, b *testing.B, c client.Client, data func (isrt *insert) runParallel(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { var cnt int64 b.Run("ParallelInsert", func(bb *testing.B) { - ids, train := dataset.IDs(), dataset.Train() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() @@ -76,7 +77,12 @@ func (isrt *insert) runParallel(ctx context.Context, b *testing.B, c client.Clie bb.RunParallel(func(pb *testing.PB) { for pb.Next() { n := int(atomic.AddInt64(&cnt, 1)) - 1 - isrt.do(ctx, bb, c, ids[n%len(ids)], train[n%len(train)]) + v, err := dataset.Train(n % dataset.TrainSize()) + if err != nil { + b.Fatal(err) + } + + 
isrt.do(ctx, bb, c, fmt.Sprint(n), v.([]float32)) } }) bb.StopTimer() diff --git a/hack/benchmark/internal/e2e/strategy/remove.go b/hack/benchmark/internal/e2e/strategy/remove.go index e9afd16dbe..7661b764be 100644 --- a/hack/benchmark/internal/e2e/strategy/remove.go +++ b/hack/benchmark/internal/e2e/strategy/remove.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -50,14 +51,12 @@ func (r *remove) Run(ctx context.Context, b *testing.B, c client.Client, dataset func (r *remove) run(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { cnt := 0 b.Run("Remove", func(bb *testing.B) { - ids := dataset.IDs() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() bb.StartTimer() for i := 0; i < bb.N; i++ { - r.do(ctx, bb, c, ids[cnt%len(ids)]) + r.do(ctx, bb, c, fmt.Sprint(cnt)) cnt++ } bb.StopTimer() @@ -67,8 +66,6 @@ func (r *remove) run(ctx context.Context, b *testing.B, c client.Client, dataset func (r *remove) runParallel(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { var cnt int64 b.Run("ParallelRemove", func(bb *testing.B) { - ids := dataset.IDs() - bb.StartTimer() bb.ReportAllocs() bb.ResetTimer() @@ -76,7 +73,7 @@ func (r *remove) runParallel(ctx context.Context, b *testing.B, c client.Client, bb.RunParallel(func(pb *testing.PB) { for pb.Next() { n := int(atomic.AddInt64(&cnt, 1)) - 1 - r.do(ctx, bb, c, ids[n%len(ids)]) + r.do(ctx, bb, c, fmt.Sprint(n)) } }) bb.StopTimer() diff --git a/hack/benchmark/internal/e2e/strategy/search.go b/hack/benchmark/internal/e2e/strategy/search.go index 81e7b90d64..d0ca62a386 100644 --- a/hack/benchmark/internal/e2e/strategy/search.go +++ b/hack/benchmark/internal/e2e/strategy/search.go @@ -49,15 +49,21 @@ func (s *search) Run(ctx context.Context, b *testing.B, c client.Client, dataset } func (s *search) run(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { + cnt := 0 b.Run("Search", func(bb *testing.B) { - 
queries := dataset.Query() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() bb.StartTimer() for i := 0; i < bb.N; i++ { - s.do(ctx, bb, c, queries[i%len(queries)]) + v, err := dataset.Query(cnt % dataset.QuerySize()) + if err != nil { + cnt = 0 + break + } + + s.do(ctx, bb, c, v.([]float32)) + cnt++ } bb.StopTimer() }) @@ -66,8 +72,6 @@ func (s *search) run(ctx context.Context, b *testing.B, c client.Client, dataset func (s *search) runParallel(ctx context.Context, b *testing.B, c client.Client, dataset assets.Dataset) { var cnt int64 b.Run("ParallelSearch", func(bb *testing.B) { - queries := dataset.Query() - bb.StopTimer() bb.ReportAllocs() bb.ResetTimer() @@ -75,7 +79,13 @@ func (s *search) runParallel(ctx context.Context, b *testing.B, c client.Client, bb.RunParallel(func(pb *testing.PB) { for pb.Next() { n := int(atomic.AddInt64(&cnt, 1)) - 1 - s.do(ctx, b, c, queries[n%len(queries)]) + v, err := dataset.Query(n % dataset.QuerySize()) + if err != nil { + cnt = 0 + break + } + + s.do(ctx, b, c, v.([]float32)) } }) bb.StopTimer() diff --git a/hack/benchmark/internal/e2e/strategy/stream_insert.go b/hack/benchmark/internal/e2e/strategy/stream_insert.go index 77d64d2733..1e1cdfc97a 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_insert.go +++ b/hack/benchmark/internal/e2e/strategy/stream_insert.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -38,8 +39,6 @@ func NewStreamInsert(opts ...StreamInsertOption) e2e.Strategy { } func (sisrt *streamInsert) dataProvider(total *uint32, b *testing.B, dataset assets.Dataset) func() *client.ObjectVector { - ids, trains := dataset.IDs(), dataset.Train() - var cnt uint32 b.StopTimer() @@ -54,9 +53,13 @@ func (sisrt *streamInsert) dataProvider(total *uint32, b *testing.B, dataset ass } total := int(atomic.AddUint32(total, 1)) - 1 + v, err := dataset.Train(total % dataset.TrainSize()) + if err != nil { + return nil + } return &client.ObjectVector{ - Id: ids[total%len(ids)], - 
Vector: trains[total%len(trains)], + Id: fmt.Sprint(n), + Vector: v.([]float32), } } } diff --git a/hack/benchmark/internal/e2e/strategy/stream_remove.go b/hack/benchmark/internal/e2e/strategy/stream_remove.go index 23207bd599..6143ae4158 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_remove.go +++ b/hack/benchmark/internal/e2e/strategy/stream_remove.go @@ -19,6 +19,7 @@ package strategy import ( "context" + "fmt" "sync/atomic" "testing" @@ -38,8 +39,6 @@ func NewStreamRemove(opts ...StreamRemoveOption) e2e.Strategy { } func (sr *streamRemove) dataProvider(total *uint32, b *testing.B, dataset assets.Dataset) func() *client.ObjectID { - ids := dataset.IDs() - var cnt uint32 b.StopTimer() @@ -56,7 +55,7 @@ func (sr *streamRemove) dataProvider(total *uint32, b *testing.B, dataset assets total := int(atomic.AddUint32(total, 1)) - 1 return &client.ObjectID{ - Id: ids[total%len(ids)], + Id: fmt.Sprint(total % dataset.TrainSize()), } } } diff --git a/hack/benchmark/internal/e2e/strategy/stream_search.go b/hack/benchmark/internal/e2e/strategy/stream_search.go index f16e6b8db8..30ebabf794 100644 --- a/hack/benchmark/internal/e2e/strategy/stream_search.go +++ b/hack/benchmark/internal/e2e/strategy/stream_search.go @@ -40,8 +40,6 @@ func NewStreamSearch(opts ...StreamSearchOption) e2e.Strategy { } func (s *streamSearch) dataProvider(total *uint32, b *testing.B, dataset assets.Dataset) func() *client.SearchRequest { - queries := dataset.Query() - var cnt uint32 b.StopTimer() @@ -56,8 +54,12 @@ func (s *streamSearch) dataProvider(total *uint32, b *testing.B, dataset assets. 
} total := int(atomic.AddUint32(total, 1)) - 1 + v, err := dataset.Query(total % dataset.QuerySize()) + if err != nil { + return nil + } return &client.SearchRequest{ - Vector: queries[total%len(queries)], + Vector: v.([]float32), Config: s.cfg, } } diff --git a/pkg/tools/cli/loadtest/assets/dataset.go b/pkg/tools/cli/loadtest/assets/dataset.go index 5319dc532a..a5e71a6161 100644 --- a/pkg/tools/cli/loadtest/assets/dataset.go +++ b/pkg/tools/cli/loadtest/assets/dataset.go @@ -16,27 +16,29 @@ package assets import ( - "fmt" - "math/rand" "os" "path/filepath" "strconv" "strings" - "sync" + "github.com/vdaas/vald/hack/benchmark/assets/x1b" "github.com/vdaas/vald/internal/log" ) +var ( + ErrOutOfBounds = x1b.ErrOutOfBounds +) + // Dataset is representation of train and test dataset. type Dataset interface { - Train() [][]float32 - TrainAsFloat64() [][]float64 - Query() [][]float32 - QueryAsFloat64() [][]float64 - Distances() [][]float32 - DistancesAsFloat64() [][]float64 - Neighbors() [][]int - IDs() []string + Train(i int) (interface{}, error) + TrainSize() int + Query(i int) (interface{}, error) + QuerySize() int + Distance(i int) ([]float32, error) + DistanceSize() int + Neighbor(i int) ([]int, error) + NeighborSize() int Name() string Dimension() int DistanceType() string @@ -44,309 +46,33 @@ type Dataset interface { } type dataset struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } -var ( - data = map[string]func() (Dataset, error){ - "fashion-mnist": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + 
"fashion-mnist-784-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "fashion-mnist", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, nil - }, - "mnist": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "mnist-784-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "mnist", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, err - }, - "glove-25": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-25-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-25", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "glove-50": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-50-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-50", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "glove-100": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-100-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: 
d.Neighbors(), - ids: d.IDs(), - name: "glove-100", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "glove-200": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "glove-200-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "glove-200", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "nytimes": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "nytimes-256-angular.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "nytimes", - dimension: d.Dimension(), - distanceType: "cosine", - objectType: "float", - }, nil - }, - "sift": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "sift-128-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "sift", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, nil - }, - "gist": func() (Dataset, error) { - dir, err := datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "gist-960-euclidean.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "gist", - dimension: d.Dimension(), - distanceType: "l2", - objectType: "float", - }, nil - }, - "kosarak": func() (Dataset, error) { - dir, err := 
datasetDir() - if err != nil { - return nil, err - } - d, err := LoadDataWithSerialIDs(dir + "/kosarak-jaccard.hdf5") - if err != nil { - return nil, err - } - return &dataset{ - train: d.Train(), - query: d.Query(), - distances: d.Distances(), - neighbors: d.Neighbors(), - ids: d.IDs(), - name: "kosarak", - dimension: d.Dimension(), - distanceType: "jaccard", - objectType: "float", - }, nil - }, - } -) +// Name returns dataset name. +func (d *dataset) Name() string { + return d.name +} -func identity(dim int) func() (Dataset, error) { - return func() (Dataset, error) { - ids := CreateSerialIDs(dim * 1000) - train := make([][]float32, dim) - for i := range train { - train[i] = make([]float32, dim) - train[i][i] = 1 - } - return &dataset{ - train: train, - query: train, - ids: ids, - name: fmt.Sprintf("identity-%d", dim), - dimension: dim, - distanceType: "l2", - objectType: "float", - }, nil - } +// Dimension returns vector dimension. +func (d *dataset) Dimension() int { + return d.dimension } -func random(dim, size int) func() (Dataset, error) { - return func() (Dataset, error) { - ids := CreateRandomIDs(size) - train := make([][]float32, size) - query := make([][]float32, size) - for i := range train { - train[i] = make([]float32, dim) - query[i] = make([]float32, dim) - for j := range train[i] { - train[i][j] = rand.Float32() - query[i][j] = rand.Float32() - } - } - return &dataset{ - train: train, - query: query, - ids: ids, - name: fmt.Sprintf("random-%d-%d", dim, size), - dimension: dim, - distanceType: "l2", - objectType: "float", - }, nil - } +// DistanceType returns dataset distance type like l2, cosine, jaccard or etc. 
+func (d *dataset) DistanceType() string { + return d.distanceType } -func gaussian(dim, size int, mean, stdDev float64) func() (Dataset, error) { - return func() (Dataset, error) { - ids := CreateRandomIDs(size) - train := make([][]float32, size) - query := make([][]float32, size) - for i := range train { - train[i] = make([]float32, dim) - query[i] = make([]float32, dim) - for j := range train[i] { - train[i][j] = float32(rand.NormFloat64()*stdDev + mean) - query[i][j] = float32(rand.NormFloat64()*stdDev + mean) - } - } - return &dataset{ - train: train, - query: query, - ids: ids, - name: fmt.Sprintf("gaussian-%d-%d-%f-%f", dim, size, mean, stdDev), - dimension: dim, - distanceType: "l2", - objectType: "float", - }, nil - } +// ObjectType returns dataset vector type like float or int. +func (d *dataset) ObjectType() string { + return d.objectType } -func datasetDir() (string, error) { +func findDir(path string) (string, error) { wd, err := os.Getwd() if err != nil { return "", err @@ -360,7 +86,7 @@ func datasetDir() (string, error) { } } }(wd) - return filepath.Join(root, "hack/benchmark/assets/dataset") + "/", nil + return filepath.Join(root, path) + "/", nil } // Data loads specified dataset and returns it. @@ -386,88 +112,32 @@ func Data(name string) func() (Dataset, error) { sd, _ := strconv.ParseFloat(l[4], 64) return gaussian(d, s, m, sd) } - if d, ok := data[name]; ok { - return d - } - return nil -} - -// Train returns vectors for train. -func (d *dataset) Train() [][]float32 { - return d.train -} - -// TrainAsFloat64 returns casted float64 vectors for train. -func (d *dataset) TrainAsFloat64() [][]float64 { - d.trainOnce.Do(func() { - d.trainAsFloat64 = float32To64(d.train) - }) - return d.trainAsFloat64 -} - -// Query returns vectors for test. -func (d *dataset) Query() [][]float32 { - return d.query -} - -// QueryAsFloat64 returns casted float64 vectors for test. 
-func (d *dataset) QueryAsFloat64() [][]float64 { - d.queryOnce.Do(func() { - d.queryAsFloat64 = float32To64(d.query) - }) - return d.queryAsFloat64 -} - -// Distances returns distances between queries and answers. -func (d *dataset) Distances() [][]float32 { - return d.distances -} - -// Distances returns casted float64 distances between queries and answers. -func (d *dataset) DistancesAsFloat64() [][]float64 { - d.distancesOnce.Do(func() { - d.distancesAsFloat64 = float32To64(d.distances) - }) - return d.distancesAsFloat64 -} - -// Neighbors returns nearest vectors from queries. -func (d *dataset) Neighbors() [][]int { - return d.neighbors -} - -// IDs returns ids of train vectors. -func (d *dataset) IDs() []string { - return d.ids -} -// Name returns dataset name. -func (d *dataset) Name() string { - return d.name -} - -// Dimension returns vector dimension. -func (d *dataset) Dimension() int { - return d.dimension -} - -// DistanceType returns dataset distance type like l2, cosine, jaccard or etc. -func (d *dataset) DistanceType() string { - return d.distanceType -} - -// ObjectType returns dataset vector type like float or int. 
-func (d *dataset) ObjectType() string { - return d.objectType -} - -func float32To64(x [][]float32) (y [][]float64) { - y = make([][]float64, len(x)) - for i, z := range x { - y[i] = make([]float64, len(z)) - for j, a := range z { - y[i][j] = float64(a) - } + switch name { + case "fashion-mnist": + return loadSmallData("fashion-mnist-784-euclidean.hdf5", name, "l2", "float") + case "mnist": + return loadSmallData("mnist-784-euclidean.hdf5", name, "l2", "float") + case "glove-25": + return loadSmallData("glove-25-angular.hdf5", name, "cosine", "float") + case "glove-50": + return loadSmallData("glove-50-angular.hdf5", name, "cosine", "float") + case "glove-100": + return loadSmallData("glove-100-angular.hdf5", name, "cosine", "float") + case "glove-200": + return loadSmallData("glove-200-angular.hdf5", name, "cosine", "float") + case "nytimes": + return loadSmallData("nytimes-256-angular.hdf5", name, "cosine", "float") + case "sift": + return loadSmallData("sift-128-euclidean.hdf5", name, "l2", "float") + case "gist": + return loadSmallData("gist-960-euclidean.hdf5", name, "l2", "float") + case "kosarak": + return loadSmallData("kosarak-jaccard.hdf5", name, "jaccard", "float") + case "sift1b": + return loadLargeData("bigann_base.bvecs", "bigann_query.bvecs", "gnd/idx_1000M.ivecs", "gnd/dis_1000M.fvecs", name, "l2", "uint8") + case "deep1b": + return loadLargeData("deep1B_base.fvecs", "deep1B_queries.fvecs", "deep1B_groundtruth.ivecs", "", name, "l2", "float") } - return y + return nil } diff --git a/pkg/tools/cli/loadtest/assets/dataset_test.go b/pkg/tools/cli/loadtest/assets/dataset_test.go index e081553f7b..ef7c6aab94 100644 --- a/pkg/tools/cli/loadtest/assets/dataset_test.go +++ b/pkg/tools/cli/loadtest/assets/dataset_test.go @@ -17,1106 +17,33 @@ package assets import ( "reflect" - "sync" "testing" "github.com/vdaas/vald/internal/errors" "go.uber.org/goleak" ) -func Test_identity(t *testing.T) { - type args struct { - dim int - } - type want struct { - want 
func() (Dataset, error) - } - type test struct { - name string - args args - want want - checkFunc func(want, func() (Dataset, error)) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got func() (Dataset, error)) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := identity(test.args.dim) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_random(t *testing.T) { - type args struct { - dim int - size int - } - type want struct { - want func() (Dataset, error) - } - type test struct { - name string - args args - want want - checkFunc func(want, func() (Dataset, error)) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got func() (Dataset, error)) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - dim: 0, - size: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - dim: 0, - size: 0, - }, - want: 
want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := random(test.args.dim, test.args.size) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_datasetDir(t *testing.T) { - type want struct { - want string - err error - } - type test struct { - name string - want want - checkFunc func(want, string, error) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got string, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := datasetDir() - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestData(t *testing.T) { - type args struct { - name string - } - type want struct { - want func() (Dataset, error) - } - type test struct { - name string - args args - want want - checkFunc func(want, func() (Dataset, error)) 
error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got func() (Dataset, error)) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - name: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - name: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := Data(test.args.name) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Train(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float32 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float32) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float32) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - 
queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Train() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_TrainAsFloat64(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once 
- neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float64 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float64) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float64) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: 
test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.TrainAsFloat64() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Query(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float32 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float32) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float32) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: 
defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Query() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_QueryAsFloat64(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float64 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float64) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float64) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - 
queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.QueryAsFloat64() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Distances(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce 
sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float32 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float32) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float32) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - 
distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Distances() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_DistancesAsFloat64(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want [][]float64 - } - type test struct { - name string - fields fields - want want - checkFunc func(want, [][]float64) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got [][]float64) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: 
want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.DistancesAsFloat64() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Neighbors(t *testing.T) { +func Test_dataset_Name(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { - want [][]int + want string } type test struct { name string fields fields want want - checkFunc func(want, [][]int) error + checkFunc func(want, string) error beforeFunc func() afterFunc func() } - defaultCheckFunc := func(w want, got [][]int) error { + defaultCheckFunc := func(w want, got string) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: 
\"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1126,17 +53,6 @@ func Test_dataset_Neighbors(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1153,17 +69,6 @@ func Test_dataset_Neighbors(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1178,7 +83,7 @@ func Test_dataset_Neighbors(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1189,24 +94,13 @@ func Test_dataset_Neighbors(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.Neighbors() + got := d.Name() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } 
@@ -1215,38 +109,27 @@ func Test_dataset_Neighbors(t *testing.T) { } } -func Test_dataset_IDs(t *testing.T) { +func Test_dataset_Dimension(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { - want []string + want int } type test struct { name string fields fields want want - checkFunc func(want, []string) error + checkFunc func(want, int) error beforeFunc func() afterFunc func() } - defaultCheckFunc := func(w want, got []string) error { + defaultCheckFunc := func(w want, got int) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1256,17 +139,6 @@ func Test_dataset_IDs(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1283,17 +155,6 @@ func Test_dataset_IDs(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1308,7 +169,7 @@ func Test_dataset_IDs(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if 
test.beforeFunc != nil { test.beforeFunc() } @@ -1319,24 +180,13 @@ func Test_dataset_IDs(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.IDs() + got := d.Dimension() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1345,23 +195,12 @@ func Test_dataset_IDs(t *testing.T) { } } -func Test_dataset_Name(t *testing.T) { +func Test_dataset_DistanceType(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { want string @@ -1376,137 +215,7 @@ func Test_dataset_Name(t *testing.T) { } defaultCheckFunc := func(w want, got string) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: 
nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc() - } - if test.afterFunc != nil { - defer test.afterFunc() - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - - got := d.Name() - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_dataset_Dimension(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once 
- neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string - } - type want struct { - want int - } - type test struct { - name string - fields fields - want want - checkFunc func(want, int) error - beforeFunc func() - afterFunc func() - } - defaultCheckFunc := func(w want, got int) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1516,17 +225,6 @@ func Test_dataset_Dimension(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1543,17 +241,6 @@ func Test_dataset_Dimension(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1568,7 +255,7 @@ func Test_dataset_Dimension(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1579,24 +266,13 @@ func Test_dataset_Dimension(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - 
ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.Dimension() + got := d.DistanceType() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1605,23 +281,12 @@ func Test_dataset_Dimension(t *testing.T) { } } -func Test_dataset_DistanceType(t *testing.T) { +func Test_dataset_ObjectType(t *testing.T) { type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string + name string + dimension int + distanceType string + objectType string } type want struct { want string @@ -1636,7 +301,7 @@ func Test_dataset_DistanceType(t *testing.T) { } defaultCheckFunc := func(w want, got string) error { if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1646,17 +311,6 @@ func Test_dataset_DistanceType(t *testing.T) { { name: "test_case_1", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1673,17 +327,6 @@ func Test_dataset_DistanceType(t *testing.T) { return test { name: "test_case_2", fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - 
distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, name: "", dimension: 0, distanceType: "", @@ -1698,7 +341,7 @@ func Test_dataset_DistanceType(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc() } @@ -1709,24 +352,13 @@ func Test_dataset_DistanceType(t *testing.T) { test.checkFunc = defaultCheckFunc } d := &dataset{ - train: test.fields.train, - trainAsFloat64: test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, + name: test.fields.name, + dimension: test.fields.dimension, + distanceType: test.fields.distanceType, + objectType: test.fields.objectType, } - got := d.DistanceType() + got := d.ObjectType() if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } @@ -1735,38 +367,28 @@ func Test_dataset_DistanceType(t *testing.T) { } } -func Test_dataset_ObjectType(t *testing.T) { - type fields struct { - train [][]float32 - trainAsFloat64 [][]float64 - trainOnce sync.Once - query [][]float32 - queryAsFloat64 [][]float64 - queryOnce sync.Once - distances [][]float32 - distancesAsFloat64 [][]float64 - distancesOnce sync.Once - neighbors [][]int - ids []string - name string - dimension int - distanceType string - objectType string +func Test_findDir(t *testing.T) { + type args struct { + path string } type want struct { want string + err error } type test struct { name string - fields fields + args args want want - checkFunc 
func(want, string) error - beforeFunc func() - afterFunc func() + checkFunc func(want, string, error) error + beforeFunc func(args) + afterFunc func(args) } - defaultCheckFunc := func(w want, got string) error { + defaultCheckFunc := func(w want, got string, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1775,22 +397,8 @@ func Test_dataset_ObjectType(t *testing.T) { /* { name: "test_case_1", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", + args: args { + path: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1802,22 +410,8 @@ func Test_dataset_ObjectType(t *testing.T) { func() test { return test { name: "test_case_2", - fields: fields { - train: nil, - trainAsFloat64: nil, - trainOnce: nil, - query: nil, - queryAsFloat64: nil, - queryOnce: nil, - distances: nil, - distancesAsFloat64: nil, - distancesOnce: nil, - neighbors: nil, - ids: nil, - name: "", - dimension: 0, - distanceType: "", - objectType: "", + args: args { + path: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1828,36 +422,19 @@ func Test_dataset_ObjectType(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { - test.beforeFunc() + test.beforeFunc(test.args) } if test.afterFunc != nil { - defer test.afterFunc() + defer test.afterFunc(test.args) } if test.checkFunc == nil { test.checkFunc = defaultCheckFunc } - d := &dataset{ - train: test.fields.train, - trainAsFloat64: 
test.fields.trainAsFloat64, - trainOnce: test.fields.trainOnce, - query: test.fields.query, - queryAsFloat64: test.fields.queryAsFloat64, - queryOnce: test.fields.queryOnce, - distances: test.fields.distances, - distancesAsFloat64: test.fields.distancesAsFloat64, - distancesOnce: test.fields.distancesOnce, - neighbors: test.fields.neighbors, - ids: test.fields.ids, - name: test.fields.name, - dimension: test.fields.dimension, - distanceType: test.fields.distanceType, - objectType: test.fields.objectType, - } - got := d.ObjectType() - if err := test.checkFunc(test.want, got); err != nil { + got, err := findDir(test.args.path) + if err := test.checkFunc(test.want, got, err); err != nil { tt.Errorf("error = %v", err) } @@ -1865,24 +442,24 @@ func Test_dataset_ObjectType(t *testing.T) { } } -func Test_float32To64(t *testing.T) { +func TestData(t *testing.T) { type args struct { - x [][]float32 + name string } type want struct { - wantY [][]float64 + want func() (Dataset, error) } type test struct { name string args args want want - checkFunc func(want, [][]float64) error + checkFunc func(want, func() (Dataset, error)) error beforeFunc func(args) afterFunc func(args) } - defaultCheckFunc := func(w want, gotY [][]float64) error { - if !reflect.DeepEqual(gotY, w.wantY) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotY, w.wantY) + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) } return nil } @@ -1892,7 +469,7 @@ func Test_float32To64(t *testing.T) { { name: "test_case_1", args: args { - x: nil, + name: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1905,7 +482,7 @@ func Test_float32To64(t *testing.T) { return test { name: "test_case_2", args: args { - x: nil, + name: "", }, want: want{}, checkFunc: defaultCheckFunc, @@ -1916,7 +493,7 @@ func Test_float32To64(t *testing.T) { for _, test := range tests { t.Run(test.name, func(tt 
*testing.T) { - defer goleak.VerifyNone(t) + defer goleak.VerifyNone(tt) if test.beforeFunc != nil { test.beforeFunc(test.args) } @@ -1927,8 +504,8 @@ func Test_float32To64(t *testing.T) { test.checkFunc = defaultCheckFunc } - gotY := float32To64(test.args.x) - if err := test.checkFunc(test.want, gotY); err != nil { + got := Data(test.args.name) + if err := test.checkFunc(test.want, got); err != nil { tt.Errorf("error = %v", err) } diff --git a/pkg/tools/cli/loadtest/assets/loader.go b/pkg/tools/cli/loadtest/assets/hdf5_loader.go similarity index 68% rename from pkg/tools/cli/loadtest/assets/loader.go rename to pkg/tools/cli/loadtest/assets/hdf5_loader.go index 916f5b5d70..cd758b608d 100644 --- a/pkg/tools/cli/loadtest/assets/loader.go +++ b/pkg/tools/cli/loadtest/assets/hdf5_loader.go @@ -16,9 +16,6 @@ package assets import ( - "strconv" - - "github.com/kpango/fuid" "github.com/vdaas/vald/internal/errors" "gonum.org/v1/hdf5" ) @@ -119,66 +116,3 @@ func Load(path string) (train, test, distances [][]float32, neighbors [][]int, d return train, test, distances, neighbors, dim, nil } - -// CreateRandomIDs generates random string IDs. -func CreateRandomIDs(n int) (ids []string) { - ids = make([]string, 0, n) - for i := 0; i < n; i++ { - ids = append(ids, fuid.String()) - } - return ids -} - -// CreateRandomIDsWithLength generates random string IDs that have specified length. -func CreateRandomIDsWithLength(n, l int) (ids []string) { - ids = make([]string, 0, n) - for i := 0; i < n; i++ { - id := fuid.String() - for len(id) < l { - id = id + fuid.String() - } - ids = append(ids, id[:l]) - } - return ids -} - -// CreateSerialIDs generates serial number IDs. -func CreateSerialIDs(n int) []string { - ids := make([]string, 0, n) - for i := 0; i < n; i++ { - ids = append(ids, strconv.Itoa(i)) - } - return ids -} - -// LoadDataWithRandomIDs returns approximate nearest neighbor benchmark dataset with random IDs. 
-func LoadDataWithRandomIDs(path string) (Dataset, error) { - train, test, distances, neighbors, dim, err := Load(path) - if err != nil { - return nil, err - } - return &dataset{ - train: train, - query: test, - distances: distances, - neighbors: neighbors, - ids: CreateRandomIDs(len(train)), - dimension: dim, - }, nil -} - -// LoadDataWithSerialIDs returns approximate nearest neighbor benchmark dataset with serial IDs. -func LoadDataWithSerialIDs(path string) (Dataset, error) { - train, test, distances, neighbors, dim, err := Load(path) - if err != nil { - return nil, err - } - return &dataset{ - train: train, - query: test, - distances: distances, - neighbors: neighbors, - ids: CreateSerialIDs(len(train)), - dimension: dim, - }, nil -} diff --git a/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go b/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go new file mode 100644 index 0000000000..55ed91fcbf --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/hdf5_loader_test.go @@ -0,0 +1,369 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +package assets + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" + "gonum.org/v1/hdf5" +) + +func Test_loadFloat32(t *testing.T) { + type args struct { + dset *hdf5.Dataset + npoints int + row int + dim int + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dset: nil, + npoints: 0, + row: 0, + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dset: nil, + npoints: 0, + row: 0, + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := loadFloat32(test.args.dset, test.args.npoints, test.args.row, test.args.dim) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_loadInt(t *testing.T) { + type args struct { + dset *hdf5.Dataset + npoints int + row int + dim int + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + 
afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dset: nil, + npoints: 0, + row: 0, + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dset: nil, + npoints: 0, + row: 0, + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got, err := loadInt(test.args.dset, test.args.npoints, test.args.row, test.args.dim) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_loadDataset(t *testing.T) { + type args struct { + file *hdf5.File + name string + f loaderFunc + } + type want struct { + wantDim int + wantVec interface{} + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, int, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, gotDim int, gotVec interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(gotDim, w.wantDim) { + return errors.Errorf("got = %v, want %v", gotDim, w.wantDim) + } + if !reflect.DeepEqual(gotVec, w.wantVec) { + return errors.Errorf("got = %v, want %v", gotVec, w.wantVec) + } + return nil + } + 
tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + file: nil, + name: "", + f: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + file: nil, + name: "", + f: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + gotDim, gotVec, err := loadDataset(test.args.file, test.args.name, test.args.f) + if err := test.checkFunc(test.want, gotDim, gotVec, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func TestLoad(t *testing.T) { + type args struct { + path string + } + type want struct { + wantTrain [][]float32 + wantTest [][]float32 + wantDistances [][]float32 + wantNeighbors [][]int + wantDim int + err error + } + type test struct { + name string + args args + want want + checkFunc func(want, [][]float32, [][]float32, [][]float32, [][]int, int, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, gotTrain [][]float32, gotTest [][]float32, gotDistances [][]float32, gotNeighbors [][]int, gotDim int, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(gotTrain, w.wantTrain) { + return errors.Errorf("got = %v, want %v", gotTrain, w.wantTrain) + } + if !reflect.DeepEqual(gotTest, w.wantTest) { + return errors.Errorf("got = %v, want %v", gotTest, w.wantTest) + } + if !reflect.DeepEqual(gotDistances, w.wantDistances) { + return errors.Errorf("got = %v, want %v", gotDistances, w.wantDistances) + } + if !reflect.DeepEqual(gotNeighbors, w.wantNeighbors) { 
+ return errors.Errorf("got = %v, want %v", gotNeighbors, w.wantNeighbors) + } + if !reflect.DeepEqual(gotDim, w.wantDim) { + return errors.Errorf("got = %v, want %v", gotDim, w.wantDim) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + path: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + path: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + gotTrain, gotTest, gotDistances, gotNeighbors, gotDim, err := Load(test.args.path) + if err := test.checkFunc(test.want, gotTrain, gotTest, gotDistances, gotNeighbors, gotDim, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} diff --git a/pkg/tools/cli/loadtest/assets/large_dataset.go b/pkg/tools/cli/loadtest/assets/large_dataset.go new file mode 100644 index 0000000000..7776dbf10c --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/large_dataset.go @@ -0,0 +1,141 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +package assets + +import ( + "path/filepath" + + "github.com/vdaas/vald/hack/benchmark/assets/x1b" + "github.com/vdaas/vald/internal/errors" +) + +const ( + largeDatasetPath = "hack/benchmark/assets/dataset/large" +) + +type largeDataset struct { + *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors +} + +func loadLargeData(trainFileName, queryFileName, groundTruthFileName, distanceFileName, name, distanceType, objectType string) func() (Dataset, error) { + return func() (Dataset, error) { + dir, err := findDir(largeDatasetPath) + if err != nil { + return nil, err + } + train, err := x1b.Open(filepath.Join(dir, trainFileName)) + if err != nil { + return nil, err + } + query, err := x1b.Open(filepath.Join(dir, queryFileName)) + if err != nil { + return nil, err + } + tdim := train.Dimension() + qdim := query.Dimension() + if tdim != qdim { + return nil, errors.New("dimension must be same train and query.") + } + iv, err := x1b.NewInt32Vectors(filepath.Join(dir, groundTruthFileName)) + if err != nil { + return nil, err + } + groundTruth := make([][]int, 0, iv.Size()) + for i := 0; ; i++ { + gt32, err := iv.LoadInt32(i) + if err == ErrOutOfBounds { + break + } + gt := make([]int, 0, len(gt32)) + for _, v := range gt32 { + gt = append(gt, int(v)) + } + groundTruth = append(groundTruth, gt) + } + + distances, err := x1b.NewFloatVectors(filepath.Join(dir, distanceFileName)) + if err != nil { + return nil, err + } + return &largeDataset{ + dataset: &dataset{ + name: name, + dimension: tdim, + distanceType: distanceType, + objectType: objectType, + }, + train: train, + query: query, + groundTruth: groundTruth, + distances: distances, + }, nil + } +} + +func (d *largeDataset) Train(i int) (interface{}, error) { + return d.train.Load(i) +} + +func (d *largeDataset) TrainSize() int { + return d.train.Size() +} + +func (d *largeDataset) Query(i int) (interface{}, error) { + return d.query.Load(i) +} + 
+func (d *largeDataset) QuerySize() int { + return d.query.Size() +} + +func (d *largeDataset) Distance(i int) ([]float32, error) { + return d.distances.LoadFloat32(i) +} + +func (d *largeDataset) DistanceSize() int { + return d.distances.Size() +} + +func (d *largeDataset) Neighbor(i int) ([]int, error) { + if i >= len(d.groundTruth) { + return nil, ErrOutOfBounds + } + return d.groundTruth[i], nil +} + +func (d *largeDataset) NeighborSize() int { + return len(d.groundTruth) +} + +func (d *largeDataset) Dimension() int { + return d.dimension +} + +func (d *largeDataset) DistanceType() string { + return d.distanceType +} + +func (d *largeDataset) ObjectType() string { + return d.objectType +} + +func (d *largeDataset) Name() string { + return d.name +} diff --git a/pkg/tools/cli/loadtest/assets/large_dataset_test.go b/pkg/tools/cli/loadtest/assets/large_dataset_test.go new file mode 100644 index 0000000000..9ac09a2104 --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/large_dataset_test.go @@ -0,0 +1,1250 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +package assets + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/hack/benchmark/assets/x1b" + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" +) + +func Test_loadLargeData(t *testing.T) { + type args struct { + trainFileName string + queryFileName string + groundTruthFileName string + distanceFileName string + name string + distanceType string + objectType string + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + trainFileName: "", + queryFileName: "", + groundTruthFileName: "", + distanceFileName: "", + name: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + trainFileName: "", + queryFileName: "", + groundTruthFileName: "", + distanceFileName: "", + name: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := loadLargeData(test.args.trainFileName, test.args.queryFileName, test.args.groundTruthFileName, test.args.distanceFileName, test.args.name, test.args.distanceType, test.args.objectType) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", 
err) + } + + }) + } +} + +func Test_largeDataset_Train(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Train(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func 
Test_largeDataset_TrainSize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.TrainSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Query(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args 
args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Query(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_QuerySize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc 
func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.QuerySize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Distance(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want []float32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []float32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []float32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, 
want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Distance(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_DistanceSize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: 
want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.DistanceSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Neighbor(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want []int + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []int, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []int, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: 
"test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got, err := d.Neighbor(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_NeighborSize(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt 
*testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.NeighborSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Dimension(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: 
test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.Dimension() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_DistanceType(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want string + } + type test struct { + name string + fields fields + want want + checkFunc func(want, string) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got string) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.DistanceType() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_ObjectType(t *testing.T) { + type fields struct { + dataset 
*dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want string + } + type test struct { + name string + fields fields + want want + checkFunc func(want, string) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got string) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.ObjectType() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_largeDataset_Name(t *testing.T) { + type fields struct { + dataset *dataset + train x1b.BillionScaleVectors + query x1b.BillionScaleVectors + groundTruth [][]int + distances x1b.FloatVectors + } + type want struct { + want string + } + type test struct { + name string + fields fields + want want + checkFunc func(want, string) error + beforeFunc func() + afterFunc func() + } + 
defaultCheckFunc := func(w want, got string) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + groundTruth: nil, + distances: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + d := &largeDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + groundTruth: test.fields.groundTruth, + distances: test.fields.distances, + } + + got := d.Name() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} diff --git a/pkg/tools/cli/loadtest/assets/loader_test.go b/pkg/tools/cli/loadtest/assets/loader_test.go deleted file mode 100644 index 151f89c0b6..0000000000 --- a/pkg/tools/cli/loadtest/assets/loader_test.go +++ /dev/null @@ -1,735 +0,0 @@ -// -// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -package assets - -import ( - "reflect" - "testing" - - "github.com/vdaas/vald/internal/errors" - "go.uber.org/goleak" - "gonum.org/v1/hdf5" -) - -func Test_loadFloat32(t *testing.T) { - type args struct { - dset *hdf5.Dataset - npoints int - row int - dim int - } - type want struct { - want interface{} - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, interface{}, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got interface{}, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - dset: nil, - npoints: 0, - row: 0, - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - dset: nil, - npoints: 0, - row: 0, - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := loadFloat32(test.args.dset, 
test.args.npoints, test.args.row, test.args.dim) - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_loadInt(t *testing.T) { - type args struct { - dset *hdf5.Dataset - npoints int - row int - dim int - } - type want struct { - want interface{} - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, interface{}, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got interface{}, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - dset: nil, - npoints: 0, - row: 0, - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - dset: nil, - npoints: 0, - row: 0, - dim: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := loadInt(test.args.dset, test.args.npoints, test.args.row, test.args.dim) - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func Test_loadDataset(t *testing.T) { - type args struct { - file *hdf5.File - name string - f loaderFunc - } - type want struct { - wantDim int - wantVec interface{} - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, 
int, interface{}, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, gotDim int, gotVec interface{}, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(gotDim, w.wantDim) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotDim, w.wantDim) - } - if !reflect.DeepEqual(gotVec, w.wantVec) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotVec, w.wantVec) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - file: nil, - name: "", - f: nil, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - file: nil, - name: "", - f: nil, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - gotDim, gotVec, err := loadDataset(test.args.file, test.args.name, test.args.f) - if err := test.checkFunc(test.want, gotDim, gotVec, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestLoad(t *testing.T) { - type args struct { - path string - } - type want struct { - wantTrain [][]float32 - wantTest [][]float32 - wantDistances [][]float32 - wantNeighbors [][]int - wantDim int - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, [][]float32, [][]float32, [][]float32, [][]int, int, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, gotTrain [][]float32, gotTest [][]float32, gotDistances [][]float32, gotNeighbors 
[][]int, gotDim int, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(gotTrain, w.wantTrain) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotTrain, w.wantTrain) - } - if !reflect.DeepEqual(gotTest, w.wantTest) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotTest, w.wantTest) - } - if !reflect.DeepEqual(gotDistances, w.wantDistances) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotDistances, w.wantDistances) - } - if !reflect.DeepEqual(gotNeighbors, w.wantNeighbors) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotNeighbors, w.wantNeighbors) - } - if !reflect.DeepEqual(gotDim, w.wantDim) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotDim, w.wantDim) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - gotTrain, gotTest, gotDistances, gotNeighbors, gotDim, err := Load(test.args.path) - if err := test.checkFunc(test.want, gotTrain, gotTest, gotDistances, gotNeighbors, gotDim, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestCreateRandomIDs(t *testing.T) { - type args struct { - n int - } - type want struct { - wantIds []string - } - type test struct { - name string - args args - want want - checkFunc func(want, []string) error 
- beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, gotIds []string) error { - if !reflect.DeepEqual(gotIds, w.wantIds) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotIds, w.wantIds) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - gotIds := CreateRandomIDs(test.args.n) - if err := test.checkFunc(test.want, gotIds); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestCreateRandomIDsWithLength(t *testing.T) { - type args struct { - n int - l int - } - type want struct { - wantIds []string - } - type test struct { - name string - args args - want want - checkFunc func(want, []string) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, gotIds []string) error { - if !reflect.DeepEqual(gotIds, w.wantIds) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", gotIds, w.wantIds) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - n: 0, - l: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - n: 0, - l: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - 
if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - gotIds := CreateRandomIDsWithLength(test.args.n, test.args.l) - if err := test.checkFunc(test.want, gotIds); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestCreateSerialIDs(t *testing.T) { - type args struct { - n int - } - type want struct { - want []string - } - type test struct { - name string - args args - want want - checkFunc func(want, []string) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got []string) error { - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - n: 0, - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got := CreateSerialIDs(test.args.n) - if err := test.checkFunc(test.want, got); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestLoadDataWithRandomIDs(t *testing.T) { - type args struct { - path string - } - type want struct { - want Dataset - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, Dataset, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got Dataset, err error) error { - if !errors.Is(err, 
w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - - for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := LoadDataWithRandomIDs(test.args.path) - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} - -func TestLoadDataWithSerialIDs(t *testing.T) { - type args struct { - path string - } - type want struct { - want Dataset - err error - } - type test struct { - name string - args args - want want - checkFunc func(want, Dataset, error) error - beforeFunc func(args) - afterFunc func(args) - } - defaultCheckFunc := func(w want, got Dataset, err error) error { - if !errors.Is(err, w.err) { - return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) - } - if !reflect.DeepEqual(got, w.want) { - return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) - } - return nil - } - tests := []test{ - // TODO test cases - /* - { - name: "test_case_1", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - }, - */ - - // TODO test cases - /* - func() test { - return test { - name: "test_case_2", - args: args { - path: "", - }, - want: want{}, - checkFunc: defaultCheckFunc, - } - }(), - */ - } - 
- for _, test := range tests { - t.Run(test.name, func(tt *testing.T) { - defer goleak.VerifyNone(t) - if test.beforeFunc != nil { - test.beforeFunc(test.args) - } - if test.afterFunc != nil { - defer test.afterFunc(test.args) - } - if test.checkFunc == nil { - test.checkFunc = defaultCheckFunc - } - - got, err := LoadDataWithSerialIDs(test.args.path) - if err := test.checkFunc(test.want, got, err); err != nil { - tt.Errorf("error = %v", err) - } - - }) - } -} diff --git a/pkg/tools/cli/loadtest/assets/small_dataset.go b/pkg/tools/cli/loadtest/assets/small_dataset.go new file mode 100644 index 0000000000..0c43642bdc --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/small_dataset.go @@ -0,0 +1,182 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+package assets
+
+import (
+	"fmt"
+	"math/rand"
+	"path/filepath"
+)
+
+const (
+	// smallDatasetPath is the directory path passed to findDir when a
+	// file-backed small dataset is loaded.
+	smallDatasetPath = "hack/benchmark/assets/dataset"
+)
+
+// smallDataset is a Dataset whose vectors are held entirely in memory as
+// Go slices. The embedded *dataset carries the metadata set by the
+// constructors below (name, dimension, distanceType, objectType).
+type smallDataset struct {
+	*dataset
+	train     [][]float32 // vectors returned by Train
+	query     [][]float32 // vectors returned by Query
+	distances [][]float32 // per-query distances returned by Distance
+	neighbors [][]int     // per-query neighbor lists returned by Neighbor
+}
+
+// loadSmallData returns a loader for the dataset file fileName.
+//
+// Nothing is read until the returned function is called. That function
+// resolves smallDatasetPath with findDir, reads fileName from the resulting
+// directory with Load, and wraps the loaded train, query, distance and
+// neighbor data in a smallDataset labelled with datasetName, distanceType
+// and objectType. The dimension is the one reported by Load. Any error from
+// findDir or Load is returned unchanged together with a nil Dataset.
+func loadSmallData(fileName, datasetName, distanceType, objectType string) func() (Dataset, error) {
+	return func() (Dataset, error) {
+		dir, err := findDir(smallDatasetPath)
+		if err != nil {
+			return nil, err
+		}
+		t, q, d, n, dim, err := Load(filepath.Join(dir, fileName))
+		if err != nil {
+			return nil, err
+		}
+
+		return &smallDataset{
+			dataset: &dataset{
+				name:         datasetName,
+				dimension:    dim,
+				distanceType: distanceType,
+				objectType:   objectType,
+			},
+			train:     t,
+			query:     q,
+			distances: d,
+			neighbors: n,
+		}, nil
+	}
+}
+
+// identity returns a loader that builds the dim x dim identity matrix.
+//
+// Row i is a dim-length vector with 1 at position i and 0 elsewhere. The
+// same rows are used as both the train and the query vectors (the two
+// fields share one backing slice). distances and neighbors are left nil, so
+// DistanceSize and NeighborSize report 0. The returned error is always nil.
+func identity(dim int) func() (Dataset, error) {
+	return func() (Dataset, error) {
+		train := make([][]float32, dim)
+		for i := range train {
+			train[i] = make([]float32, dim)
+			train[i][i] = 1
+		}
+		return &smallDataset{
+			dataset: &dataset{
+				name:         fmt.Sprintf("identity-%d", dim),
+				dimension:    dim,
+				distanceType: "l2",
+				objectType:   "float",
+			},
+			train: train,
+			query: train,
+		}, nil
+	}
+}
+
+// random returns a loader that builds size train vectors and size query
+// vectors of dimension dim, every component drawn with rand.Float32 from
+// the package-level math/rand source.
+//
+// distances and neighbors are left nil. The returned error is always nil.
+func random(dim, size int) func() (Dataset, error) {
+	return func() (Dataset, error) {
+		train := make([][]float32, size)
+		query := make([][]float32, size)
+		for i := range train {
+			train[i] = make([]float32, dim)
+			query[i] = make([]float32, dim)
+			for j := range train[i] {
+				// Draw order (train first, then query) is kept so that a
+				// seeded source yields the same data as before.
+				train[i][j] = rand.Float32()
+				query[i][j] = rand.Float32()
+			}
+		}
+		return &smallDataset{
+			dataset: &dataset{
+				name:         fmt.Sprintf("random-%d-%d", dim, size),
+				dimension:    dim,
+				distanceType: "l2",
+				objectType:   "float",
+			},
+			train: train,
+			query: query,
+		}, nil
+	}
+}
+
+// gaussian returns a loader that builds size train vectors and size query
+// vectors of dimension dim, every component computed as
+// rand.NormFloat64()*stdDev + mean and narrowed to float32.
+//
+// distances and neighbors are left nil. The returned error is always nil.
+func gaussian(dim, size int, mean, stdDev float64) func() (Dataset, error) {
+	return func() (Dataset, error) {
+		train := make([][]float32, size)
+		query := make([][]float32, size)
+		for i := range train {
+			train[i] = make([]float32, dim)
+			query[i] = make([]float32, dim)
+			for j := range train[i] {
+				// Draw order (train first, then query) is kept so that a
+				// seeded source yields the same data as before.
+				train[i][j] = float32(rand.NormFloat64()*stdDev + mean)
+				query[i][j] = float32(rand.NormFloat64()*stdDev + mean)
+			}
+		}
+		return &smallDataset{
+			dataset: &dataset{
+				name:         fmt.Sprintf("gaussian-%d-%d-%f-%f", dim, size, mean, stdDev),
+				dimension:    dim,
+				distanceType: "l2",
+				objectType:   "float",
+			},
+			train: train,
+			query: query,
+		}, nil
+	}
+}
+
+// Train returns the i-th train vector (a []float32 wrapped in interface{}).
+// It returns ErrOutOfBounds when i is negative or not less than TrainSize.
+func (s *smallDataset) Train(i int) (interface{}, error) {
+	// A negative index would otherwise panic on the slice access below.
+	if i < 0 || i >= len(s.train) {
+		return nil, ErrOutOfBounds
+	}
+	return s.train[i], nil
+}
+
+// TrainSize returns the number of train vectors.
+func (s *smallDataset) TrainSize() int {
+	return len(s.train)
+}
+
+// Query returns the i-th query vector (a []float32 wrapped in interface{}).
+// It returns ErrOutOfBounds when i is negative or not less than QuerySize.
+func (s *smallDataset) Query(i int) (interface{}, error) {
+	// A negative index would otherwise panic on the slice access below.
+	if i < 0 || i >= len(s.query) {
+		return nil, ErrOutOfBounds
+	}
+	return s.query[i], nil
+}
+
+// QuerySize returns the number of query vectors.
+func (s *smallDataset) QuerySize() int {
+	return len(s.query)
+}
+
+// Distance returns the i-th entry of the stored distances.
+// It returns ErrOutOfBounds when i is negative or not less than
+// DistanceSize.
+func (s *smallDataset) Distance(i int) ([]float32, error) {
+	// A negative index would otherwise panic on the slice access below.
+	if i < 0 || i >= len(s.distances) {
+		return nil, ErrOutOfBounds
+	}
+	return s.distances[i], nil
+}
+
+// DistanceSize returns the number of stored distance entries.
+func (s *smallDataset) DistanceSize() int {
+	return len(s.distances)
+}
+
+// Neighbor returns the i-th entry of the stored neighbor lists.
+// It returns ErrOutOfBounds when i is negative or not less than
+// NeighborSize.
+func (s *smallDataset) Neighbor(i int) ([]int, error) {
+	// A negative index would otherwise panic on the slice access below.
+	if i < 0 || i >= len(s.neighbors) {
+		return nil, ErrOutOfBounds
+	}
+	return s.neighbors[i], nil
+}
+
+// NeighborSize returns size of neighbors.
+func (s *smallDataset) NeighborSize() int { + return len(s.neighbors) +} diff --git a/pkg/tools/cli/loadtest/assets/small_dataset_test.go b/pkg/tools/cli/loadtest/assets/small_dataset_test.go new file mode 100644 index 0000000000..4bae3775da --- /dev/null +++ b/pkg/tools/cli/loadtest/assets/small_dataset_test.go @@ -0,0 +1,1105 @@ +// +// Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package assets + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/internal/errors" + "go.uber.org/goleak" +) + +func Test_loadSmallData(t *testing.T) { + type args struct { + fileName string + datasetName string + distanceType string + objectType string + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + fileName: "", + datasetName: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + fileName: "", + 
datasetName: "", + distanceType: "", + objectType: "", + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := loadSmallData(test.args.fileName, test.args.datasetName, test.args.distanceType, test.args.objectType) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_identity(t *testing.T) { + type args struct { + dim int + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dim: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := identity(test.args.dim) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_random(t *testing.T) { + type args struct { + dim int + size int + } + 
type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dim: 0, + size: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + dim: 0, + size: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := random(test.args.dim, test.args.size) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_gaussian(t *testing.T) { + type args struct { + dim int + size int + mean float64 + stdDev float64 + } + type want struct { + want func() (Dataset, error) + } + type test struct { + name string + args args + want want + checkFunc func(want, func() (Dataset, error)) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got func() (Dataset, error)) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + dim: 0, + size: 0, + mean: 0, + stdDev: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return 
test { + name: "test_case_2", + args: args { + dim: 0, + size: 0, + mean: 0, + stdDev: 0, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + + got := gaussian(test.args.dim, test.args.size, test.args.mean, test.args.stdDev) + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Train(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt 
*testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Train(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_TrainSize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + 
distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.TrainSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Query(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want interface{} + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, interface{}, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got interface{}, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := 
s.Query(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_QuerySize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.QuerySize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Distance(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + 
want []float32 + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []float32, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []float32, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Distance(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_DistanceSize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error 
+ beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.DistanceSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_Neighbor(t *testing.T) { + type args struct { + i int + } + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want []int + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, []int, error) error + beforeFunc func(args) + afterFunc func(args) + } + defaultCheckFunc := func(w want, got []int, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got error = %v, want %v", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, 
w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + i: 0, + }, + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(test.args) + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got, err := s.Neighbor(test.args.i) + if err := test.checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} + +func Test_smallDataset_NeighborSize(t *testing.T) { + type fields struct { + dataset *dataset + train [][]float32 + query [][]float32 + distances [][]float32 + neighbors [][]int + } + type want struct { + want int + } + type test struct { + name string + fields fields + want want + checkFunc func(want, int) error + beforeFunc func() + afterFunc func() + } + defaultCheckFunc := func(w want, got int) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got = %v, want %v", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + }, + */ + + // 
TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + dataset: dataset{}, + train: nil, + query: nil, + distances: nil, + neighbors: nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + } + }(), + */ + } + + for _, test := range tests { + t.Run(test.name, func(tt *testing.T) { + defer goleak.VerifyNone(tt) + if test.beforeFunc != nil { + test.beforeFunc() + } + if test.afterFunc != nil { + defer test.afterFunc() + } + if test.checkFunc == nil { + test.checkFunc = defaultCheckFunc + } + s := &smallDataset{ + dataset: test.fields.dataset, + train: test.fields.train, + query: test.fields.query, + distances: test.fields.distances, + neighbors: test.fields.neighbors, + } + + got := s.NeighborSize() + if err := test.checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + + }) + } +} diff --git a/pkg/tools/cli/loadtest/service/insert.go b/pkg/tools/cli/loadtest/service/insert.go index f876173533..9bf24d2b1b 100644 --- a/pkg/tools/cli/loadtest/service/insert.go +++ b/pkg/tools/cli/loadtest/service/insert.go @@ -19,6 +19,7 @@ import ( "context" "sync/atomic" + "github.com/kpango/fuid" "github.com/vdaas/vald/apis/grpc/agent/core" "github.com/vdaas/vald/apis/grpc/gateway/vald" "github.com/vdaas/vald/apis/grpc/payload" @@ -44,15 +45,17 @@ func insertRequestProvider(dataset assets.Dataset, batchSize int) (f func() inte } func objectVectorProvider(dataset assets.Dataset) (func() interface{}, int) { - v := dataset.Train() - ids := dataset.IDs() idx := int32(-1) - size := len(v) + size := dataset.TrainSize() return func() (ret interface{}) { if i := int(atomic.AddInt32(&idx, 1)); i < size { + v, err := dataset.Train(i) + if err != nil { + return nil + } ret = &payload.Object_Vector{ - Id: ids[i], - Vector: v[i], + Id: fuid.String(), + Vector: v.([]float32), } } return ret diff --git a/pkg/tools/cli/loadtest/service/search.go b/pkg/tools/cli/loadtest/service/search.go index b23bc5c7f3..a51e0728db 100644 --- 
a/pkg/tools/cli/loadtest/service/search.go +++ b/pkg/tools/cli/loadtest/service/search.go @@ -29,13 +29,16 @@ import ( ) func searchRequestProvider(dataset assets.Dataset) (func() interface{}, int, error) { - v := dataset.Query() - size := len(v) + size := dataset.QuerySize() idx := int32(-1) return func() (ret interface{}) { if i := int(atomic.AddInt32(&idx, 1)); i < size { + v, err := dataset.Query(i) + if err != nil { + return nil + } ret = &payload.Search_Request{ - Vector: v[i], + Vector: v.([]float32), } } return ret