diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml index a72af5c9a8..e962661dd4 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml @@ -252,6 +252,7 @@ spec: name: type: string enum: + - original - fashion-mnist range: type: object @@ -262,6 +263,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml index bbdfd8dd1d..6cf2d16630 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml @@ -84,6 +84,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/charts/vald-benchmark-operator/job-values.schema.json b/charts/vald-benchmark-operator/job-values.schema.json index 54ebd9b6a7..20b50910b6 100644 --- a/charts/vald-benchmark-operator/job-values.schema.json +++ b/charts/vald-benchmark-operator/job-values.schema.json @@ -305,7 +305,7 @@ "name": { "type": "string", "description": "the name of dataset", - "enum": ["fashion-mnist"] + "enum": ["original", "fashion-mnist"] }, "range": { "type": "object", @@ -321,7 +321,12 @@ "description": "start index number", "minimum": 1 } - } + }, + "required": ["start", "end"] + }, + "url": { + "type": "string", + "description": "the dataset url which is used for executing benchmark job with user defined hdf5 file" } }, "required": ["name", "indexes", "group", "range"] diff --git a/charts/vald-benchmark-operator/scenario-values.schema.json b/charts/vald-benchmark-operator/scenario-values.schema.json index e3e549e69d..8c6d117276 100644 --- a/charts/vald-benchmark-operator/scenario-values.schema.json +++ 
b/charts/vald-benchmark-operator/scenario-values.schema.json @@ -36,7 +36,12 @@ "description": "start index number", "minimum": 1 } - } + }, + "required": ["start", "end"] + }, + "url": { + "type": "string", + "description": "the dataset url which is used for executing benchmark job with user defined hdf5 file" } }, "required": ["name", "indexes", "group", "range"] diff --git a/charts/vald-benchmark-operator/schemas/job-values.yaml b/charts/vald-benchmark-operator/schemas/job-values.yaml index 89a3b1c8e8..835cb239dc 100644 --- a/charts/vald-benchmark-operator/schemas/job-values.yaml +++ b/charts/vald-benchmark-operator/schemas/job-values.yaml @@ -27,7 +27,7 @@ target: # @schema {"name": "dataset", "type": "object", "required": ["name", "indexes", "group", "range"]} # dataset -- dataset information dataset: - # @schema {"name": "dataset.name", "type": "string", "enum": ["fashion-mnist"] } + # @schema {"name": "dataset.name", "type": "string", "enum": ["original", "fashion-mnist"] } # dataset.name -- the name of dataset name: "fashion-mnist" # @schema {"name": "dataset.indexes", "type": "integer", "minimum": 0} @@ -36,7 +36,7 @@ dataset: # @schema {"name": "dataset.group", "type": "string", "minLength": 1} # dataset.group -- the hdf5 group name of dataset group: "test" - # @schema {"name": "dataset.range", "type": "object", "range": ["start", "port"]} + # @schema {"name": "dataset.range", "type": "object", "required": ["start", "end"]} # dataset.range -- the data range of indexes range: # @schema {"name": "dataset.range.start", "type": "integer", "minimum": 1} @@ -45,6 +45,9 @@ dataset: # @schema {"name": "dataset.range.end", "type": "integer", "minimum": 1} # dataset.range.end -- end index number end: 1000 + # @schema {"name": "dataset.url", "type": "string"} + # dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file + url: "" # @schema {"name": "dimension", "type": "integer", "minimum": 1} # dimension -- vector 
dimension dimension: 784 diff --git a/charts/vald-benchmark-operator/schemas/scenario-values.yaml b/charts/vald-benchmark-operator/schemas/scenario-values.yaml index 66a32ea707..1cb0c6c68d 100644 --- a/charts/vald-benchmark-operator/schemas/scenario-values.yaml +++ b/charts/vald-benchmark-operator/schemas/scenario-values.yaml @@ -37,7 +37,7 @@ dataset: # @schema {"name": "dataset.group", "type": "string", "minLength": 1} # dataset.group -- the hdf5 group name of dataset group: "test" - # @schema {"name": "dataset.range", "type": "object", "range": ["start", "port"]} + # @schema {"name": "dataset.range", "type": "object", "required": ["start", "end"]} # dataset.range -- the data range of indexes range: # @schema {"name": "dataset.range.start", "type": "integer", "minimum": 1} @@ -46,6 +46,9 @@ dataset: # @schema {"name": "dataset.range.end", "type": "integer", "minimum": 1} # dataset.range.end -- end index number end: 1000 + # @schema {"name": "dataset.url", "type": "string"} + # dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file + url: "" # @schema {"name": "jobs", "type": "array", "items": {"type": "object"}} jobs: diff --git a/internal/config/benchmark.go b/internal/config/benchmark.go index 1425756153..aa3cf1c687 100644 --- a/internal/config/benchmark.go +++ b/internal/config/benchmark.go @@ -62,10 +62,13 @@ type BenchmarkDataset struct { Group string `json:"group,omitempty"` Indexes int `json:"indexes,omitempty"` Range *BenchmarkDatasetRange `json:"range,omitempty"` + URL string `json:"url,omitempty"` } func (d *BenchmarkDataset) Bind() *BenchmarkDataset { d.Name = GetActualValue(d.Name) + d.Group = GetActualValue(d.Group) + d.URL = GetActualValue(d.URL) return d } diff --git a/internal/test/data/hdf5/hdf5.go b/internal/test/data/hdf5/hdf5.go index f178a52c2e..8fedfd76b1 100644 --- a/internal/test/data/hdf5/hdf5.go +++ b/internal/test/data/hdf5/hdf5.go @@ -28,7 +28,7 @@ import ( ) type Data interface { - 
Download() error + Download(url string) error Read() error GetName() DatasetName GetPath() string @@ -41,11 +41,14 @@ type Data interface { type DatasetName int const ( - FashionMNIST784Euclidean DatasetName = iota + Original DatasetName = iota + FashionMNIST784Euclidean ) func (d DatasetName) String() string { switch d { + case Original: + return "original" case FashionMNIST784Euclidean: return "fashion-mnist-784-euc" default: @@ -109,8 +112,10 @@ func New(opts ...Option) (Data, error) { // Get downloads the hdf5 file. // https://github.com/erikbern/ann-benchmarks/#data-sets -func (d *data) Download() error { +func (d *data) Download(url string) error { switch d.name { + case Original: + return downloadFile(url, d.path) case FashionMNIST784Euclidean: return downloadFile(FashionMNIST784EuclideanUrl.String(), d.path) default: diff --git a/internal/test/data/hdf5/hdf5_test.go b/internal/test/data/hdf5/hdf5_test.go index d308048781..509de3c6e5 100644 --- a/internal/test/data/hdf5/hdf5_test.go +++ b/internal/test/data/hdf5/hdf5_test.go @@ -353,12 +353,16 @@ func Test_data_Download(t *testing.T) { test [][]float32 neighbors [][]int } + type args struct { + url string + } type want struct { err error } type test struct { name string fields fields + args args want want checkFunc func(want, error) error beforeFunc func(*testing.T) @@ -440,7 +444,7 @@ func Test_data_Download(t *testing.T) { neighbors: test.fields.neighbors, } - err := d.Download() + err := d.Download(test.args.url) if err := checkFunc(test.want, err); err != nil { tt.Errorf("error = %v", err) } diff --git a/internal/test/data/hdf5/option.go b/internal/test/data/hdf5/option.go index 6fee9d0ae3..4d02b281f2 100644 --- a/internal/test/data/hdf5/option.go +++ b/internal/test/data/hdf5/option.go @@ -31,6 +31,8 @@ var defaultOptions = []Option{ func WithNameByString(n string) Option { var name DatasetName switch n { + case Original.String(): + name = Original case FashionMNIST784Euclidean.String(): name = 
FashionMNIST784Euclidean } @@ -40,6 +42,8 @@ func WithNameByString(n string) Option { func WithName(dn DatasetName) Option { return func(d *data) error { switch dn { + case Original: + d.name = dn case FashionMNIST784Euclidean: d.name = dn default: diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml index a72af5c9a8..e962661dd4 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml @@ -252,6 +252,7 @@ spec: name: type: string enum: + - original - fashion-mnist range: type: object @@ -262,6 +263,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml index bbdfd8dd1d..6cf2d16630 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml @@ -84,6 +84,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/pkg/tools/benchmark/job/service/job.go b/pkg/tools/benchmark/job/service/job.go index 2736ad898c..887fb31ca1 100644 --- a/pkg/tools/benchmark/job/service/job.go +++ b/pkg/tools/benchmark/job/service/job.go @@ -191,7 +191,7 @@ func New(opts ...Option) (Job, error) { func (j *job) PreStart(ctx context.Context) error { log.Infof("[benchmark job] start download dataset of %s", j.hdf5.GetName().String()) - if err := j.hdf5.Download(); err != nil { + if err := j.hdf5.Download(j.dataset.URL); err != nil { return err } log.Infof("[benchmark job] success download dataset of %s", j.hdf5.GetName().String()) diff --git a/pkg/tools/benchmark/job/service/option.go b/pkg/tools/benchmark/job/service/option.go index 772e638a6f..6f83979a4c 100644 --- 
a/pkg/tools/benchmark/job/service/option.go +++ b/pkg/tools/benchmark/job/service/option.go @@ -141,12 +141,15 @@ func WithHdf5(d hdf5.Data) Option { } } -// WithDataset sets the config.BenchmarkDataset including benchmakr dataset name, group name of hdf5.Data, the number of index, start range and end range. +// WithDataset sets the config.BenchmarkDataset, which includes the benchmark dataset name, the group name of hdf5.Data, the number of indexes, the start and end of the range, and the URL used to download a user-defined hdf5 file (required when the dataset name is hdf5.Original). func WithDataset(d *config.BenchmarkDataset) Option { return func(j *job) error { if d == nil { return errors.NewErrInvalidOption("dataset", d) } + if d.Name == hdf5.Original.String() && len(d.URL) == 0 { + return errors.NewErrInvalidOption("dataset", d) + } j.dataset = d return nil }